first code commit

2025-05-27 11:58:19 +02:00 · 2025-05-27 11:58:19 +02:00 · f7ad69f9ec
commit f7ad69f9ec
55 changed files with 10682 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,33 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 */__pycache__
 # Distribution / packaging
 dist/
 build/
 *.egg-info/
 # Environment
 venv/
 env/
 .env/
 .venv/
 # Tests
 .pytest_cache/
 .coverage
 htmlcov/
 # IDE files
 .idea/
 .vscode/
 *.swp
 *.swo
 # Project specific
 *.png
 *.jpg
 *.jpeg
 *.gif
 *.svg
--- a/21
+++ b/21
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2025 Duncan Tourolle
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,10 @@
 include README.md
 include LICENSE
 include pyWebLayout/*.py
 recursive-include pyWebLayout/abstract *.py
 recursive-include pyWebLayout/concrete *.py
 recursive-include pyWebLayout/style *.py
 recursive-include pyWebLayout/core *.py
 recursive-include pyWebLayout/typesetting *.py
 recursive-include pyWebLayout/io *.py
 recursive-include pyWebLayout/examples *.py
--- a/README.md
+++ b/README.md
@ -0,0 +1,93 @@
 # PyWebLayout
 A Python library for HTML-like layout and rendering.
 ## Description
 PyWebLayout provides classes for rendering HTML-like content to images using a box-based layout system. It includes support for text, tables, and containers, as well as an HTML parser for converting HTML to layout objects.
 ## Features
 - HTML-like layout system
 - Text rendering with font support
 - Table layouts
 - Container elements
 - HTML parsing
 - Image output
 ## Installation
 ```bash
 pip install pyWebLayout
 ```
 ## Usage
 ### Basic Example
 ```python
 from pyWebLayout.concrete.page import Page, Container
 from pyWebLayout.abstract.inline import Line
 from pyWebLayout.layout import Alignment
 from PIL import ImageFont
 # Create a page
 page = Page(size=(800, 600), background_color=(240, 240, 240))
 # Add a title container
 title_container = Container(
    origin=(0, 0),
    size=(780, 60),
    direction='horizontal',
    spacing=10,
    padding=(10, 10, 10, 10),
    halign=Alignment.CENTER,
    valign=Alignment.CENTER
 )
 page.add_child(title_container)
 # Create a title line with text
 title_font = ImageFont.load_default()
 title_line = Line(
    spacing=(8, 15),
    origin=(0, 0),
    size=(760, 40),
    font=title_font,
    text_color=(0, 0, 0),
    halign=Alignment.CENTER
 )
 title_container.add_child(title_line)
 title_line.add_word("PyWebLayout", title_font)
 title_line.add_word("Example", title_font)
 # Layout and render the page
 page.layout()
 image = page.render()
 image.save("example.png")
 ```
 ### HTML Example
 ```python
 from pyWebLayout.html_parser import html_to_image
 html = """
 <div style="text-align: center; padding: 10px;">
    <h1>PyWebLayout HTML Example</h1>
    <p>This is a paragraph rendered from HTML.</p>
    <p>The library supports <b>bold</b>, <i>italic</i>, and <u>underlined</u> text.</p>
 </div>
 """
 # Render HTML to an image
 image = html_to_image(html, page_size=(800, 600))
 image.save("html_example.png")
 ```
 ## License
 MIT License
 ## Author
 Duncan Tourolle - duncan@tourolle.paris
--- a/pyWebLayout/init.py
+++ b/pyWebLayout/init.py
@ -0,0 +1,44 @@
 """
 PyWebLayout - A Python library for HTML-like layout and rendering.
 This library provides classes for rendering HTML-like content to images
 using a box-based layout system. It includes support for text, tables,
 and containers, as well as parsers for HTML and EPUB content. It also
 supports pagination for ebook-like content with the ability to pause,
 save state, and resume rendering.
 """
 __version__ = '0.1.0'
 # Core abstractions
 from pyWebLayout.core import Renderable, Interactable, Layoutable, Queriable
 # Style components
 from pyWebLayout.style import Alignment, Font, FontWeight, FontStyle, TextDecoration
 # Typesetting algorithms
 from pyWebLayout.typesetting import (
    FlowLayout, 
    Paginator, PaginationState,
    DocumentPaginator, DocumentPaginationState
 )
 # Abstract document model
 from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
 # Concrete implementations
 from pyWebLayout.concrete.box import Box
 from pyWebLayout.concrete.text import Line
 from pyWebLayout.concrete.page import Container, Page
 # Abstract components
 from pyWebLayout.abstract.inline import Word
 # Layout components
 from pyWebLayout.table import Table, TableCell
 # IO functionality (reading and writing)
 from pyWebLayout.io import (
    parse_html, html_to_document,  # HTML parsing
    read_epub                      # EPUB reading
 )
--- a/pyWebLayout/main.py
+++ b/pyWebLayout/main.py
@ -0,0 +1,12 @@
 # Entry-point script: runs the PyWebLayout examples when executed directly.
 import os
 import sys
 # Add the parent directory to sys.path for direct execution, so that the
 # absolute ``pyWebLayout`` import below can be resolved from a source checkout
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 # Now import the example module (this must stay after the sys.path tweak above)
 # NOTE(review): MANIFEST.in packages ``pyWebLayout/examples`` (plural) while this
 # line imports ``pyWebLayout.example`` - confirm the module path is correct.
 from pyWebLayout.example import save_examples
 if __name__ == "__main__":
    # Only run the examples when executed as a script, not on import
    print("Running PyWebLayout examples...")
    save_examples()
--- a/pyWebLayout/abstract/init.py
+++ b/pyWebLayout/abstract/init.py
@ -0,0 +1,6 @@
 from .block import Block, BlockType, Parapgraph, Heading, HeadingLevel, Quote, CodeBlock
 from .block import HList, ListItem, ListStyle, Table, TableRow, TableCell
 from .block import HorizontalRule, LineBreak, Image
 from .inline import Word, FormattedSpan
 from .document import Document, MetadataType, Chapter, Book
 from .functional import Link, LinkType, Button, Form, FormField, FormFieldType
--- a/pyWebLayout/abstract/block.py
+++ b/pyWebLayout/abstract/block.py
@ -0,0 +1,783 @@
 from typing import List, Iterator, Tuple, Dict, Optional, Union, Any
 from enum import Enum
 from .inline import Word, FormattedSpan
 class BlockType(Enum):
    """
    Enumeration of different block types for classification purposes.
    Each concrete block class in this module passes one of these members to
    Block.__init__, which exposes it through the ``block_type`` property.
    """
    PARAGRAPH = 1         # Parapgraph
    HEADING = 2           # Heading
    QUOTE = 3             # Quote
    CODE_BLOCK = 4        # CodeBlock
    LIST = 5              # HList
    LIST_ITEM = 6         # ListItem
    TABLE = 7             # Table
    TABLE_ROW = 8         # TableRow
    TABLE_CELL = 9        # TableCell
    HORIZONTAL_RULE = 10  # HorizontalRule
    LINE_BREAK = 11       # LineBreak
    IMAGE = 12            # Image
 class Block:
    """
    Base class for all block-level elements.
    Block elements typically represent visual blocks of content that stack vertically.
    Every block stores the BlockType it was created with and a parent reference
    that starts out as None.
    """
    def __init__(self, block_type: BlockType) -> None:
        """
        Initialize a block element.
        Args:
            block_type: The type of block this element represents
        """
        self._block_type = block_type
        # No parent until a container assigns one through the ``parent`` setter
        self._parent = None
    @property
    def block_type(self) -> BlockType:
        """Get the type of this block element"""
        return self._block_type
    @property
    def parent(self):
        """Get the parent block containing this block, if any (None otherwise)"""
        return self._parent
    @parent.setter
    def parent(self, parent) -> None:
        """
        Set the parent block.
        The value is stored as-is; the previous parent is not notified.
        """
        self._parent = parent
 class Parapgraph(Block):
    """
    A paragraph is a block-level element that contains a sequence of words.
    Words and formatted spans are kept in two independent, insertion-ordered
    lists; adding a span does not add its words to the word list.
    NOTE: the class name is a long-standing misspelling of "Paragraph". It is
    kept unchanged because other modules import it under this name; the
    correctly spelled alias ``Paragraph`` defined directly below the class
    refers to the very same class object.
    """
    def __init__(self):
        """Initialize an empty paragraph"""
        super().__init__(BlockType.PARAGRAPH)
        self._words: List[Word] = []
        self._spans: List[FormattedSpan] = []
    def add_word(self, word: Word):
        """
        Add a word to the end of this paragraph.
        Args:
            word: The Word object to add
        """
        self._words.append(word)
    def add_span(self, span: FormattedSpan):
        """
        Add a formatted span to the end of this paragraph's span list.
        Args:
            span: The FormattedSpan object to add
        """
        self._spans.append(span)
    def words(self) -> Iterator[Tuple[int, Word]]:
        """
        Iterate over the words in this paragraph.
        Yields:
            Tuples of (index, word) for each word in the paragraph, with the
            index starting at 0
        """
        yield from enumerate(self._words)
    def spans(self) -> Iterator[FormattedSpan]:
        """
        Iterate over the formatted spans in this paragraph.
        Yields:
            Each FormattedSpan in the paragraph, in insertion order
        """
        yield from self._spans
    @property
    def word_count(self) -> int:
        """Get the number of words in this paragraph"""
        return len(self._words)
 # Correctly spelled alias; ``Parapgraph`` stays available for existing imports.
 Paragraph = Parapgraph
 class HeadingLevel(Enum):
    """
    Enumeration representing HTML heading levels (h1-h6).
    Each member's value equals the numeric level of the matching tag.
    """
    H1 = 1  # <h1>
    H2 = 2  # <h2>
    H3 = 3  # <h3>
    H4 = 4  # <h4>
    H5 = 5  # <h5>
    H6 = 6  # <h6>
 class Heading(Parapgraph):
    """
    A heading (h1 ... h6): a paragraph-like block that also carries a level.
    It reuses the word and span storage of its base class and only changes
    the reported block type.
    """
    def __init__(self, level: HeadingLevel = HeadingLevel.H1):
        """
        Create an empty heading.
        Args:
            level: The heading level (h1-h6); defaults to H1
        """
        super().__init__()
        self._level = level
        # The base initializer tags the block as PARAGRAPH; re-tag it here.
        self._block_type = BlockType.HEADING
    @property
    def level(self) -> HeadingLevel:
        """The current heading level"""
        return self._level
    @level.setter
    def level(self, level: HeadingLevel):
        """Replace the heading level"""
        self._level = level
 class Quote(Block):
    """
    A blockquote: a container block whose children are other block elements.
    """
    def __init__(self):
        """Create a blockquote with no children"""
        super().__init__(BlockType.QUOTE)
        self._blocks: List[Block] = []
    def add_block(self, block: Block):
        """
        Append a child block and make this quote its parent.
        Args:
            block: The Block object to add
        """
        children = self._blocks
        children.append(block)
        block.parent = self
    def blocks(self) -> Iterator[Block]:
        """
        Walk the child blocks in the order they were added.
        Yields:
            Each Block in the quote
        """
        yield from self._blocks
 class CodeBlock(Block):
    """
    A block of pre-formatted source code, stored line by line together with
    the name of the language it is written in.
    """
    def __init__(self, language: str = ""):
        """
        Create an empty code block.
        Args:
            language: The programming language for syntax highlighting
                (empty string by default)
        """
        super().__init__(BlockType.CODE_BLOCK)
        self._lines: List[str] = []
        self._language = language
    @property
    def language(self) -> str:
        """The programming language of this block"""
        return self._language
    @language.setter
    def language(self, language: str):
        """Replace the programming language"""
        self._language = language
    def add_line(self, line: str):
        """
        Append one line of code.
        Args:
            line: The line of code to add
        """
        self._lines.append(line)
    def lines(self) -> Iterator[Tuple[int, str]]:
        """
        Walk the stored lines together with their zero-based position.
        Yields:
            Tuples of (line_number, line_text) for each line
        """
        yield from enumerate(self._lines)
    @property
    def line_count(self) -> int:
        """How many lines this code block holds"""
        return len(self._lines)
 class ListStyle(Enum):
    """Enumeration of list styles, one per HTML list tag"""
    UNORDERED = 1  # <ul>
    ORDERED = 2    # <ol>
    DEFINITION = 3 # <dl>
 class HList(Block):
    """
    An HTML list (ul, ol or dl) holding an ordered collection of list items.
    """
    def __init__(self, style: ListStyle = ListStyle.UNORDERED):
        """
        Create an empty list.
        Args:
            style: The style of list (unordered, ordered, definition);
                unordered by default
        """
        super().__init__(BlockType.LIST)
        self._items: List[ListItem] = []
        self._style = style
    @property
    def style(self) -> ListStyle:
        """The current list style"""
        return self._style
    @style.setter
    def style(self, style: ListStyle):
        """Replace the list style"""
        self._style = style
    def add_item(self, item: 'ListItem'):
        """
        Append an item and make this list its parent.
        Args:
            item: The ListItem to add
        """
        entries = self._items
        entries.append(item)
        item.parent = self
    def items(self) -> Iterator['ListItem']:
        """
        Walk the items in the order they were added.
        Yields:
            Each ListItem in the list
        """
        yield from self._items
    @property
    def item_count(self) -> int:
        """How many items this list holds"""
        return len(self._items)
 class ListItem(Block):
    """
    A single list entry: a container of blocks with an optional definition term.
    """
    def __init__(self, term: Optional[str] = None):
        """
        Create an empty list item.
        Args:
            term: Optional term for definition lists (dt element)
        """
        super().__init__(BlockType.LIST_ITEM)
        self._term = term
        self._blocks: List[Block] = []
    @property
    def term(self) -> Optional[str]:
        """The definition term (for definition lists), or None if unset"""
        return self._term
    @term.setter
    def term(self, term: str):
        """Replace the definition term"""
        self._term = term
    def add_block(self, block: Block):
        """
        Append a child block and make this list item its parent.
        Args:
            block: The Block object to add
        """
        children = self._blocks
        children.append(block)
        block.parent = self
    def blocks(self) -> Iterator[Block]:
        """
        Walk the child blocks in the order they were added.
        Yields:
            Each Block in the list item
        """
        yield from self._blocks
 class TableCell(Block):
    """
    A table cell element that can contain other block elements.
    The column and row spans are always at least 1: smaller values are raised
    to 1 both by the constructor and by the property setters.
    """
    def __init__(self, is_header: bool = False, colspan: int = 1, rowspan: int = 1):
        """
        Initialize a table cell.
        Args:
            is_header: Whether this cell is a header cell (th) or data cell (td)
            colspan: Number of columns this cell spans (values below 1 become 1)
            rowspan: Number of rows this cell spans (values below 1 become 1)
        """
        super().__init__(BlockType.TABLE_CELL)
        self._is_header = is_header
        # Go through the property setters so the constructor enforces the same
        # minimum-of-1 rule as later assignments do.
        self.colspan = colspan
        self.rowspan = rowspan
        self._blocks: List[Block] = []
    @property
    def is_header(self) -> bool:
        """Check if this is a header cell"""
        return self._is_header
    @is_header.setter
    def is_header(self, is_header: bool):
        """Set whether this is a header cell"""
        self._is_header = is_header
    @property
    def colspan(self) -> int:
        """Get the column span (never less than 1)"""
        return self._colspan
    @colspan.setter
    def colspan(self, colspan: int):
        """Set the column span; values below 1 are stored as 1"""
        self._colspan = max(1, colspan)  # Ensure minimum of 1
    @property
    def rowspan(self) -> int:
        """Get the row span (never less than 1)"""
        return self._rowspan
    @rowspan.setter
    def rowspan(self, rowspan: int):
        """Set the row span; values below 1 are stored as 1"""
        self._rowspan = max(1, rowspan)  # Ensure minimum of 1
    def add_block(self, block: Block):
        """
        Add a block element to this cell and make the cell its parent.
        Args:
            block: The Block object to add
        """
        self._blocks.append(block)
        block.parent = self
    def blocks(self) -> Iterator[Block]:
        """
        Iterate over the blocks in this cell.
        Yields:
            Each Block in the cell, in insertion order
        """
        yield from self._blocks
 class TableRow(Block):
    """
    One row of a table: an ordered collection of table cells.
    """
    def __init__(self):
        """Create a row with no cells"""
        super().__init__(BlockType.TABLE_ROW)
        self._cells: List[TableCell] = []
    def add_cell(self, cell: TableCell):
        """
        Append a cell and make this row its parent.
        Args:
            cell: The TableCell to add
        """
        row_cells = self._cells
        row_cells.append(cell)
        cell.parent = self
    def cells(self) -> Iterator[TableCell]:
        """
        Walk the cells in the order they were added.
        Yields:
            Each TableCell in the row
        """
        yield from self._cells
    @property
    def cell_count(self) -> int:
        """How many cells this row holds"""
        return len(self._cells)
 class Table(Block):
    """
    A table made of rows grouped into three sections: header, body and footer.
    """
    def __init__(self, caption: Optional[str] = None):
        """
        Create a table without rows.
        Args:
            caption: Optional caption for the table
        """
        super().__init__(BlockType.TABLE)
        self._header_rows: List[TableRow] = []
        self._rows: List[TableRow] = []
        self._footer_rows: List[TableRow] = []
        self._caption = caption
    @property
    def caption(self) -> Optional[str]:
        """The table caption, or None if unset"""
        return self._caption
    @caption.setter
    def caption(self, caption: Optional[str]):
        """Replace the table caption"""
        self._caption = caption
    def add_row(self, row: TableRow, section: str = "body"):
        """
        Attach a row to one section of this table.
        Args:
            row: The TableRow to add
            section: The section to add the row to ("header", "body", or "footer");
                matched case-insensitively, any other value means the body
        """
        row.parent = self
        special_sections = {
            "header": self._header_rows,
            "footer": self._footer_rows,
        }
        # Anything that is not header/footer falls back to the body rows
        destination = special_sections.get(section.lower(), self._rows)
        destination.append(row)
    def header_rows(self) -> Iterator[TableRow]:
        """
        Walk the header rows in insertion order.
        Yields:
            Each TableRow in the table header
        """
        yield from self._header_rows
    def body_rows(self) -> Iterator[TableRow]:
        """
        Walk the body rows in insertion order.
        Yields:
            Each TableRow in the table body
        """
        yield from self._rows
    def footer_rows(self) -> Iterator[TableRow]:
        """
        Walk the footer rows in insertion order.
        Yields:
            Each TableRow in the table footer
        """
        yield from self._footer_rows
    def all_rows(self) -> Iterator[Tuple[str, TableRow]]:
        """
        Walk every row, header first, then body, then footer.
        Yields:
            Tuples of (section, row) for each row
        """
        grouped = (
            ("header", self._header_rows),
            ("body", self._rows),
            ("footer", self._footer_rows),
        )
        for label, section_rows in grouped:
            for row in section_rows:
                yield label, row
    @property
    def row_count(self) -> Dict[str, int]:
        """Row counts per section plus their sum under the "total" key"""
        n_header = len(self._header_rows)
        n_body = len(self._rows)
        n_footer = len(self._footer_rows)
        return {
            "header": n_header,
            "body": n_body,
            "footer": n_footer,
            "total": n_header + n_body + n_footer
        }
 class HorizontalRule(Block):
    """
    A horizontal rule element (<hr>).
    It carries no data of its own beyond its block type.
    """
    def __init__(self) -> None:
        """Initialize a horizontal rule tagged as BlockType.HORIZONTAL_RULE"""
        super().__init__(BlockType.HORIZONTAL_RULE)
 class LineBreak(Block):
    """
    A line break element (<br>).
    It carries no data of its own beyond its block type.
    """
    def __init__(self) -> None:
        """Initialize a line break tagged as BlockType.LINE_BREAK"""
        super().__init__(BlockType.LINE_BREAK)
 class Image(Block):
    """
    An image element that can be displayed in a document.
    The source may be a local file path, an http(s) URL or a ``data:image/``
    URI. Loading is attempted as soon as the object is created and again
    whenever ``source`` is reassigned. Loading never raises: failures are
    recorded as a message available through ``error``.
    """
    def __init__(self, source: str, alt_text: Optional[str] = None,
                 width: Optional[int] = None, height: Optional[int] = None):
        """
        Initialize an image.
        Args:
            source: The path or URL to the image
            alt_text: Alternative text description of the image (None becomes "")
            width: Optional width to display the image
            height: Optional height to display the image
        """
        super().__init__(BlockType.IMAGE)
        self._source = source
        self._alt_text = alt_text or ""
        self._width = width
        self._height = height
        self._loaded_image = None
        self._error = None
        # Try to load the image immediately
        self.load()
    @property
    def source(self) -> str:
        """Get the image source path or URL"""
        return self._source
    @source.setter
    def source(self, source: str):
        """Set the image source path or URL and reload the image from it"""
        self._source = source
        self._loaded_image = None  # Reset loaded image when source changes
        self._error = None
        # Try to load the image with the new source
        self.load()
    @property
    def alt_text(self) -> str:
        """Get the alternative text for the image"""
        return self._alt_text
    @alt_text.setter
    def alt_text(self, alt_text: str):
        """Set the alternative text for the image"""
        self._alt_text = alt_text
    @property
    def width(self) -> Optional[int]:
        """Get the specified width for the image"""
        return self._width
    @width.setter
    def width(self, width: Optional[int]):
        """Set the specified width for the image"""
        self._width = width
    @property
    def height(self) -> Optional[int]:
        """Get the specified height for the image"""
        return self._height
    @height.setter
    def height(self, height: Optional[int]):
        """Set the specified height for the image"""
        self._height = height
    @property
    def loaded_image(self):
        """Get the loaded image data, if available (None otherwise)"""
        return self._loaded_image
    @property
    def error(self) -> Optional[str]:
        """Get any error message from attempting to load the image"""
        return self._error
    def load(self, timeout: float = 10.0) -> bool:
        """
        Load the image from the source.
        This method handles loading from local files, http(s) URLs and
        ``data:image/`` URIs. Every failure is caught and stored in ``error``.
        Args:
            timeout: Seconds to wait for the server when the source is a URL.
                Without a limit a stalled server would block the caller
                (including the constructor) indefinitely.
        Returns:
            True if the image was loaded successfully, False otherwise
        """
        try:
            import os
            from PIL import Image as PILImage
            # Handle different types of sources
            if os.path.isfile(self._source):
                # Local file
                self._loaded_image = PILImage.open(self._source)
                self._error = None
                return True
            elif self._source.startswith(('http://', 'https://')):
                # URL - requires requests library
                try:
                    import requests
                    from io import BytesIO
                    # The whole body is needed anyway, so no streaming: this
                    # also lets requests release the connection on every path.
                    response = requests.get(self._source, timeout=timeout)
                    if response.status_code == 200:
                        self._loaded_image = PILImage.open(BytesIO(response.content))
                        self._error = None
                        return True
                    else:
                        self._error = f"Failed to load image: HTTP status {response.status_code}"
                        return False
                except ImportError:
                    self._error = "Requests library not available for URL loading"
                    return False
                except Exception as e:
                    self._error = f"Error loading image from URL: {str(e)}"
                    return False
            elif self._source.startswith('data:image/'):
                # Data URI
                try:
                    import base64
                    from io import BytesIO
                    # Format: data:image/png;base64,<base64-encoded-data>
                    # Only the payload after the first comma is needed; a URI
                    # without a comma fails the unpacking and is reported below.
                    _header, encoded = self._source.split(',', 1)
                    # Decode the base64 data
                    decoded = base64.b64decode(encoded)
                    self._loaded_image = PILImage.open(BytesIO(decoded))
                    self._error = None
                    return True
                except Exception as e:
                    self._error = f"Error loading image from data URI: {str(e)}"
                    return False
            else:
                self._error = f"Unable to load image from source: {self._source}"
                return False
        except ImportError as e:
            self._error = f"PIL library not available: {str(e)}"
            return False
        except Exception as e:
            self._error = f"Error loading image: {str(e)}"
            return False
    def get_dimensions(self) -> Tuple[Optional[int], Optional[int]]:
        """
        Get the dimensions of the image.
        Returns:
            The (width, height) of the loaded image; if no image is loaded,
            the explicitly specified width and height, each of which may be None
        """
        if self._loaded_image is not None:
            return self._loaded_image.size
        return self._width, self._height
    def get_aspect_ratio(self) -> Optional[float]:
        """
        Get the aspect ratio of the image (width/height).
        Returns:
            The aspect ratio of the loaded image, or of the specified
            dimensions when no image is loaded. None when the ratio cannot be
            computed (missing dimensions or a height that is not positive).
        """
        if self._loaded_image is not None:
            width, height = self._loaded_image.size
            if height > 0:
                return width / height
            # A loaded image with a non-positive height has no usable ratio
            return None
        if self._width is not None and self._height is not None and self._height > 0:
            return self._width / self._height
        return None
    def calculate_scaled_dimensions(self, max_width: Optional[int] = None,
                                   max_height: Optional[int] = None) -> Tuple[int, int]:
        """
        Calculate the dimensions at which the image should be displayed.
        Explicitly specified dimensions take priority and are NOT limited by
        the max constraints; the constraints only shrink an image whose size
        comes entirely from the loaded data.
        Args:
            max_width: The maximum width constraint
            max_height: The maximum height constraint
        Returns:
            A tuple of (width, height). When both dimensions are specified
            they are returned as-is; when no image is loaded, missing
            dimensions default to 300x200; when one dimension is specified the
            other follows the image's aspect ratio; otherwise the image size
            scaled down to fit the constraints while keeping the aspect ratio.
        """
        # Use specified dimensions if available
        if self._width is not None and self._height is not None:
            return self._width, self._height
        # If no image is loaded and dimensions are missing, use defaults
        if self._loaded_image is None:
            return self._width or 300, self._height or 200
        # If image is loaded, use its dimensions
        orig_width, orig_height = self._loaded_image.size
        # If only one dimension is specified, calculate the other
        if self._width is not None:
            aspect = orig_width / orig_height
            return self._width, int(self._width / aspect)
        if self._height is not None:
            aspect = orig_width / orig_height
            return int(self._height * aspect), self._height
        # Apply max constraints if provided
        width, height = orig_width, orig_height
        if max_width is not None and width > max_width:
            height = int(height * (max_width / width))
            width = max_width
        if max_height is not None and height > max_height:
            width = int(width * (max_height / height))
            height = max_height
        return width, height
--- a/pyWebLayout/abstract/document.py
+++ b/pyWebLayout/abstract/document.py
@ -0,0 +1,377 @@
 from __future__ import annotations
 from typing import List, Dict, Optional, Tuple, Union, Any
 from enum import Enum
 from .block import Block, BlockType, Heading, HeadingLevel, Parapgraph
 from .functional import Link, Button, Form
 from .inline import Word, FormattedSpan
 class MetadataType(Enum):
    """Kinds of metadata entries that can be attached to a document."""
    # Descriptive fields
    TITLE = 1
    AUTHOR = 2
    DESCRIPTION = 3
    KEYWORDS = 4
    LANGUAGE = 5
    # Publication details
    PUBLICATION_DATE = 6
    MODIFIED_DATE = 7
    PUBLISHER = 8
    IDENTIFIER = 9
    COVER_IMAGE = 10
    # Anything not covered above; numbered well away from the built-in kinds
    CUSTOM = 100
 class Document:
    """
    Logical model of a complete document such as an HTML page or an ebook.

    The document stores its top-level blocks together with metadata, named
    anchors, resources, stylesheets and scripts. It carries no rendering logic.
    """

    def __init__(self, title: Optional[str] = None, language: str = "en-US"):
        """
        Create an empty document.

        Args:
            title: The document title; recorded as TITLE metadata only when truthy
            language: The document language code; always recorded as LANGUAGE metadata
        """
        self._blocks: List[Block] = []
        self._metadata: Dict[MetadataType, Any] = {}
        self._anchors: Dict[str, Block] = {}  # anchor name -> target block
        self._resources: Dict[str, Any] = {}  # resource name -> resource data (e.g. images)
        self._stylesheets: List[Dict[str, Any]] = []  # stylesheet dictionaries, in insertion order
        self._scripts: List[str] = []  # script sources, in insertion order
        if title:
            self.set_metadata(MetadataType.TITLE, title)
        self.set_metadata(MetadataType.LANGUAGE, language)

    @property
    def blocks(self) -> List[Block]:
        """The top-level blocks of this document (the internal list itself, not a copy)"""
        return self._blocks

    def add_block(self, block: Block):
        """
        Append a block to the end of the document's top-level blocks.

        Args:
            block: The block to add
        """
        self._blocks.append(block)

    def set_metadata(self, meta_type: MetadataType, value: Any):
        """
        Store a metadata value, replacing any previous value of that type.

        Args:
            meta_type: The type of metadata
            value: The metadata value
        """
        self._metadata[meta_type] = value

    def get_metadata(self, meta_type: MetadataType) -> Optional[Any]:
        """
        Look up a metadata value.

        Args:
            meta_type: The type of metadata
        Returns:
            The stored value, or None if nothing is stored for that type
        """
        return self._metadata.get(meta_type, None)

    def add_anchor(self, name: str, target: Block):
        """
        Register a named anchor, replacing any anchor with the same name.

        Args:
            name: The anchor name
            target: The block the anchor points at
        """
        self._anchors[name] = target

    def get_anchor(self, name: str) -> Optional[Block]:
        """
        Look up a named anchor.

        Args:
            name: The anchor name
        Returns:
            The target block, or None if no such anchor exists
        """
        return self._anchors.get(name, None)

    def add_resource(self, name: str, resource: Any):
        """
        Register a resource, replacing any resource with the same name.

        Args:
            name: The resource name
            resource: The resource data
        """
        self._resources[name] = resource

    def get_resource(self, name: str) -> Optional[Any]:
        """
        Look up a resource.

        Args:
            name: The resource name
        Returns:
            The resource data, or None if no such resource exists
        """
        return self._resources.get(name, None)

    def add_stylesheet(self, stylesheet: Dict[str, Any]):
        """
        Append a stylesheet to this document.

        Args:
            stylesheet: The stylesheet data
        """
        self._stylesheets.append(stylesheet)

    def add_script(self, script: str):
        """
        Append a script to this document.

        Args:
            script: The script code
        """
        self._scripts.append(script)

    def get_title(self) -> Optional[str]:
        """
        Convenience accessor for the TITLE metadata.

        Returns:
            The document title, or None if not set
        """
        return self.get_metadata(MetadataType.TITLE)

    def set_title(self, title: str):
        """
        Convenience mutator for the TITLE metadata.

        Args:
            title: The document title
        """
        self.set_metadata(MetadataType.TITLE, title)

    def find_blocks_by_type(self, block_type: BlockType) -> List[Block]:
        """
        Collect every block of the given type, searching nested blocks too.

        Blocks are visited depth-first in document order. A block's children
        are taken from its `_blocks` attribute when it has one; otherwise from
        `_items` when that attribute exists and is a list.

        Args:
            block_type: The type of blocks to find
        Returns:
            A new list with the matching blocks in visiting order
        """
        def _walk(candidates: List[Block]):
            for candidate in candidates:
                # Hand out the block itself before descending into its children
                yield candidate
                if hasattr(candidate, '_blocks'):
                    yield from _walk(candidate._blocks)
                elif hasattr(candidate, '_items') and isinstance(candidate._items, list):
                    yield from _walk(candidate._items)

        return [found for found in _walk(self._blocks) if found.block_type == block_type]

    def find_headings(self) -> List[Heading]:
        """
        Collect all heading blocks in the document.

        Returns:
            The blocks of type HEADING that are also Heading instances
        """
        candidates = self.find_blocks_by_type(BlockType.HEADING)
        return [candidate for candidate in candidates if isinstance(candidate, Heading)]

    def generate_table_of_contents(self) -> List[Tuple[int, str, Block]]:
        """
        Build a table of contents from the document's headings.

        Returns:
            A list of (level, title, heading_block) tuples, one per heading,
            where title is the heading's word texts joined by single spaces
            and level is the numeric value of the heading's level
        """
        entries = []
        for heading in self.find_headings():
            # heading.words() yields pairs; the word is the second element
            title = " ".join(word.text for _, word in heading.words()).strip()
            entries.append((heading.level.value, title, heading))
        return entries
 class Chapter:
    """
    A chapter or section of a document.

    A chapter owns an ordered sequence of blocks and a free-form,
    string-keyed metadata dictionary.
    """

    def __init__(self, title: Optional[str] = None, level: int = 1):
        """
        Create an empty chapter.

        Args:
            title: The chapter title
            level: The chapter level (1 = top level, 2 = subsection, etc.)
        """
        self._title, self._level = title, level
        self._blocks: List[Block] = []
        self._metadata: Dict[str, Any] = {}

    @property
    def title(self) -> Optional[str]:
        """The chapter title (may be None)"""
        return self._title

    @title.setter
    def title(self, title: str):
        """Replace the chapter title"""
        self._title = title

    @property
    def level(self) -> int:
        """The chapter level; read-only after construction"""
        return self._level

    @property
    def blocks(self) -> List[Block]:
        """The blocks in this chapter (the internal list itself, not a copy)"""
        return self._blocks

    def add_block(self, block: Block):
        """
        Append a block to the end of this chapter.

        Args:
            block: The block to add
        """
        self._blocks.append(block)

    def set_metadata(self, key: str, value: Any):
        """
        Store a metadata value, replacing any previous value for the key.

        Args:
            key: The metadata key
            value: The metadata value
        """
        self._metadata[key] = value

    def get_metadata(self, key: str) -> Optional[Any]:
        """
        Look up a metadata value.

        Args:
            key: The metadata key
        Returns:
            The stored value, or None if nothing is stored for the key
        """
        return self._metadata.get(key, None)
 class Book(Document):
    """
    Abstract representation of an ebook: a document that also holds chapters.
    """

    def __init__(self, title: Optional[str] = None, author: Optional[str] = None, language: str = "en-US"):
        """
        Create an empty book.

        Args:
            title: The book title
            author: The book author; recorded as AUTHOR metadata only when truthy
            language: The book language code
        """
        super().__init__(title, language)
        self._chapters: List[Chapter] = []
        if author:
            self.set_metadata(MetadataType.AUTHOR, author)

    @property
    def chapters(self) -> List[Chapter]:
        """The chapters of this book (the internal list itself, not a copy)"""
        return self._chapters

    def add_chapter(self, chapter: Chapter):
        """
        Append a chapter to the end of this book.

        Args:
            chapter: The chapter to add
        """
        self._chapters.append(chapter)

    def create_chapter(self, title: Optional[str] = None, level: int = 1) -> Chapter:
        """
        Build a new chapter, append it to this book and hand it back.

        Args:
            title: The chapter title
            level: The chapter level
        Returns:
            The newly created chapter
        """
        created = Chapter(title, level)
        self.add_chapter(created)
        return created

    def get_author(self) -> Optional[str]:
        """
        Convenience accessor for the AUTHOR metadata.

        Returns:
            The book author, or None if not set
        """
        return self.get_metadata(MetadataType.AUTHOR)

    def set_author(self, author: str):
        """
        Convenience mutator for the AUTHOR metadata.

        Args:
            author: The book author
        """
        self.set_metadata(MetadataType.AUTHOR, author)

    def generate_table_of_contents(self) -> List[Tuple[int, str, Chapter]]:
        """
        Build a table of contents from the book's chapters.

        Chapters without a (truthy) title are left out.

        Returns:
            A list of (level, title, chapter) tuples in chapter order
        """
        return [
            (entry.level, entry.title, entry)
            for entry in self._chapters
            if entry.title
        ]
--- a/pyWebLayout/abstract/functional.py
+++ b/pyWebLayout/abstract/functional.py
@ -0,0 +1,310 @@
 from __future__ import annotations
 from enum import Enum
 from typing import Callable, Dict, Any, Optional, Union, List, Tuple
 from pyWebLayout.base import Interactable
 class LinkType(Enum):
    """Classification of links by what activating them does"""
    # Target inside the same document (e.g., chapter references, bookmarks)
    INTERNAL = 1
    # Target outside the document (e.g., websites, other documents)
    EXTERNAL = 2
    # Activation triggers an API call (e.g., for settings management)
    API = 3
    # Activation runs a specific function
    FUNCTION = 4
 class Link(Interactable):
    """
    A link that either names a location to navigate to or runs a callback.

    Links cover navigation inside a document, navigation to external
    resources, and API/function triggers such as settings management.
    """

    def __init__(self, 
                 location: str, 
                 link_type: LinkType = LinkType.INTERNAL,
                 callback: Optional[Callable] = None,
                 params: Optional[Dict[str, Any]] = None,
                 title: Optional[str] = None):
        """
        Create a link.

        Args:
            location: The target location or identifier for this link
            link_type: The type of link (internal, external, API, function)
            callback: Optional callback function to execute when the link is activated
            params: Optional keyword parameters passed to the callback; a falsy
                value is replaced by a fresh empty dict
            title: Optional title/tooltip for the link
        """
        super().__init__(callback)
        self._location = location
        self._link_type = link_type
        self._params = params or {}
        self._title = title

    @property
    def location(self) -> str:
        """The target location of this link"""
        return self._location

    @property
    def link_type(self) -> LinkType:
        """The type of this link"""
        return self._link_type

    @property
    def params(self) -> Dict[str, Any]:
        """The parameters for this link"""
        return self._params

    @property
    def title(self) -> Optional[str]:
        """The title/tooltip for this link"""
        return self._title

    def execute(self) -> Any:
        """
        Activate the link.

        API and FUNCTION links that have a callback invoke it as
        callback(location, **params) and return its result. Every other
        case simply returns the location; acting on it (navigation) is
        left to the caller.

        Returns:
            The callback result, or the link location.
        """
        runs_callback = self._link_type in (LinkType.API, LinkType.FUNCTION)
        if runs_callback and self._callback:
            return self._callback(self._location, **self._params)
        return self._location
 class Button(Interactable):
    """
    A clickable button that runs a callback.

    Behaves much like a function link, but is a distinct type so it can be
    presented differently.
    """

    def __init__(self, 
                 label: str,
                 callback: Callable,
                 params: Optional[Dict[str, Any]] = None,
                 enabled: bool = True):
        """
        Create a button.

        Args:
            label: The text label for the button
            callback: The function to execute when the button is clicked
            params: Optional keyword parameters passed to the callback; a falsy
                value is replaced by a fresh empty dict
            enabled: Whether the button is initially enabled
        """
        super().__init__(callback)
        self._label = label
        self._params = params or {}
        self._enabled = enabled

    @property
    def label(self) -> str:
        """The button label"""
        return self._label

    @label.setter
    def label(self, label: str):
        """Replace the button label"""
        self._label = label

    @property
    def enabled(self) -> bool:
        """Whether the button is currently enabled"""
        return self._enabled

    @enabled.setter
    def enabled(self, enabled: bool):
        """Enable or disable the button"""
        self._enabled = enabled

    def execute(self) -> Any:
        """
        Run the button's callback as callback(**params).

        Returns:
            The callback result, or None when the button is disabled or has
            no callback.
        """
        if not (self._enabled and self._callback):
            return None
        return self._callback(**self._params)
 class Form(Interactable):
    """
    A submittable form made of named input fields.

    Intended for user input and settings configuration.
    """

    def __init__(self, 
                 form_id: str,
                 action: Optional[str] = None,
                 callback: Optional[Callable] = None):
        """
        Create an empty form.

        Args:
            form_id: The unique identifier for this form
            action: The action URL or endpoint for form submission
            callback: Optional callback function to execute on form submission
        """
        super().__init__(callback)
        self._form_id = form_id
        self._action = action
        self._fields: Dict[str, FormField] = {}  # field name -> field

    @property
    def form_id(self) -> str:
        """The form ID"""
        return self._form_id

    @property
    def action(self) -> Optional[str]:
        """The form action"""
        return self._action

    def add_field(self, field: FormField):
        """
        Register a field under its name and point the field back at this form.

        A field with the same name as an existing one replaces it.

        Args:
            field: The FormField to add
        """
        self._fields[field.name] = field
        field.form = self

    def get_field(self, name: str) -> Optional[FormField]:
        """
        Look up a field by name.

        Args:
            name: The name of the field to get
        Returns:
            The matching FormField, or None if there is none
        """
        return self._fields.get(name, None)

    def get_values(self) -> Dict[str, Any]:
        """
        Snapshot the current value of every field.

        Returns:
            A new dictionary mapping field names to their current values
        """
        snapshot = {}
        for field_name, form_field in self._fields.items():
            snapshot[field_name] = form_field.value
        return snapshot

    def execute(self) -> Any:
        """
        Submit the form.

        Returns:
            The result of callback(form_id, values) when a callback is set,
            otherwise the dictionary of field values itself.
        """
        submitted = self.get_values()
        if not self._callback:
            return submitted
        return self._callback(self._form_id, submitted)
 class FormFieldType(Enum):
    """Kinds of input field a form can contain"""
    # Free-text style inputs
    TEXT = 1
    PASSWORD = 2
    # Choice style inputs
    CHECKBOX = 3
    RADIO = 4
    SELECT = 5
    # Multi-line text
    TEXTAREA = 6
    # Typed / formatted inputs
    NUMBER = 7
    DATE = 8
    TIME = 9
    EMAIL = 10
    URL = 11
    COLOR = 12
    RANGE = 13
    # Carries a value without a visible control
    HIDDEN = 14
 class FormField:
    """
    A single input field that belongs (or can belong) to a form.
    """

    def __init__(self, 
                 name: str,
                 field_type: FormFieldType,
                 label: Optional[str] = None,
                 value: Any = None,
                 required: bool = False,
                 options: Optional[List[Tuple[str, str]]] = None):
        """
        Create a form field that is not yet attached to any form.

        Args:
            name: The name of this field
            field_type: The type of this field
            label: Optional label for this field; falls back to the name when falsy
            value: Initial value for this field
            required: Whether this field is required
            options: Options for select, radio, or checkbox fields (list of
                (value, label) tuples); a falsy value becomes an empty list
        """
        self._name = name
        self._field_type = field_type
        self._label = label if label else name
        self._value = value
        self._required = required
        self._options = options if options else []
        # Filled in through the `form` setter once the field joins a form
        self._form: Optional[Form] = None

    @property
    def name(self) -> str:
        """The field name"""
        return self._name

    @property
    def field_type(self) -> FormFieldType:
        """The field type"""
        return self._field_type

    @property
    def label(self) -> str:
        """The field label"""
        return self._label

    @property
    def value(self) -> Any:
        """The current field value"""
        return self._value

    @value.setter
    def value(self, value: Any):
        """Replace the field value"""
        self._value = value

    @property
    def required(self) -> bool:
        """Whether the field is required"""
        return self._required

    @property
    def options(self) -> List[Tuple[str, str]]:
        """The field options"""
        return self._options

    @property
    def form(self) -> Optional[Form]:
        """The form containing this field, or None if unattached"""
        return self._form

    @form.setter
    def form(self, form: Form):
        """Record the form containing this field"""
        self._form = form
--- a/pyWebLayout/abstract/inline.py
+++ b/pyWebLayout/abstract/inline.py
@ -0,0 +1,208 @@
 from __future__ import annotations
 from pyWebLayout.base import Queriable
 from pyWebLayout.style import Font
 from typing import Tuple, Union, List, Optional, Dict
 class Word:
    """
    Logical representation of one word of a document.

    A word knows its text, its style, its neighbours in the word sequence and,
    once hyphenate() has succeeded, the parts it may be split into. Nothing
    here is specific to rendering.
    """

    def __init__(self, text: str, style: Font, background=None, previous: Union[Word, None] = None):
        """
        Create a word.

        Args:
            text: The text content of the word
            style: Font style information for the word
            background: Optional background color override; the style's
                background is used when this is falsy
            previous: Reference to the previous word in sequence
        """
        self._text = text
        self._style = style
        self._background = background if background else style.background
        self._previous = previous
        self._next = None
        # None until hyphenate() succeeds; reset by dehyphenate()
        self._hyphenated_parts = None

    @property
    def text(self) -> str:
        """The text content of the word"""
        return self._text

    @property
    def style(self) -> Font:
        """The font style of the word"""
        return self._style

    @property
    def background(self):
        """The background color of the word"""
        return self._background

    @property
    def previous(self) -> Union[Word, None]:
        """The previous word in sequence, if any"""
        return self._previous

    @property
    def next(self) -> Union[Word, None]:
        """The next word in sequence, if any"""
        return self._next

    @property
    def hyphenated_parts(self) -> Union[List[str], None]:
        """The stored hyphenated parts, or None if the word is not hyphenated"""
        return self._hyphenated_parts

    def add_next(self, next_word: Word):
        """Record the next word in sequence"""
        self._next = next_word

    def _insert_hyphens(self, language):
        """
        Return the word's text as produced by pyphen's `inserted` with '-'
        as the hyphen marker.

        Args:
            language: Language code for hyphenation; the style's language is
                used when this is falsy
        """
        # Imported lazily so pyphen is only needed when hyphenation is requested
        import pyphen
        lang = language if language else self._style.language
        return pyphen.Pyphen(lang=lang).inserted(self._text, hyphen='-')

    def can_hyphenate(self, language: str = None) -> bool:
        """
        Report whether the word can be hyphenated, without storing anything.

        Args:
            language: Language code for hyphenation. If None, uses the style's language.
        Returns:
            bool: True if the hyphen-inserted text contains a '-', False otherwise.
        """
        return '-' in self._insert_hyphens(language)

    def hyphenate(self, language: str = None) -> bool:
        """
        Hyphenate the word and remember the resulting parts.

        The hyphen-inserted text is split on '-'; every part except the last
        gets a trailing '-'.

        Args:
            language: Language code for hyphenation. If None, uses the style's language.
        Returns:
            bool: True if parts were stored, False if the hyphen-inserted text
            contains no '-' (in which case nothing is changed).
        """
        marked = self._insert_hyphens(language)
        if '-' not in marked:
            return False
        *leading, final = marked.split('-')
        self._hyphenated_parts = [piece + '-' for piece in leading] + [final]
        return True

    def dehyphenate(self):
        """Forget any stored hyphenation"""
        self._hyphenated_parts = None

    def get_hyphenated_part(self, index: int) -> str:
        """
        Fetch one hyphenated part of the word.

        Args:
            index: The index of the part to retrieve.
        Returns:
            The text of the specified part.
        Raises:
            IndexError: If the index is out of range or the word has not been hyphenated.
        """
        parts = self._hyphenated_parts
        if not parts:
            raise IndexError("Word has not been hyphenated")
        return parts[index]

    def get_hyphenated_part_count(self) -> int:
        """
        Count the hyphenated parts.

        Returns:
            The number of parts, or 0 if the word has not been hyphenated.
        """
        parts = self._hyphenated_parts
        if not parts:
            return 0
        return len(parts)
 class FormattedSpan:
    """
    A run of consecutive words that all share one style and background.
    """

    def __init__(self, style: Font, background=None):
        """
        Create an empty span.

        Args:
            style: Font style information for all words in this span
            background: Optional background color override; the style's
                background is used when this is falsy
        """
        self._style = style
        self._background = background if background else style.background
        self._words: List[Word] = []

    @property
    def style(self) -> Font:
        """The font style of this span"""
        return self._style

    @property
    def background(self):
        """The background color of this span"""
        return self._background

    @property
    def words(self) -> List[Word]:
        """The words in this span (the internal list itself, not a copy)"""
        return self._words

    def add_word(self, text: str) -> Word:
        """
        Create a word with this span's style and append it to the span.

        The new word is linked to the span's current last word in both
        directions (previous on the new word, next on the old one).

        Args:
            text: The text content of the word
        Returns:
            The newly created Word object
        """
        predecessor = None
        if self._words:
            predecessor = self._words[-1]
        new_word = Word(text, self._style, self._background, predecessor)
        if predecessor:
            predecessor.add_next(new_word)
        self._words.append(new_word)
        return new_word
--- a/pyWebLayout/base.py
+++ b/pyWebLayout/base.py
@ -0,0 +1,68 @@
 from abc import ABC
 import numpy as np
 from pyWebLayout.style import Alignment
 class Renderable(ABC):
    """
    Base class for objects that can be rendered to an image.

    Subclasses are expected to override render(); the base implementation
    does nothing and returns None.
    """

    def render(self):
        """
        Render the object to an image.

        Returns:
            PIL.Image: The rendered image (None in this base implementation)
        """
 class Interactable(ABC):
    """
    Base class for objects that react to user interaction.

    An interactable holds an optional callback that is invoked when the
    object is interacted with.
    """

    def __init__(self, callback=None):
        """
        Store the interaction callback.

        Args:
            callback: The function to call when this object is interacted with
        """
        self._callback = callback

    def interact(self, point: np.generic):
        """
        Forward an interaction at the given point to the callback.

        Args:
            point: The coordinates of the interaction
        Returns:
            The result of calling the callback with the point, or None when
            no callback is set
        """
        handler = self._callback
        return None if handler is None else handler(point)
 class Layoutable(ABC):
    """
    Base class for objects whose contents can be laid out.

    Subclasses are expected to override layout(); the base implementation
    does nothing and returns None.
    """

    def layout(self):
        """
        Arrange the object's contents.

        Meant to be called before rendering so the contents are in place.
        """
 class Queriable(ABC):
    """
    Base class for objects that can be asked whether a point lies inside them.
    """

    def in_object(self, point:np.generic):
        """
        Check if a point is in the object.

        The base implementation does nothing and returns None.
        """
--- a/pyWebLayout/concrete/init.py
+++ b/pyWebLayout/concrete/init.py
@ -0,0 +1,5 @@
 from .box import Box
 from .page import Container, Page
 from .text import Text, RenderableWord, Line
 from .functional import RenderableLink, RenderableButton, RenderableForm, RenderableFormField
 from .image import RenderableImage
--- a/pyWebLayout/concrete/box.py
+++ b/pyWebLayout/concrete/box.py
@ -0,0 +1,61 @@
 import numpy as np
 from PIL import Image
 from pyWebLayout.base import Renderable, Queriable
 from pyWebLayout.layout import Alignment
 class Box(Renderable, Queriable):
    """
    A rectangular region that can render optional content aligned inside it.

    The region spans from `origin` (inclusive) to `origin + size` (exclusive).
    Content is read from a `_content` attribute when one has been set on the
    instance (this class never sets it itself); it must provide a `render()`
    method returning an image.
    """

    def __init__(self,origin, size, callback = None, sheet : Image.Image = None, mode: str = None, halign=Alignment.CENTER, valign = Alignment.CENTER):
        """
        Create a box.

        Args:
            origin: Coordinates of the box origin (array-like)
            size: Extent of the box along each axis (array-like, same shape as origin)
            callback: Optional callback stored on the box
            sheet: Optional image whose mode is used for the rendered canvas
            mode: Image mode to use when no sheet is given; ignored when a
                sheet is provided
            halign: Horizontal alignment of the content inside the box
            valign: Vertical alignment of the content inside the box
        """
        self._origin = np.array(origin)
        self._size = np.array(size)
        self._end = self._origin +  self._size
        self._callback = callback
        self._sheet : Image.Image = sheet
        # The sheet's mode takes precedence over an explicitly passed mode
        self._mode = mode if sheet is None else sheet.mode
        self._halign = halign
        self._valign = valign

    def in_shape(self, point):
        """
        Test whether a point (or an array of points) lies inside the box.

        Args:
            point: Coordinates to test; the last axis holds the coordinates
        Returns:
            True where every coordinate satisfies origin <= coordinate < end
        """
        point = np.asarray(point)
        return np.all((point >= self._origin) & (point < self._end), axis=-1)

    def in_object(self, point):
        """Queriable interface: same hit test as in_shape()."""
        return self.in_shape(point)

    def render(self) -> Image.Image:
        """
        Render the box to a new image of the box's size.

        The canvas uses the sheet's mode when a sheet is set, otherwise the
        configured mode, otherwise 'RGBA'. If the box has content, the
        content's rendering is pasted at the offset given by the horizontal
        and vertical alignment (CENTER is the fallback for any other value).

        Returns:
            The rendered canvas
        """
        # Plain ints keep NumPy scalar types out of the Pillow calls
        box_width, box_height = (int(extent) for extent in self._size)
        if self._sheet is not None:
            canvas_mode = self._sheet.mode
        else:
            canvas_mode = self._mode if self._mode else 'RGBA'
        canvas = Image.new(canvas_mode, (box_width, box_height))
        content = getattr(self, '_content', None)
        if content is None:
            return canvas
        content_render = content.render()
        content_width, content_height = content_render.size
        # Horizontal alignment
        if self._halign == Alignment.LEFT:
            x_offset = 0
        elif self._halign == Alignment.RIGHT:
            x_offset = box_width - content_width
        else:  # CENTER is default
            x_offset = (box_width - content_width) // 2
        # Vertical alignment
        if self._valign == Alignment.TOP:
            y_offset = 0
        elif self._valign == Alignment.BOTTOM:
            y_offset = box_height - content_height
        else:  # CENTER is default
            y_offset = (box_height - content_height) // 2
        canvas.paste(content_render, (int(x_offset), int(y_offset)))
        return canvas
--- a/pyWebLayout/concrete/functional.py
+++ b/pyWebLayout/concrete/functional.py
@ -0,0 +1,545 @@
 from __future__ import annotations
 from typing import Optional, Dict, Any, Tuple, List, Union
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 from pyWebLayout.base import Renderable, Queriable
 from pyWebLayout.abstract.functional import Link, Button, Form, FormField, LinkType, FormFieldType
 from pyWebLayout.style import Font, TextDecoration
 from .box import Box
 from .text import Text
 class RenderableLink(Box, Queriable):
    """
    A concrete implementation for rendering Link objects.
    The link text is drawn underlined, in a colour chosen from the link
    type, inside a box that is padded on all four sides.
    """
    def __init__(self, link: Link, text: str, font: Font,
                 padding: Tuple[int, int, int, int] = (2, 4, 2, 4),
                 origin=None, size=None, callback=None, sheet=None, mode=None):
        """
        Initialize a renderable link.
        Args:
            link: The abstract Link object to render
            text: The text to display for the link
            font: The font to use for the link text
            padding: Padding as (top, right, bottom, left)
            origin: Optional origin coordinates; defaults to (0, 0)
            size: Optional size override; defaults to text size plus padding
            callback: Optional callback override; defaults to link.execute
            sheet: Optional sheet for rendering
            mode: Optional mode for rendering
        """
        # Create link style font (underlined, coloured by link type)
        link_font = font.with_decoration(TextDecoration.UNDERLINE)
        if link.link_type == LinkType.INTERNAL:
            link_font = link_font.with_colour((0, 0, 200))  # Blue for internal links
        elif link.link_type == LinkType.EXTERNAL:
            link_font = link_font.with_colour((0, 0, 180))  # Darker blue for external links
        elif link.link_type == LinkType.API:
            link_font = link_font.with_colour((150, 0, 0))  # Red for API links
        elif link.link_type == LinkType.FUNCTION:
            link_font = link_font.with_colour((0, 120, 0))  # Green for function links
        # Create the text object for the link
        self._text_obj = Text(text, link_font)
        # Calculate size if not provided
        if size is None:
            text_width, text_height = self._text_obj.size
            size = (
                text_width + padding[1] + padding[3],  # width + right + left padding
                text_height + padding[0] + padding[2]  # height + top + bottom padding
            )
        # Use the link's callback if none provided
        if callback is None:
            callback = link.execute
        # Explicit None test: "origin or (0, 0)" raises for a numpy array
        # origin because the truth value of an array is ambiguous
        if origin is None:
            origin = (0, 0)
        # Initialize the box
        super().__init__(origin, size, callback, sheet, mode)
        # Store the link object and rendering properties
        self._link = link
        self._padding = padding
        self._hovered = False
    @property
    def link(self) -> Link:
        """Get the abstract Link object"""
        return self._link
    def render(self) -> Image.Image:
        """
        Render the link.
        Returns:
            A PIL Image containing the rendered link
        """
        # Create the base canvas
        canvas = super().render()
        # The highlight is a background, so it must be drawn before the
        # text; drawing it afterwards paints over the text pixels
        if self._hovered:
            draw = ImageDraw.Draw(canvas)
            highlight_color = (220, 220, 255, 100)  # Light blue with alpha
            # Plain ints, and the last in-canvas pixel as the far corner
            far_corner = (max(int(self._size[0]) - 1, 0),
                          max(int(self._size[1]) - 1, 0))
            draw.rectangle([(0, 0), far_corner], fill=highlight_color)
        # Position the text within the padding
        text_x = self._padding[3]  # left padding
        text_y = self._padding[0]  # top padding
        # Render the text object and paste it using itself as the mask
        text_img = self._text_obj.render()
        canvas.paste(text_img, (text_x, text_y), text_img)
        return canvas
    def set_hovered(self, hovered: bool):
        """Set whether the link is being hovered over"""
        self._hovered = hovered
    def in_object(self, point):
        """
        Check if a point is within this link.
        Args:
            point: Coordinates in the same space as the link's origin
        Returns:
            True if the point falls inside the link's box
        """
        relative_point = np.array(point) - self._origin
        # Lower bound inclusive, upper bound exclusive on both axes
        return (0 <= relative_point[0] < self._size[0] and
                0 <= relative_point[1] < self._size[1])
 class RenderableButton(Box, Queriable):
    """
    A concrete implementation for rendering Button objects.
    The button is drawn as a rounded rectangle whose colours depend on its
    enabled / pressed / hovered state, with the label centred inside it.
    """
    def __init__(self, button: Button, font: Font,
                 padding: Tuple[int, int, int, int] = (6, 10, 6, 10),
                 border_radius: int = 4,
                 origin=None, size=None, callback=None, sheet=None, mode=None):
        """
        Initialize a renderable button.
        Args:
            button: The abstract Button object to render
            font: The font to use for the button text
            padding: Padding as (top, right, bottom, left)
            border_radius: Radius for rounded corners
            origin: Optional origin coordinates; defaults to (0, 0)
            size: Optional size override; defaults to label size plus padding
            callback: Optional callback override; defaults to button.execute
            sheet: Optional sheet for rendering
            mode: Optional mode for rendering
        """
        # Create the text object for the button
        self._text_obj = Text(button.label, font)
        # Calculate size if not provided
        if size is None:
            text_width, text_height = self._text_obj.size
            size = (
                text_width + padding[1] + padding[3],  # width + right + left padding
                text_height + padding[0] + padding[2]  # height + top + bottom padding
            )
        # Use the button's callback if none provided
        if callback is None:
            callback = button.execute
        # Explicit None test: "origin or (0, 0)" raises for a numpy array
        # origin because the truth value of an array is ambiguous
        if origin is None:
            origin = (0, 0)
        # Initialize the box
        super().__init__(origin, size, callback, sheet, mode)
        # Store the button object and rendering properties
        self._button = button
        self._padding = padding
        self._border_radius = border_radius
        self._pressed = False
        self._hovered = False
    @property
    def button(self) -> Button:
        """Get the abstract Button object"""
        return self._button
    def render(self) -> Image.Image:
        """
        Render the button.
        Returns:
            A PIL Image containing the rendered button
        """
        # Create the base canvas
        canvas = super().render()
        draw = ImageDraw.Draw(canvas)
        # Determine button colors based on state; disabled wins over
        # pressed, which wins over hovered.
        # NOTE(review): a per-state text colour used to be computed here but
        # was never applied; the label colour comes from the supplied font.
        if not self._button.enabled:
            bg_color, border_color = (200, 200, 200), (150, 150, 150)
        elif self._pressed:
            bg_color, border_color = (70, 130, 180), (50, 100, 150)
        elif self._hovered:
            bg_color, border_color = (100, 160, 220), (70, 130, 180)
        else:
            bg_color, border_color = (100, 150, 200), (70, 120, 170)
        # PIL treats the second corner as inclusive, so it has to be the
        # last pixel inside the canvas; using the size itself pushes the
        # right and bottom outline off the image.
        far_corner = (max(int(self._size[0]) - 1, 0),
                      max(int(self._size[1]) - 1, 0))
        draw.rounded_rectangle([(0, 0), far_corner], fill=bg_color,
                               outline=border_color, width=1,
                               radius=self._border_radius)
        # Position the text centered within the button
        text_img = self._text_obj.render()
        text_x = int(self._size[0] - text_img.width) // 2
        text_y = int(self._size[1] - text_img.height) // 2
        # Paste the text onto the canvas, using itself as the mask
        canvas.paste(text_img, (text_x, text_y), text_img)
        return canvas
    def set_pressed(self, pressed: bool):
        """Set whether the button is being pressed"""
        self._pressed = pressed
    def set_hovered(self, hovered: bool):
        """Set whether the button is being hovered over"""
        self._hovered = hovered
    def in_object(self, point):
        """
        Check if a point is within this button.
        Args:
            point: Coordinates in the same space as the button's origin
        Returns:
            True if the point falls inside the button's box
        """
        relative_point = np.array(point) - self._origin
        # Lower bound inclusive, upper bound exclusive on both axes
        return (0 <= relative_point[0] < self._size[0] and
                0 <= relative_point[1] < self._size[1])
 class RenderableForm(Box):
    """
    A concrete implementation for rendering Form objects.
    The form stacks one renderable field per form field vertically and
    places a "Submit" button, centred horizontally, below them.
    """
    def __init__(self, form: Form, font: Font,
                 field_padding: Tuple[int, int, int, int] = (5, 10, 5, 10),
                 spacing: int = 10,
                 origin=None, size=None, callback=None, sheet=None, mode=None):
        """
        Initialize a renderable form.
        Args:
            form: The abstract Form object to render
            font: The font to use for form text
            field_padding: Padding for form fields
            spacing: Spacing between form elements
            origin: Optional origin coordinates; defaults to (0, 0)
            size: Optional size override; computed from the elements if omitted
            callback: Optional callback override; defaults to form.execute
            sheet: Optional sheet for rendering
            mode: Optional mode for rendering
        """
        # Use the form's callback if none provided
        if callback is None:
            callback = form.execute
        # Explicit None tests: "x or default" raises for numpy arrays
        # because the truth value of an array is ambiguous
        if origin is None:
            origin = (0, 0)
        # Initialize with temporary size, replaced below when size is None
        temp_size = (400, 300) if size is None else size
        super().__init__(origin, temp_size, callback, sheet, mode)
        # Store the form object and rendering properties
        self._form = form
        self._font = font
        self._field_padding = field_padding
        self._spacing = spacing
        # Create renderable field objects
        self._renderable_fields: List[RenderableFormField] = []
        self._submit_button: Optional[RenderableButton] = None
        # Create the form elements
        self._create_form_elements()
        # If size was not provided, calculate it based on form elements
        if size is None:
            self._calculate_size()
    def _create_form_elements(self):
        """Create a renderable field for each form field, plus the submit button"""
        # Only the field objects are needed, not their names
        for field in self._form._fields.values():
            self._renderable_fields.append(
                RenderableFormField(field, self._font, self._field_padding)
            )
        # Create submit button wired to the form's execute method
        submit_button = Button("Submit", self._form.execute)
        self._submit_button = RenderableButton(submit_button, self._font)
    def _calculate_size(self):
        """Calculate the size of the form based on its elements"""
        # Width: widest element plus 20px of padding
        widths = [field.size[0] for field in self._renderable_fields]
        if self._submit_button:
            widths.append(self._submit_button.size[0])
        max_width = max(widths, default=0) + 20
        # Height: all fields plus the spacing between consecutive fields
        total_height = sum(field.size[1] for field in self._renderable_fields)
        if self._renderable_fields:
            total_height += self._spacing * (len(self._renderable_fields) - 1)
        # Add space for the submit button
        if self._submit_button:
            total_height += self._spacing + self._submit_button.size[1]
        # Add some padding
        total_height += 20
        self._size = np.array([max_width, total_height])
        # Keep the far corner in step with the new size; it was computed
        # from the temporary size in the base initializer
        self._end = self._origin + self._size
    def layout(self):
        """Assign form-relative origins to the fields and the submit button"""
        y_pos = 10  # Start with some padding
        # Position each field
        for field in self._renderable_fields:
            field._origin = np.array([10, y_pos])
            y_pos += field.size[1] + self._spacing
        # Position the submit button
        if self._submit_button:
            # Center the submit button horizontally
            submit_x = (self._size[0] - self._submit_button.size[0]) // 2
            self._submit_button._origin = np.array([submit_x, y_pos])
    def render(self) -> Image.Image:
        """
        Render the form.
        Returns:
            A PIL Image containing the rendered form
        """
        # Layout elements before rendering
        self.layout()
        # Create the base canvas
        canvas = super().render()
        # Render each field at its laid-out position
        for field in self._renderable_fields:
            field_img = field.render()
            # Plain ints for the paste position rather than numpy scalars
            field_pos = tuple(int(v) for v in field._origin)
            canvas.paste(field_img, field_pos, field_img)
        # Render the submit button
        if self._submit_button:
            button_img = self._submit_button.render()
            button_pos = tuple(int(v) for v in self._submit_button._origin)
            canvas.paste(button_img, button_pos, button_img)
        return canvas
    def handle_click(self, point):
        """
        Handle a click on the form.
        Args:
            point: The coordinates of the click, compared against the element
                origins assigned by layout()
        Returns:
            The result of the clicked element's callback, or None if no element was clicked
        """
        # Element origins are only meaningful after layout(); run it so a
        # click arriving before the first render() is hit-tested correctly
        self.layout()
        point = np.asarray(point)
        # Check if the submit button was clicked
        if (self._submit_button and
                self._submit_button.in_object(point)):
            return self._submit_button._callback()
        # Check if any field was clicked
        for field in self._renderable_fields:
            if field.in_object(point):
                # Fields expect coordinates relative to their own origin
                return field.handle_click(point - field._origin)
        return None
 class RenderableFormField(Box, Queriable):
    """
    A concrete implementation for rendering FormField objects.
    A field is drawn as a label with a bordered input area underneath it.
    """
    # Vertical gap in pixels between the label and the input area
    _LABEL_GAP = 5
    @staticmethod
    def _input_height(field_type) -> int:
        """Return the input-area height in pixels: 80 for TEXTAREA, otherwise 24."""
        return 80 if field_type == FormFieldType.TEXTAREA else 24
    def __init__(self, field: FormField, font: Font,
                 padding: Tuple[int, int, int, int] = (5, 10, 5, 10),
                 origin=None, size=None, callback=None, sheet=None, mode=None):
        """
        Initialize a renderable form field.
        Args:
            field: The abstract FormField object to render
            font: The font to use for field text
            padding: Padding as (top, right, bottom, left)
            origin: Optional origin coordinates; defaults to (0, 0)
            size: Optional size override; computed from label and field type if omitted
            callback: Optional callback override
            sheet: Optional sheet for rendering
            mode: Optional mode for rendering
        """
        # Create the label text object
        self._label_text = Text(field.label, font)
        # Calculate size if not provided
        if size is None:
            label_width, label_height = self._label_text.size
            # Default field width based on type
            if field.field_type in (FormFieldType.TEXTAREA, FormFieldType.SELECT):
                field_width = 200
            else:
                field_width = 150
            field_height = self._input_height(field.field_type)
            # Calculate total width and height
            total_width = max(label_width, field_width) + padding[1] + padding[3]
            total_height = (label_height + field_height + padding[0] + padding[2]
                            + self._LABEL_GAP)
            size = (total_width, total_height)
        # Explicit None test: "origin or (0, 0)" raises for a numpy array
        # origin because the truth value of an array is ambiguous
        if origin is None:
            origin = (0, 0)
        # Initialize the box
        super().__init__(origin, size, callback, sheet, mode)
        # Store the field object and rendering properties
        self._field = field
        self._font = font
        self._padding = padding
        self._focused = False
    def _input_rect(self, label_height):
        """Return (x, y, width, height) of the input area, relative to this field's box."""
        x = self._padding[3]
        y = self._padding[0] + label_height + self._LABEL_GAP
        width = int(self._size[0]) - self._padding[1] - self._padding[3]
        height = self._input_height(self._field.field_type)
        return x, y, width, height
    def render(self) -> Image.Image:
        """
        Render the form field.
        Returns:
            A PIL Image containing the rendered form field
        """
        # Create the base canvas
        canvas = super().render()
        draw = ImageDraw.Draw(canvas)
        # Render the label at the top-left padding offset
        label_img = self._label_text.render()
        canvas.paste(label_img, (self._padding[3], self._padding[0]), label_img)
        # The input area sits below the rendered label
        field_x, field_y, field_width, field_height = self._input_rect(label_img.height)
        # Draw field background; the border colour shows focus
        bg_color = (255, 255, 255)
        border_color = (100, 150, 200) if self._focused else (200, 200, 200)
        draw.rectangle(
            [(field_x, field_y), (field_x + field_width, field_y + field_height)],
            fill=bg_color, outline=border_color, width=1
        )
        # Render field value if any
        if self._field.value is not None:
            value_text = str(self._field.value)
            # For password fields, mask the text
            if self._field.field_type == FormFieldType.PASSWORD:
                value_text = "•" * len(value_text)
            value_img = Text(value_text, self._font).render()
            # Position value text within field: 5px inset, vertically centred
            value_x = field_x + 5
            value_y = field_y + (field_height - value_img.height) // 2
            canvas.paste(value_img, (value_x, value_y), value_img)
        return canvas
    def set_focused(self, focused: bool):
        """Set whether the field is focused"""
        self._focused = focused
    def handle_click(self, point):
        """
        Handle a click on the field.
        Args:
            point: The coordinates of the click relative to the field
        Returns:
            True if the click landed on the input area (the field is then
            focused), False otherwise
        """
        field_x, field_y, field_width, field_height = self._input_rect(
            self._label_text.size[1]
        )
        # Bounds are inclusive on both ends, matching the drawn rectangle
        if (field_x <= point[0] <= field_x + field_width and
                field_y <= point[1] <= field_y + field_height):
            self.set_focused(True)
            return True
        return False
    def in_object(self, point):
        """
        Check if a point is within this field.
        Args:
            point: Coordinates in the same space as the field's origin
        Returns:
            True if the point falls inside the field's box
        """
        relative_point = np.array(point) - self._origin
        # Lower bound inclusive, upper bound exclusive on both axes
        return (0 <= relative_point[0] < self._size[0] and
                0 <= relative_point[1] < self._size[1])
--- a/pyWebLayout/concrete/image.py
+++ b/pyWebLayout/concrete/image.py
@ -0,0 +1,233 @@
 import os
 from typing import Optional, Tuple, Union, Dict, Any
 import numpy as np
 from PIL import Image as PILImage, ImageDraw, ImageFont
 from pyWebLayout.base import Renderable, Queriable
 from pyWebLayout.abstract.block import Image as AbstractImage
 from .box import Box
 from pyWebLayout.layout import Alignment
 class RenderableImage(Box, Queriable):
    """
    A concrete implementation for rendering Image objects.
    The image is loaded from a local file or an http(s) URL when the object
    is created. If loading fails, render() draws a placeholder carrying the
    error message instead.
    """
    def __init__(self, image: AbstractImage,
                 max_width: Optional[int] = None, max_height: Optional[int] = None,
                 origin=None, size=None, callback=None, sheet=None, mode=None,
                 halign=Alignment.CENTER, valign=Alignment.CENTER):
        """
        Initialize a renderable image.
        Args:
            image: The abstract Image object to render
            max_width: Maximum width constraint for the image
            max_height: Maximum height constraint for the image
            origin: Optional origin coordinates; defaults to (0, 0)
            size: Optional size override; otherwise taken from
                image.calculate_scaled_dimensions(max_width, max_height)
            callback: Optional callback function
            sheet: Optional sheet for rendering
            mode: Optional image mode
            halign: Horizontal alignment
            valign: Vertical alignment
        """
        self._abstract_image = image
        self._pil_image = None
        self._error_message = None
        # Try to load the image
        self._load_image()
        # Calculate the size if not provided
        if size is None:
            size = image.calculate_scaled_dimensions(max_width, max_height)
        # Explicit None test: "origin or (0, 0)" raises for a numpy array
        # origin because the truth value of an array is ambiguous
        if origin is None:
            origin = (0, 0)
        # Initialize the box
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
    def _load_image(self):
        """
        Load the image from the abstract image's source.
        On success the PIL image is stored here and on the abstract image
        (`_loaded_image`). On any failure `_error_message` is set and copied
        to the abstract image's `_error`.
        """
        try:
            source = self._abstract_image.source
            # Handle different types of sources
            if os.path.isfile(source):
                # Local file
                self._pil_image = PILImage.open(source)
            elif source.startswith(('http://', 'https://')):
                # URL - requires requests library
                try:
                    import requests
                    from io import BytesIO
                except ImportError:
                    self._error_message = "Requests library not available for URL loading"
                else:
                    # A timeout keeps construction from blocking forever on
                    # an unresponsive host. The body is read in full, so a
                    # streamed response (which would need closing) is not used.
                    response = requests.get(source, timeout=10)
                    if response.status_code == 200:
                        self._pil_image = PILImage.open(BytesIO(response.content))
                    else:
                        self._error_message = f"Failed to load image: HTTP status {response.status_code}"
            else:
                self._error_message = f"Unable to load image from source: {source}"
        except Exception as e:
            # Best effort: any failure becomes a placeholder at render time
            self._pil_image = None
            self._error_message = f"Error loading image: {str(e)}"
        if self._pil_image is not None:
            self._abstract_image._loaded_image = self._pil_image
        # Report every failure path to the abstract image, not only exceptions
        if self._error_message:
            self._abstract_image._error = self._error_message
    def render(self) -> PILImage.Image:
        """
        Render the image.
        Returns:
            A PIL Image containing the rendered image, or the error
            placeholder when no image could be loaded
        """
        # Create a base canvas
        canvas = super().render()
        if self._pil_image is None:
            # Draw error placeholder
            self._draw_error_placeholder(canvas)
            return canvas
        # Resize the image to fit the box while maintaining aspect ratio
        resized_image = self._resize_image()
        # Calculate position based on alignment
        img_width, img_height = resized_image.size
        box_width, box_height = self._size
        # Horizontal alignment
        if self._halign == Alignment.LEFT:
            x_offset = 0
        elif self._halign == Alignment.RIGHT:
            x_offset = box_width - img_width
        else:  # CENTER is default
            x_offset = (box_width - img_width) // 2
        # Vertical alignment
        if self._valign == Alignment.TOP:
            y_offset = 0
        elif self._valign == Alignment.BOTTOM:
            y_offset = box_height - img_height
        else:  # CENTER is default
            y_offset = (box_height - img_height) // 2
        position = (int(x_offset), int(y_offset))
        # Use the image's alpha as the mask only when both sides are RGBA
        if resized_image.mode == 'RGBA' and canvas.mode == 'RGBA':
            canvas.paste(resized_image, position, resized_image)
        else:
            canvas.paste(resized_image, position)
        return canvas
    def _resize_image(self) -> PILImage.Image:
        """
        Resize the image to fit within the box while maintaining aspect ratio.
        Returns:
            A resized RGBA PIL Image, or a translucent grey RGBA image of
            the box size when there is no usable image
        """
        # PIL needs a tuple of plain ints for sizes, not a numpy array
        target_width, target_height = (int(v) for v in self._size)
        blank = (200, 200, 200, 100)
        if self._pil_image is None:
            return PILImage.new('RGBA', (target_width, target_height), blank)
        # Get the original dimensions
        orig_width, orig_height = self._pil_image.size
        if orig_width <= 0 or orig_height <= 0:
            # A degenerate source would divide by zero below
            return PILImage.new('RGBA', (target_width, target_height), blank)
        # Use the smaller ratio to ensure the image fits within the box
        ratio = min(target_width / orig_width, target_height / orig_height)
        # Never collapse a dimension to zero pixels
        new_size = (max(int(orig_width * ratio), 1), max(int(orig_height * ratio), 1))
        # Convert to RGBA if needed, then resize
        source_img = self._pil_image
        if source_img.mode != 'RGBA':
            source_img = source_img.convert('RGBA')
        return source_img.resize(new_size, PILImage.LANCZOS)
    def _draw_error_placeholder(self, canvas: PILImage.Image):
        """
        Draw a placeholder for when the image can't be loaded.
        Draws a bordered grey box crossed by an X and, when an error message
        is available, the word-wrapped message centred near the top.
        Args:
            canvas: The canvas to draw on
        """
        draw = ImageDraw.Draw(canvas)
        width, height = (int(v) for v in self._size)
        # PIL treats the second corner as inclusive, so use the last
        # in-canvas pixel; otherwise the right/bottom border is clipped
        right, bottom = max(width - 1, 0), max(height - 1, 0)
        # Draw a gray box with a border
        draw.rectangle([(0, 0), (right, bottom)], fill=(240, 240, 240), outline=(180, 180, 180), width=2)
        # Draw an X across the box
        draw.line([(0, 0), (right, bottom)], fill=(180, 180, 180), width=2)
        draw.line([(0, bottom), (right, 0)], fill=(180, 180, 180), width=2)
        # Add error text if available
        if not self._error_message:
            return
        try:
            # Try to use a basic font
            font = ImageFont.load_default()
            error_text = "Error: " + self._error_message
            max_text_width = width - 20  # 10px padding on each side
            # Simple text wrapping - split by words and add lines
            lines = []
            current_line = ""
            for word in error_text.split():
                test_line = current_line + " " + word if current_line else word
                text_bbox = draw.textbbox((0, 0), test_line, font=font)
                if text_bbox[2] - text_bbox[0] <= max_text_width:
                    current_line = test_line
                else:
                    # An over-long first word must not emit an empty line
                    if current_line:
                        lines.append(current_line)
                    current_line = word
            if current_line:
                lines.append(current_line)
            # Draw each line
            y_pos = 10
            for line in lines:
                text_bbox = draw.textbbox((0, 0), line, font=font)
                text_width = text_bbox[2] - text_bbox[0]
                text_height = text_bbox[3] - text_bbox[1]
                # Center the text horizontally
                x_pos = (width - text_width) // 2
                draw.text((x_pos, y_pos), line, fill=(80, 80, 80), font=font)
                # Move to the next line
                y_pos += text_height + 2
        except Exception:
            # Deliberate best effort: the box and X drawn above already
            # signal the failure, so a text-rendering problem is ignored
            pass
    def in_object(self, point):
        """
        Check if a point is within this image.
        Args:
            point: Coordinates in the same space as the image's origin
        Returns:
            True if the point falls inside the image's box
        """
        relative_point = np.array(point) - self._origin
        # Lower bound inclusive, upper bound exclusive on both axes
        return (0 <= relative_point[0] < self._size[0] and
                0 <= relative_point[1] < self._size[1])
--- a/pyWebLayout/concrete/page.py
+++ b/pyWebLayout/concrete/page.py
@ -0,0 +1,175 @@
 from typing import List, Tuple, Optional, Dict, Any
 import numpy as np
 from PIL import Image
 from pyWebLayout.base import Renderable, Layoutable
 from .box import Box
 from pyWebLayout.layout import Alignment
 class Container(Box, Layoutable):
    """
    A container that can hold multiple renderable objects and lay them out.
    Children are stacked along one axis ('vertical' or 'horizontal') with a
    fixed spacing and aligned on the other axis inside the padded area.
    Child origins assigned by layout() are relative to this container.
    """
    def __init__(self, origin, size, direction='vertical', spacing=5,
                 callback=None, sheet=None, mode=None,
                 halign=Alignment.CENTER, valign=Alignment.CENTER,
                 padding: Tuple[int, int, int, int] = (10, 10, 10, 10)):
        """
        Initialize a container.
        Args:
            origin: Top-left corner coordinates
            size: Width and height of the container
            direction: Layout direction; 'vertical' stacks downwards, any
                other value is treated as horizontal
            spacing: Space between elements
            callback: Optional callback function
            sheet: Optional image sheet
            mode: Optional image mode
            halign: Horizontal alignment (used for vertical layouts)
            valign: Vertical alignment (used for horizontal layouts)
            padding: Padding as (top, right, bottom, left)
        """
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
        self._children: List[Renderable] = []
        self._direction = direction
        self._spacing = spacing
        self._padding = padding
    def add_child(self, child: Renderable):
        """Add a child element to this container and return the container for chaining"""
        self._children.append(child)
        return self
    @staticmethod
    def _aligned_offset(alignment, start_alignment, end_alignment, start, available, extent):
        """Return where an item of `extent` goes inside `available` space beginning at `start`."""
        if alignment == start_alignment:
            return start
        if alignment == end_alignment:
            return start + available - extent
        # Anything else is centred
        return start + (available - extent) // 2
    def layout(self):
        """Layout the children according to the container's direction and spacing"""
        if not self._children:
            return
        # Get available space after padding
        padding_top, padding_right, padding_bottom, padding_left = self._padding
        available_width = self._size[0] - padding_left - padding_right
        available_height = self._size[1] - padding_top - padding_bottom
        vertical = self._direction == 'vertical'
        # Running position along the stacking axis
        cursor = padding_top if vertical else padding_left
        for child in self._children:
            # Children without geometry cannot be positioned
            if not (hasattr(child, '_size') and hasattr(child, '_origin')):
                continue
            child_width, child_height = child._size
            if vertical:
                x_pos = self._aligned_offset(self._halign, Alignment.LEFT, Alignment.RIGHT,
                                             padding_left, available_width, child_width)
                child._origin = np.array([x_pos, cursor])
                # Move down for next child
                cursor += child_height + self._spacing
            else:
                y_pos = self._aligned_offset(self._valign, Alignment.TOP, Alignment.BOTTOM,
                                             padding_top, available_height, child_height)
                child._origin = np.array([cursor, y_pos])
                # Move right for next child
                cursor += child_width + self._spacing
            # Layout the child if it's layoutable
            if isinstance(child, Layoutable):
                child.layout()
    def render(self) -> Image.Image:
        """
        Render the container with all its children.
        Returns:
            A PIL Image with every positioned child pasted onto the canvas
        """
        # Make sure children are laid out
        self.layout()
        # Create base canvas
        canvas = super().render()
        # Render each child and paste it onto the canvas
        for child in self._children:
            if not hasattr(child, '_origin'):
                continue
            child_img = child.render()
            # layout() already stores container-relative origins, so they
            # are used as-is; subtracting this container's own origin would
            # shift children whenever the container is not at (0, 0)
            rel_pos = tuple(int(v) for v in child._origin)
            # Only an RGBA image can serve as its own transparency mask
            if child_img.mode == 'RGBA':
                canvas.paste(child_img, rel_pos, child_img)
            else:
                canvas.paste(child_img, rel_pos)
        return canvas
 class Page(Container):
    """
    Top-level container representing an HTML page.
    A page is a vertical container at origin (0, 0) with 10px spacing,
    centred horizontally and top-aligned, drawn on a solid background.
    """
    def __init__(self, size=(800, 600), background_color=(255, 255, 255), mode='RGBA'):
        """
        Initialize a page.
        Args:
            size: Width and height of the page
            background_color: Background color as RGB tuple
            mode: Image mode
        """
        container_options = {
            'origin': (0, 0),
            'size': size,
            'direction': 'vertical',
            'spacing': 10,
            'mode': mode,
            'halign': Alignment.CENTER,
            'valign': Alignment.TOP,
        }
        super().__init__(**container_options)
        self._background_color = background_color
    def render(self) -> Image:
        """
        Render the page with all its content.
        Returns:
            The page canvas, filled with the background colour, with every
            positioned child pasted at its origin
        """
        # Children must be positioned before they can be pasted
        self.layout()
        # Start from a canvas filled with the background color
        canvas = Image.new(self._mode, tuple(self._size), self._background_color)
        for child in self._children:
            # Children without an origin cannot be placed
            if not hasattr(child, '_origin'):
                continue
            child_img = child.render()
            rel_pos = tuple(child._origin)
            # Use the child's alpha channel as the mask when both the page
            # mode and the child image carry one; None means a plain paste
            if 'A' in self._mode and child_img.mode == 'RGBA':
                mask = child_img
            else:
                mask = None
            canvas.paste(child_img, rel_pos, mask)
        return canvas
--- a/pyWebLayout/concrete/text.py
+++ b/pyWebLayout/concrete/text.py
@ -0,0 +1,455 @@
 from __future__ import annotations
 from pyWebLayout.base import Renderable, Queriable
 from .box import Box
 from pyWebLayout.layout import Alignment
 from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
 from pyWebLayout.abstract.inline import Word
 from PIL import Image, ImageDraw, ImageFont
 from typing import Tuple, Union, List, Optional
 import numpy as np
 class Text(Renderable, Queriable):
    """
    Concrete implementation for rendering text.
    This class handles the visual representation of text fragments.
    """
    def __init__(self, text: str, style: Font):
        """
        Initialize a Text object.
        Args:
            text: The text content to render
            style: The font style to use for rendering
        """
        super().__init__()
        self._text = text
        self._style = style
        self._line = None
        self._previous = None
        self._next = None
        self._origin = np.array([0, 0])
        # Calculate dimensions
        self._calculate_dimensions()
    def _calculate_dimensions(self):
        """
        Calculate the width and height of the text based on the font metrics.
        The text is drawn anchored at the top of the canvas, so the height
        has to reach down to the bottom of the bounding box (not just span
        the ink); otherwise the lowest rows of the glyphs would be cut off.
        The left bearing is removed by shifting the drawing position instead.
        """
        font = self._style.font
        try:
            # The bounding box is (left, top, right, bottom) relative to the
            # position the text is drawn at
            left, _top, right, bottom = font.getbbox(self._text)
        except AttributeError:
            # Fallback for older PIL versions without getbbox
            self._width, self._height = font.getsize(self._text)
            self._draw_offset = (0, 0)
        else:
            self._width = right - left
            self._height = bottom
            # Shift horizontally so the ink starts at column 0 of the canvas
            self._draw_offset = (-left, 0)
        self._size = (self._width, self._height)
    @property
    def text(self) -> str:
        """Get the text content"""
        return self._text
    @property
    def style(self) -> Font:
        """Get the text style"""
        return self._style
    @property
    def line(self) -> Optional[Line]:
        """Get the line containing this text"""
        return self._line
    @line.setter
    def line(self, line):
        """Set the line containing this text"""
        self._line = line
    @property
    def width(self) -> int:
        """Get the width of the text"""
        return self._width
    @property
    def height(self) -> int:
        """Get the height of the text"""
        return self._height
    @property
    def size(self) -> Tuple[int, int]:
        """Get the size (width, height) of the text"""
        return self._size
    def set_origin(self, x: int, y: int):
        """Set the origin (top-left corner) of this text element"""
        self._origin = np.array([x, y])
    def add_to_line(self, line):
        """Add this text to a line"""
        self._line = line
    def _apply_decoration(self, draw: ImageDraw.ImageDraw):
        """
        Apply text decoration (underline or strikethrough).
        Args:
            draw: The drawing context of the canvas the text was drawn on
        """
        decoration = self._style.decoration
        if decoration == TextDecoration.UNDERLINE:
            # Underline at about 90% of the height
            fraction = 0.9
        elif decoration == TextDecoration.STRIKETHROUGH:
            # Strikethrough at about 50% of the height
            fraction = 0.5
        else:
            return
        y_position = int(self._height * fraction)
        # Line thickness grows with the font size but is at least one pixel
        thickness = max(1, int(self._style.font_size / 15))
        draw.line([(0, y_position), (self._width, y_position)],
                  fill=self._style.colour, width=thickness)
    def render(self) -> Image.Image:
        """
        Render the text to an image.
        Returns:
            A PIL Image containing the rendered text
        """
        # Create a transparent image with the appropriate size
        canvas = Image.new('RGBA', self._size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(canvas)
        # Draw the background unless it is fully transparent; a colour
        # without an alpha component is treated as opaque
        background = self._style.background
        if background and (len(background) < 4 or background[3] > 0):
            draw.rectangle([(0, 0), self._size], fill=background)
        # Draw the text, compensating for the left bearing measured earlier
        offset = getattr(self, '_draw_offset', (0, 0))
        draw.text(offset, self._text, font=self._style.font, fill=self._style.colour)
        # Apply any text decorations
        self._apply_decoration(draw)
        return canvas
    def get_size(self) -> Tuple[int, int]:
        """Get the size (width, height) of the text"""
        return self._size
    def in_object(self, point):
        """
        Check if a point is within this text object.
        Args:
            point: Coordinates in the same frame as this text's origin
        Returns:
            True when the point lies inside the text's width and height
        """
        relative_point = np.array(point) - self._origin
        # Check if the point is within the text boundaries
        return (0 <= relative_point[0] < self._width and
                0 <= relative_point[1] < self._height)
 class RenderableWord(Renderable, Queriable):
    """
    A concrete implementation for rendering Word objects.
    This bridges between the abstract Word class and rendering capabilities.
    """
    def __init__(self, word: Word):
        """
        Initialize a new renderable word.
        Args:
            word: The abstract Word object to render
        """
        super().__init__()
        self._word = word
        self._text_parts: List[Text] = []
        self._origin = np.array([0, 0])
        self._size = (0, 0)
        # Initialize with the full word as a single text part
        self._initialize_text_parts()
    def _initialize_text_parts(self):
        """Initialize the text parts based on the word's current state"""
        # Clear existing parts
        self._text_parts.clear()
        if self._word.hyphenated_parts:
            # If the word is hyphenated, create a Text object for each part
            for part in self._word.hyphenated_parts:
                self._text_parts.append(Text(part, self._word.style))
        else:
            # Otherwise, create a single Text object for the whole word
            self._text_parts.append(Text(self._word.text, self._word.style))
        # Calculate total size
        self._recalculate_size()
        # New parts start at (0, 0); move them to follow the word's origin
        self._position_parts()
    def _recalculate_size(self):
        """Recalculate the size of the word based on its text parts"""
        parts = self._text_parts
        if not parts:
            self._size = (0, 0)
        elif len(parts) == 1:
            # For a non-hyphenated word, use the size of the single text part
            self._size = parts[0].size
        else:
            # For a hyphenated word that's not yet split across lines,
            # use the total width and the maximum height
            total_width = sum(part.width for part in parts)
            max_height = max(part.height for part in parts)
            self._size = (total_width, max_height)
    def _position_parts(self):
        """Place the text parts side by side, starting at the word's origin"""
        x, y = self._origin
        x_offset = 0
        for part in self._text_parts:
            part.set_origin(x + x_offset, y)
            x_offset += part.width
    @property
    def word(self) -> Word:
        """Get the abstract Word object"""
        return self._word
    @property
    def text_parts(self) -> List[Text]:
        """Get the list of Text objects that make up this word"""
        return self._text_parts
    def update_from_word(self):
        """Update the text parts based on changes to the word"""
        self._initialize_text_parts()
    def get_part_size(self, index: int) -> Tuple[int, int]:
        """
        Get the size of a specific text part.
        Args:
            index: The index of the part to query.
        Returns:
            A tuple (width, height) of the part.
        Raises:
            IndexError: If the index is out of range.
        """
        if index >= len(self._text_parts):
            raise IndexError(f"Part index {index} out of range")
        return self._text_parts[index].size
    @property
    def width(self) -> int:
        """Get the total width of the word"""
        return self._size[0]
    @property
    def height(self) -> int:
        """Get the height of the word"""
        return self._size[1]
    def set_origin(self, x: int, y: int):
        """Set the origin (top-left corner) of this word and of its text parts"""
        self._origin = np.array([x, y])
        # Update positions of text parts
        self._position_parts()
    def render(self) -> Image.Image:
        """
        Render the word to an image.
        Returns:
            A PIL Image containing the rendered word
        """
        # For a non-hyphenated word or if there's only one part, render just that part
        if len(self._text_parts) == 1:
            return self._text_parts[0].render()
        # For a hyphenated word, create a canvas and paste all parts
        canvas = Image.new('RGBA', self._size, (0, 0, 0, 0))
        x_offset = 0
        for part in self._text_parts:
            part_img = part.render()
            canvas.paste(part_img, (x_offset, 0), part_img)
            x_offset += part.width
        return canvas
    def in_object(self, point):
        """
        Check if a point is within this word.
        Args:
            point: Coordinates in the same frame as this word's origin
        Returns:
            True when the point lies inside the word's bounds and inside the
            text part covering that horizontal position, False otherwise
        """
        point_array = np.array(point)
        # First check if the point is within the word's boundaries
        relative_point = point_array - self._origin
        if not (0 <= relative_point[0] < self._size[0] and
                0 <= relative_point[1] < self._size[1]):
            return False
        # Then check which text part contains the point
        x_offset = 0
        for part in self._text_parts:
            part_width = part.width
            if x_offset <= relative_point[0] < x_offset + part_width:
                # Each part's origin already includes its horizontal offset,
                # so the part must be given the untranslated point; shifting
                # it by x_offset as well would apply the offset twice
                return part.in_object(point_array)
            x_offset += part_width
        return False
 class Line(Box):
    """
    A line of text consisting of words with consistent spacing.
    """
    def __init__(self, spacing: Tuple[int, int], origin, size, font: Optional[Font] = None, 
                 callback=None, sheet=None, mode=None, halign=Alignment.CENTER, 
                 valign=Alignment.CENTER, previous = None):
        """
        Initialize a new line.
        Args:
            spacing: A tuple of (min_spacing, max_spacing) between words
            origin: The top-left position of the line
            size: The width and height of the line
            font: The default font to use for text in this line
            callback: Optional callback function
            sheet: Optional image sheet
            mode: Optional image mode
            halign: Horizontal alignment of text within the line
            valign: Vertical alignment of text within the line
            previous: Reference to the previous line
        """
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
        self._renderable_words: List[RenderableWord] = []
        self._spacing = spacing  # (min_spacing, max_spacing)
        self._font = font if font else Font()  # Use default font if none provided
        self._current_width = 0  # Track the current width used
        self._previous = previous
        self._next = None
    @property
    def renderable_words(self) -> List[RenderableWord]:
        """Get the list of renderable words in this line"""
        return self._renderable_words
    def set_next(self, line: Line):
        """Set the next line in sequence"""
        self._next = line
    def add_word(self, text: str, font: Optional[Font] = None) -> Union[None, str]:
        """
        Add a word to this line.
        If the whole word does not fit, the word is hyphenated when possible
        and only its first part is placed on the line.
        Args:
            text: The text content of the word
            font: The font to use for this word, or None to use the line's default font
        Returns:
            None if the word fits, the text of the remaining hyphenated parts
            if only the first part fits, or the whole text if nothing fits
        """
        if not font:
            font = self._font
        abstract_word = Word(text, font)
        renderable_word = RenderableWord(abstract_word)
        min_spacing, _ = self._spacing
        # The first word on a line is not preceded by a gap
        spacing_needed = min_spacing if self._renderable_words else 0
        used_width = self._current_width + spacing_needed
        line_width = self._size[0]
        # Check if the whole word fits in the line
        if used_width + renderable_word.width <= line_width:
            self._renderable_words.append(renderable_word)
            self._current_width = used_width + renderable_word.width
            return None
        # Try to hyphenate the word if it doesn't fit
        if abstract_word.hyphenate():
            # Update the renderable word to reflect hyphenation
            renderable_word.update_from_word()
            first_part_width = renderable_word.get_part_size(0)[0]
            if used_width + first_part_width <= line_width:
                # Place a word made of just the first part
                first_word = Word(abstract_word.get_hyphenated_part(0), font)
                self._renderable_words.append(RenderableWord(first_word))
                self._current_width = used_width + first_part_width
                # Return the remaining parts as a single string
                part_count = abstract_word.get_hyphenated_part_count()
                return ''.join(abstract_word.get_hyphenated_part(i)
                               for i in range(1, part_count))
        # If we can't hyphenate or first part doesn't fit, return the entire word
        return text
    def render(self) -> Image.Image:
        """
        Render the line with all its words.
        Justified lines start at the left edge and spread the free space
        over the gaps so the last word ends at the right edge; other
        alignments use the minimum spacing between words.
        Returns:
            A PIL Image containing the rendered line
        """
        # Create an image for the line
        canvas = super().render()
        words = self._renderable_words
        # If there are no words, return the empty canvas
        if not words:
            return canvas
        line_width = self._size[0]
        total_word_width = sum(word.width for word in words)
        available_space = line_width - total_word_width
        num_spaces = len(words) - 1
        # `remainder` is the number of leading gaps that get one extra pixel
        remainder = 0
        if num_spaces <= 0:
            spacing = 0
        elif self._halign == Alignment.JUSTIFY:
            # Distribute the free space evenly; integer division leftovers
            # go to the first gaps so the line is flush on both edges
            spacing, remainder = divmod(available_space, num_spaces)
        else:
            # Use minimum spacing for other alignments
            spacing = self._spacing[0]
        content_width = total_word_width + spacing * num_spaces
        # Calculate starting x position based on alignment
        if self._halign in (Alignment.LEFT, Alignment.JUSTIFY):
            x_pos = 0
        elif self._halign == Alignment.RIGHT:
            x_pos = line_width - content_width
        else:  # CENTER
            x_pos = (line_width - content_width) // 2
        # Vertical alignment - center words vertically in the line
        y_pos = (self._size[1] - max(word.height for word in words)) // 2
        # Render and paste each word onto the line
        for index, word in enumerate(words):
            word.set_origin(x_pos, y_pos)
            word_img = word.render()
            canvas.paste(word_img, (x_pos, y_pos), word_img)
            # Move to the next word position
            x_pos += word.width + spacing + (1 if index < remainder else 0)
        return canvas
--- a/pyWebLayout/core/__init__.py
+++ b/pyWebLayout/core/__init__.py
@ -0,0 +1,10 @@
 """
 Core functionality for the pyWebLayout library.
 This package contains the core abstractions and base classes that form the foundation
 of the pyWebLayout rendering system.
 """
 from pyWebLayout.core.base import (
    Renderable, Interactable, Layoutable, Queriable
 )
--- a/pyWebLayout/core/base.py
+++ b/pyWebLayout/core/base.py
@ -0,0 +1,67 @@
 from abc import ABC
 import numpy as np
 from pyWebLayout.style import Alignment
 class Renderable(ABC):
    """
    Base class for objects that can draw themselves to an image.
    Subclasses are expected to override ``render``; the version defined
    here does nothing.
    """
    def render(self):
        """
        Render the object to an image.
        Returns:
            PIL.Image: The rendered image (this base implementation returns None)
        """
        return None
 class Interactable(ABC):
    """
    Base class for objects that react to an interaction.
    An interactable object stores an optional callback that is invoked when
    the object is interacted with.
    """
    def __init__(self, callback=None):
        """
        Store the interaction callback.
        Args:
            callback: The function to call when this object is interacted with
        """
        self._callback = callback
    def interact(self, point: np.generic):
        """
        Forward an interaction to the stored callback.
        Args:
            point: The coordinates of the interaction
        Returns:
            Whatever the callback returns for the point, or None when no
            callback was provided
        """
        handler = self._callback
        return handler(point) if handler is not None else None
 class Layoutable(ABC):
    """
    Base class for objects whose contents can be arranged.
    Subclasses are expected to override ``layout``; the version defined
    here does nothing.
    """
    def layout(self):
        """
        Arrange the object's contents.
        Intended to be called before rendering so that the contents are
        positioned; this base implementation returns None.
        """
        return None
 class Queriable(ABC):
    """
    Base class for objects that can be hit-tested against a point.
    Subclasses are expected to override ``in_object``; the version defined
    here does nothing.
    """
    def in_object(self, point:np.generic):
        """
        Check if a point is in the object.
        Args:
            point: The coordinates to test
        Returns:
            None in this base implementation
        """
        return None
--- a/pyWebLayout/examples/epub_viewer.py
+++ b/pyWebLayout/examples/epub_viewer.py
@ -0,0 +1,100 @@
 #!/usr/bin/env python3
 """
 Example EPUB viewer using pyWebLayout.
 This example demonstrates how to use pyWebLayout to load an EPUB file,
 paginate it, and render pages as images.
 """
 import os
 import sys
 import argparse
 from pathlib import Path
 from PIL import Image
 # Add the parent directory to the path to import pyWebLayout
 sys.path.append(str(Path(__file__).parent.parent.parent))
 from pyWebLayout import (
    Document, Book, read_epub, 
    DocumentPaginator, Page
 )
 def main():
    """
    Paginate an EPUB file and save the rendered pages as PNG images.
    Reads the command line, loads the book, prints some of its metadata,
    paginates it with the requested page size and margins, and writes one
    ``page_NNN.png`` file per page into the output directory.
    """
    # argparse registers -h/--help automatically, which collides with the
    # -h short flag of --height and makes add_argument raise. Disable the
    # automatic help and expose it as --help only.
    parser = argparse.ArgumentParser(description='EPUB viewer example', add_help=False)
    parser.add_argument('--help', action='help', default=argparse.SUPPRESS,
                        help='Show this help message and exit')
    parser.add_argument('epub_file', help='Path to EPUB file')
    parser.add_argument('--output-dir', '-o', default='output', help='Output directory for rendered pages')
    parser.add_argument('--width', '-w', type=int, default=800, help='Page width')
    parser.add_argument('--height', '-h', type=int, default=1000, help='Page height')
    parser.add_argument('--margin', '-m', type=int, default=50, help='Page margin')
    parser.add_argument('--max-pages', '-p', type=int, default=10, help='Maximum number of pages to render')
    args = parser.parse_args()
    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)
    # Read EPUB file
    print(f"Reading EPUB file: {args.epub_file}")
    book = read_epub(args.epub_file)
    # Display book metadata
    print(f"Title: {book.get_title()}")
    print(f"Author: {book.get_metadata('AUTHOR')}")
    print(f"Chapters: {len(book.chapters)}")
    # Create a paginator; the same margin value is used for all four entries
    page_size = (args.width, args.height)
    margins = (args.margin, args.margin, args.margin, args.margin)
    paginator = DocumentPaginator(
        document=book,
        page_size=page_size,
        margins=margins
    )
    # Paginate and render pages
    print("Paginating and rendering pages...")
    # Option 1: Render all pages at once
    pages = paginator.paginate(max_pages=args.max_pages)
    for i, page in enumerate(pages):
        # Render the page
        image = page.render()
        # Save the image
        output_path = os.path.join(args.output_dir, f"page_{i+1:03d}.png")
        image.save(output_path)
        print(f"Saved page {i+1} to {output_path}")
    # Option 2: Render pages one by one with state saving
    """
    # Clear paginator state
    paginator.state = DocumentPaginationState()
    for i in range(args.max_pages):
        # Get next page
        page = paginator.paginate_next()
        if page is None:
            print(f"No more pages after page {i}")
            break
        # Render the page
        image = page.render()
        # Save the image
        output_path = os.path.join(args.output_dir, f"page_{i+1:03d}.png")
        image.save(output_path)
        print(f"Saved page {i+1} to {output_path}")
        # Save pagination state (could be saved to a file for later resumption)
        state_dict = paginator.get_state()
        # Progress information
        progress = paginator.get_progress() * 100
        print(f"Progress: {progress:.1f}%")
    """
 if __name__ == "__main__":
    main()
--- a/pyWebLayout/html_parser.py
+++ b/pyWebLayout/html_parser.py
@ -0,0 +1,918 @@
 import re
 from html.parser import HTMLParser as BaseHTMLParser
 from typing import Dict, List, Optional, Tuple, Union, Any, Set, Callable
 import urllib.parse
 from PIL import Image
 from .style import Font, FontStyle, FontWeight, TextDecoration
 from .abstract.document import Document, MetadataType, Book, Chapter
 from .abstract.block import (
    Block, BlockType, Parapgraph, Heading, HeadingLevel, Quote, CodeBlock,
    HList, ListStyle, ListItem, Table, TableRow, TableCell, HorizontalRule, LineBreak
 )
 from .abstract.inline import Word, FormattedSpan
 from .abstract.functional import Link, LinkType, Button, Form, FormField, FormFieldType
 from .concrete.page import Page
 from pyWebLayout.layout import Alignment
 class HTMLParser(BaseHTMLParser):
    """
    HTML parser that builds an abstract document representation from HTML content.
    This parser converts HTML to abstract document classes without any rendering specifics.
    """
    def __init__(self, base_url: Optional[str] = None):
        """
        Create a parser with an empty document and reset parsing state.
        Args:
            base_url: Base URL for resolving relative links
        """
        super().__init__()
        # The document being built
        self.document = Document()
        # Block nesting: the innermost open block and the stack of open blocks
        self._current_block = None
        self._block_stack: List[Block] = []
        # Text handling: the paragraph/span receiving text and pending characters
        self._current_paragraph = self._current_span = None
        self._text_buffer = ""
        # Style state: saved styles plus the style currently in effect
        self._style_stack: List[Dict[str, Any]] = []
        self._current_style = dict(
            font_size=12,
            font_weight=FontWeight.NORMAL,
            font_style=FontStyle.NORMAL,
            decoration=TextDecoration.NONE,
            color=(0, 0, 0),
            background=None,
            language='en_US',
        )
        # Open lists and tables, and the table row being filled
        self._list_stack: List[HList] = []
        self._table_stack: List[Table] = []
        self._current_table_row = None
        # Link handling
        self._base_url = base_url
        self._in_link = False
        self._current_link = None
        # Flags for elements whose content is collected rather than laid out
        self._in_head = self._in_title = self._in_script = self._in_style = False
        self._script_buffer = self._style_buffer = self._title_buffer = ""
    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        """
        Handle the start of an HTML tag.
        Args:
            tag: The tag name
            attrs: List of attribute tuples (name, value)
        """
        tag = tag.lower()
        attrs_dict = dict(attrs)
        # Special handling for elements where we collect content
        if self._in_script and tag != 'script':
            return
        if self._in_style and tag != 'style':
            return
        # Parse style attribute if present
        style = {}
        if 'style' in attrs_dict:
            style = self._parse_style(attrs_dict['style'])
        # Apply tag-specific styling based on the tag
        tag_style = self._get_tag_style(tag)
        for key, value in tag_style.items():
            if key not in style:
                style[key] = value
        # Push the current style and apply the new style
        self._push_style(style)
        # Handle specific tags
        if tag == 'html':
            # Set document language if specified
            if 'lang' in attrs_dict:
                self.document.set_metadata(MetadataType.LANGUAGE, attrs_dict['lang'])
        elif tag == 'head':
            self._in_head = True
        elif tag == 'title' and self._in_head:
            self._in_title = True
            self._title_buffer = ""
        elif tag == 'meta' and self._in_head:
            self._handle_meta_tag(attrs_dict)
        elif tag == 'link' and self._in_head:
            self._handle_link_tag(attrs_dict)
        elif tag == 'script':
            self._in_script = True
            self._script_buffer = ""
        elif tag == 'style':
            self._in_style = True
            self._style_buffer = ""
        elif tag == 'body':
            # Body attributes can contain style information
            pass
        elif tag == 'p':
            self._flush_text()  # Flush any pending text
            self._current_paragraph = Parapgraph()
            # Add the paragraph to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(self._current_paragraph)
            else:
                self.document.add_block(self._current_paragraph)
            # Push to block stack
            self._block_stack.append(self._current_paragraph)
            self._current_block = self._current_paragraph
        elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            self._flush_text()  # Flush any pending text
            # Determine heading level
            level_map = {
                'h1': HeadingLevel.H1,
                'h2': HeadingLevel.H2,
                'h3': HeadingLevel.H3,
                'h4': HeadingLevel.H4,
                'h5': HeadingLevel.H5,
                'h6': HeadingLevel.H6
            }
            heading = Heading(level=level_map[tag])
            # Add the heading to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(heading)
            else:
                self.document.add_block(heading)
            # Push to block stack
            self._block_stack.append(heading)
            self._current_block = heading
            self._current_paragraph = heading  # Heading inherits from Paragraph
        elif tag == 'div':
            self._flush_text()  # Flush any pending text
            # For divs, we create a new paragraph as a container
            div_para = Parapgraph()
            # Add the div to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(div_para)
            else:
                self.document.add_block(div_para)
            # Push to block stack
            self._block_stack.append(div_para)
            self._current_block = div_para
            self._current_paragraph = div_para
        elif tag == 'blockquote':
            self._flush_text()  # Flush any pending text
            quote = Quote()
            # Add the quote to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(quote)
            else:
                self.document.add_block(quote)
            # Push to block stack
            self._block_stack.append(quote)
            self._current_block = quote
        elif tag == 'pre':
            self._flush_text()  # Flush any pending text
            # Pre can optionally contain a code block
            # We'll create a paragraph for now, and if we find a code tag inside,
            # we'll replace it with a code block
            pre_para = Parapgraph()
            # Add the pre to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(pre_para)
            else:
                self.document.add_block(pre_para)
            # Push to block stack
            self._block_stack.append(pre_para)
            self._current_block = pre_para
            self._current_paragraph = pre_para
        elif tag == 'code':
            # If we're inside a pre, replace the paragraph with a code block
            if self._block_stack and isinstance(self._block_stack[-1], Parapgraph):
                pre_para = self._block_stack.pop()
                # Get the language from class if specified (e.g., class="language-python")
                language = ""
                if 'class' in attrs_dict:
                    class_attr = attrs_dict['class']
                    if class_attr.startswith('language-'):
                        language = class_attr[9:]
                code_block = CodeBlock(language=language)
                # Replace the paragraph with the code block
                if pre_para.parent:
                    parent = pre_para.parent
                    if hasattr(parent, '_blocks'):
                        # Find the paragraph in the parent's blocks and replace it
                        for i, block in enumerate(parent._blocks):
                            if block == pre_para:
                                parent._blocks[i] = code_block
                                break
                # Push the code block to the stack
                self._block_stack.append(code_block)
                self._current_block = code_block
                self._current_paragraph = None
            else:
                # If not in a pre, just create a formatted span for code
                self._current_span = None  # Force creation of a new span with code style
        elif tag in ('ul', 'ol', 'dl'):
            self._flush_text()  # Flush any pending text
            # Determine list style
            style_map = {
                'ul': ListStyle.UNORDERED,
                'ol': ListStyle.ORDERED,
                'dl': ListStyle.DEFINITION
            }
            list_block = HList(style=style_map[tag])
            # Add the list to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(list_block)
            else:
                self.document.add_block(list_block)
            # Push to block stack and list stack
            self._block_stack.append(list_block)
            self._list_stack.append(list_block)
            self._current_block = list_block
            self._current_paragraph = None
        elif tag == 'li' and self._list_stack:
            self._flush_text()  # Flush any pending text
            list_item = ListItem()
            # Add to the current list
            current_list = self._list_stack[-1]
            current_list.add_item(list_item)
            # Push to block stack
            self._block_stack.append(list_item)
            self._current_block = list_item
            self._current_paragraph = None
        elif tag == 'dt' and self._list_stack and self._list_stack[-1].style == ListStyle.DEFINITION:
            self._flush_text()  # Flush any pending text
            # For definition term, we create a list item with a term
            list_item = ListItem(term="")  # Will be filled by content
            # Add to the current list
            current_list = self._list_stack[-1]
            current_list.add_item(list_item)
            # Push to block stack
            self._block_stack.append(list_item)
            self._current_block = list_item
            # Create a paragraph for the term content
            term_para = Parapgraph()
            list_item.add_block(term_para)
            self._current_paragraph = term_para
        elif tag == 'dd' and self._list_stack and self._list_stack[-1].style == ListStyle.DEFINITION:
            self._flush_text()  # Flush any pending text
            # Find the last dt item
            current_list = self._list_stack[-1]
            if current_list._items:
                list_item = current_list._items[-1]
                # Create a paragraph for the description content
                desc_para = Parapgraph()
                list_item.add_block(desc_para)
                # Update current state
                self._current_paragraph = desc_para
            else:
                # If no dt found, create a new list item
                list_item = ListItem()
                current_list.add_item(list_item)
                # Push to block stack
                self._block_stack.append(list_item)
                self._current_block = list_item
                # Create a paragraph for the description content
                desc_para = Parapgraph()
                list_item.add_block(desc_para)
                self._current_paragraph = desc_para
        elif tag == 'table':
            self._flush_text()  # Flush any pending text
            # Create a new table
            caption = None
            if 'summary' in attrs_dict:
                caption = attrs_dict['summary']
            table = Table(caption=caption)
            # Add the table to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(table)
            else:
                self.document.add_block(table)
            # Push to block stack and table stack
            self._block_stack.append(table)
            self._table_stack.append(table)
            self._current_block = table
            self._current_paragraph = None
        elif tag in ('thead', 'tbody', 'tfoot') and self._table_stack:
            # Just track the current section - no need to create new objects
            self._current_table_section = tag
        elif tag == 'tr' and self._table_stack:
            self._flush_text()  # Flush any pending text
            # Create a new row
            row = TableRow()
            # Add to the current table
            current_table = self._table_stack[-1]
            # Determine the section based on context
            section = "body"
            if hasattr(self, '_current_table_section'):
                if self._current_table_section == 'thead':
                    section = "header"
                elif self._current_table_section == 'tfoot':
                    section = "footer"
            current_table.add_row(row, section=section)
            # Update state
            self._current_table_row = row
            self._current_paragraph = None
        elif tag in ('td', 'th') and self._current_table_row:
            self._flush_text()  # Flush any pending text
            # Parse attributes
            colspan = 1
            rowspan = 1
            if 'colspan' in attrs_dict:
                try:
                    colspan = int(attrs_dict['colspan'])
                except (ValueError, TypeError):
                    pass
            if 'rowspan' in attrs_dict:
                try:
                    rowspan = int(attrs_dict['rowspan'])
                except (ValueError, TypeError):
                    pass
            # Create a new cell
            is_header = (tag == 'th')
            cell = TableCell(is_header=is_header, colspan=colspan, rowspan=rowspan)
            # Add to the current row
            self._current_table_row.add_cell(cell)
            # Push to block stack
            self._block_stack.append(cell)
            self._current_block = cell
            # Create a paragraph for the cell content
            cell_para = Parapgraph()
            cell.add_block(cell_para)
            self._current_paragraph = cell_para
        elif tag == 'a':
            self._flush_text()  # Flush any pending text
            # Parse attributes
            href = attrs_dict.get('href', '')
            title = attrs_dict.get('title', '')
            # Determine link type
            link_type = LinkType.INTERNAL
            if href.startswith('http://') or href.startswith('https://'):
                link_type = LinkType.EXTERNAL
            elif href.startswith('javascript:'):
                link_type = LinkType.FUNCTION
            elif href.startswith('api:'):
                link_type = LinkType.API
                href = href[4:]  # Remove api: prefix
            # If we have a base URL and the href is relative, resolve it
            if self._base_url and not href.startswith(('http://', 'https://', 'javascript:', 'api:', '#')):
                href = urllib.parse.urljoin(self._base_url, href)
            # Create a Link object
            self._current_link = Link(
                location=href,
                link_type=link_type,
                title=title if title else None
            )
            # Set the flag to indicate we're inside a link
            self._in_link = True
            # Force creation of a new span with link style
            self._current_span = None
        elif tag == 'img':
            # Handle image
            src = attrs_dict.get('src', '')
            alt = attrs_dict.get('alt', '')
            # Parse width and height if provided
            width = None
            height = None
            if 'width' in attrs_dict:
                try:
                    width = int(attrs_dict['width'])
                except (ValueError, TypeError):
                    pass
            if 'height' in attrs_dict:
                try:
                    height = int(attrs_dict['height'])
                except (ValueError, TypeError):
                    pass
            # If we have a base URL and the src is relative, resolve it
            if self._base_url and not src.startswith(('http://', 'https://')):
                src = urllib.parse.urljoin(self._base_url, src)
            # Create an Image block
            from .abstract.block import Image
            image = Image(source=src, alt_text=alt, width=width, height=height)
            # Add the image to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(image)
            else:
                self.document.add_block(image)
            # Also add as a resource for backwards compatibility
            resource_name = f"img_{len(self.document._resources) + 1}"
            self.document.add_resource(resource_name, {
                'type': 'image',
                'src': src,
                'alt': alt,
                'width': width,
                'height': height,
                'image_object': image
            })
        elif tag == 'br':
            # Add a line break
            if self._current_paragraph:
                line_break = LineBreak()
                if hasattr(self._current_paragraph, 'add_block'):
                    self._current_paragraph.add_block(line_break)
            # Flush any text before the break
            self._flush_text()
        elif tag == 'hr':
            self._flush_text()  # Flush any pending text
            # Create a horizontal rule
            hr = HorizontalRule()
            # Add to the current block or document
            if self._current_block and hasattr(self._current_block, 'add_block'):
                self._current_block.add_block(hr)
            else:
                self.document.add_block(hr)
        elif tag in ('b', 'strong'):
            # Bold text
            self._current_style['font_weight'] = FontWeight.BOLD
            self._current_span = None  # Force creation of a new span
        elif tag in ('i', 'em'):
            # Italic text
            self._current_style['font_style'] = FontStyle.ITALIC
            self._current_span = None  # Force creation of a new span
        elif tag == 'u':
            # Underlined text
            self._current_style['decoration'] = TextDecoration.UNDERLINE
            self._current_span = None  # Force creation of a new span
        elif tag == 'span':
            # Span can have style attributes
            self._current_span = None  # Force creation of a new span
        elif tag == 'form':
            self._flush_text()  # Flush any pending text
            # Parse attributes
            form_id = attrs_dict.get('id', f"form_{len(self.document._resources) + 1}")
            action = attrs_dict.get('action', '')
            # Create a Form object
            form = Form(form_id=form_id, action=action)
            # Add as a resource
            self.document.add_resource(form_id, form)
            # TODO: Create a proper Form block class and add it to the document
        elif tag == 'input':
            # Parse attributes
            input_type = attrs_dict.get('type', 'text')
            input_name = attrs_dict.get('name', '')
            input_value = attrs_dict.get('value', '')
            input_required = 'required' in attrs_dict
            # Map HTML input types to FormFieldType
            type_map = {
                'text': FormFieldType.TEXT,
                'password': FormFieldType.PASSWORD,
                'checkbox': FormFieldType.CHECKBOX,
                'radio': FormFieldType.RADIO,
                'number': FormFieldType.NUMBER,
                'date': FormFieldType.DATE,
                'time': FormFieldType.TIME,
                'email': FormFieldType.EMAIL,
                'url': FormFieldType.URL,
                'color': FormFieldType.COLOR,
                'range': FormFieldType.RANGE,
                'hidden': FormFieldType.HIDDEN
            }
            field_type = type_map.get(input_type, FormFieldType.TEXT)
            # Create a FormField object
            field = FormField(
                name=input_name,
                field_type=field_type,
                label=attrs_dict.get('placeholder', input_name),
                value=input_value,
                required=input_required
            )
            # TODO: Add the field to a form if inside a form
        elif tag == 'textarea':
            # Similar to input but with multiline content
            # We'll handle the content in handle_data
            pass
        elif tag == 'select':
            # Similar to input but with options
            # We'll handle the options in handle_data
            pass
        elif tag == 'button':
            # Parse attributes
            button_type = attrs_dict.get('type', 'button')
            button_name = attrs_dict.get('name', '')
            # TODO: Create a Button object and add it to the document
    def handle_endtag(self, tag: str):
        """
        Process a closing HTML tag and unwind the matching parser state.
        Closing ``script``, ``style`` and ``title`` tags hand their collected
        buffer to the document, restore the previous style and return early.
        Any other end tag seen while script/style content is being collected
        is ignored. For every remaining tag the pending text is flushed, the
        block/list/table bookkeeping for that tag is unwound, and one style
        is popped from the style stack.
        Args:
            tag: The tag name (compared case-insensitively)
        """
        tag = tag.lower()
        # Elements whose text is buffered: deliver the buffer and stop here
        if tag == 'script' and self._in_script:
            self._in_script = False
            self.document.add_script(self._script_buffer)
            self._script_buffer = ""
            self._pop_style()
            return
        if tag == 'style' and self._in_style:
            self._in_style = False
            parsed_sheet = self._parse_css(self._style_buffer)
            # Only register the stylesheet when parsing produced something truthy
            if parsed_sheet:
                self.document.add_stylesheet(parsed_sheet)
            self._style_buffer = ""
            self._pop_style()
            return
        if tag == 'title' and self._in_title:
            self._in_title = False
            self.document.set_title(self._title_buffer.strip())
            self._title_buffer = ""
            self._pop_style()
            return
        # Still collecting script/style content: the matching end tag would have
        # returned above, so whatever closes here is not treated as markup
        if self._in_script or self._in_style:
            return
        self._flush_text()
        closing_list = tag in ('ul', 'ol', 'dl')
        closing_table = tag == 'table'
        closing_plain_block = tag in (
            'p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'pre',
            'li', 'dt', 'dd', 'td', 'th',
        )
        if tag == 'head':
            self._in_head = False
        elif closing_plain_block or closing_list or closing_table:
            # All block-level closers share the same unwinding of the block stack
            if self._block_stack:
                self._block_stack.pop()
            if closing_list and self._list_stack:
                self._list_stack.pop()
            if closing_table and self._table_stack:
                self._table_stack.pop()
            # The new top of the stack (if any) becomes the current block
            self._current_block = self._block_stack[-1] if self._block_stack else None
            self._current_paragraph = None
            self._current_span = None
            if closing_table:
                # Leaving a table also drops the row/section tracking
                self._current_table_row = None
                if hasattr(self, '_current_table_section'):
                    delattr(self, '_current_table_section')
        elif tag in ('thead', 'tbody', 'tfoot'):
            # Section tracking lives in an attribute that only exists while inside one
            if hasattr(self, '_current_table_section'):
                delattr(self, '_current_table_section')
        elif tag == 'tr':
            self._current_table_row = None
        elif tag == 'a':
            self._in_link = False
            self._current_link = None
        elif tag in ('b', 'strong', 'i', 'em', 'u', 'span'):
            # Next text run must start a fresh span
            self._current_span = None
        # 'body', 'code' and unknown tags need no structural handling.
        # A style is popped for every end tag that reaches this point.
        self._pop_style()
    def handle_data(self, data: str):
        """
        Append a run of character data to the buffer that is currently active.
        Script content takes precedence over style content, which takes
        precedence over title content; anything else goes to the general
        text buffer.
        Args:
            data: The text data
        """
        if self._in_script:
            self._script_buffer += data
        elif self._in_style:
            self._style_buffer += data
        elif self._in_title:
            self._title_buffer += data
        else:
            # Ordinary document text, emitted later when the buffer is flushed
            self._text_buffer += data
    def handle_entityref(self, name: str):
        """
        Resolve a named entity reference and append it to the active buffer.
        A small built-in table is consulted first; other names are looked up
        in ``html.entities.name2codepoint``. Names found in neither place are
        kept as the literal ``&name;`` text.
        Args:
            name: The entity name
        """
        # Frequently used entities resolved without touching html.entities
        shortcuts = {
            'lt': '<',
            'gt': '>',
            'amp': '&',
            'quot': '"',
            'apos': "'",
            'nbsp': ' ',
            'copy': '©',
            'reg': '®',
            'trade': '™',
        }
        char = shortcuts.get(name)
        if char is None:
            try:
                import html.entities
                char = chr(html.entities.name2codepoint[name])
            except (KeyError, ImportError):
                # Unknown entity: emit it unchanged
                char = f'&{name};'
        # Pick the destination buffer: script > style > title > plain text
        if self._in_script:
            destination = '_script_buffer'
        elif self._in_style:
            destination = '_style_buffer'
        elif self._in_title:
            destination = '_title_buffer'
        else:
            destination = '_text_buffer'
        setattr(self, destination, getattr(self, destination) + char)
    def handle_charref(self, name: str):
        """
        Resolve a numeric character reference and append it to the active buffer.
        Both hexadecimal prefixes (``x`` and ``X``) are accepted. A reference
        that is not a valid number, or whose value is not a valid code point,
        is kept as the literal ``&#name;`` text instead of raising, which
        mirrors how unknown names are treated in ``handle_entityref``.
        Args:
            name: The character reference (decimal, or hex with an x/X prefix)
        """
        try:
            if name[:1] in ('x', 'X'):
                # Hexadecimal reference, e.g. "x41" or "X41"
                codepoint = int(name[1:], 16)
            else:
                # Decimal reference, e.g. "65"
                codepoint = int(name)
            char = chr(codepoint)
        except (ValueError, OverflowError):
            # int() rejects malformed digits; chr() rejects values outside the
            # Unicode range. Keep the original text rather than abort parsing.
            char = f'&#{name};'
        # Handle based on context: script > style > title > plain text
        if self._in_script:
            self._script_buffer += char
        elif self._in_style:
            self._style_buffer += char
        elif self._in_title:
            self._title_buffer += char
        else:
            self._text_buffer += char
    def _push_style(self, style: Dict[str, Any]):
        """
        Save the active style on the style stack, then overlay ``style`` on it.
        Args:
            style: Style properties to apply on top of the current style
        """
        # A copy is stored so later changes to the active style do not alter it
        snapshot = self._current_style.copy()
        self._style_stack.append(snapshot)
        # Overlay the new properties; keys not mentioned keep their current value
        self._current_style.update(style)
    def _pop_style(self):
        """Restore the most recently saved style; do nothing if none is saved."""
        if not self._style_stack:
            return
        self._current_style = self._style_stack.pop()
    def _get_tag_style(self, tag: str) -> Dict[str, Any]:
        """
        Look up the built-in style properties associated with a tag.
        Args:
            tag: The tag name
        Returns:
            A dictionary of style properties, empty when the tag has no
            built-in style
        """
        # Headings are all bold and differ only in font size
        heading_sizes = (
            ('h1', 24), ('h2', 20), ('h3', 18),
            ('h4', 16), ('h5', 14), ('h6', 12),
        )
        defaults = {
            heading: {'font_size': size, 'font_weight': FontWeight.BOLD}
            for heading, size in heading_sizes
        }
        # Inline emphasis, links and monospace elements
        defaults.update({
            'b': {'font_weight': FontWeight.BOLD},
            'strong': {'font_weight': FontWeight.BOLD},
            'i': {'font_style': FontStyle.ITALIC},
            'em': {'font_style': FontStyle.ITALIC},
            'u': {'decoration': TextDecoration.UNDERLINE},
            'a': {'decoration': TextDecoration.UNDERLINE, 'color': (0, 0, 255)},
            'code': {'font_family': 'monospace', 'background': (240, 240, 240, 255)},
            'pre': {'font_family': 'monospace'},
        })
        if tag in defaults:
            return defaults[tag]
        return {}
    def _create_font(self) -> Font:
        """
        Create a Font object from the current style.
        Returns:
            Font: A font object with the current style settings
        """
--- a/pyWebLayout/io/init.py
+++ b/pyWebLayout/io/init.py
@ -0,0 +1,69 @@
 """
 Input/Output module for pyWebLayout.
 This package provides functionality for reading and writing various file formats,
 including HTML, EPUB, and other document formats.
 The module uses a decomposed architecture with specialized readers for different
 aspects of document parsing (metadata, content, resources), following the same
 pattern as the abstract module.
 """
 # Legacy readers (for backward compatibility)
 # Legacy functions provided by new HTML reader for backward compatibility
 from pyWebLayout.io.readers.html import parse_html_string as parse_html
 from pyWebLayout.io.readers.html import read_html_file as html_to_document
 from pyWebLayout.io.readers.epub_reader import read_epub
 # New decomposed readers
 from pyWebLayout.io.readers.html import HTMLReader, read_html, read_html_file, parse_html_string
 from pyWebLayout.io.readers.base import BaseReader, MetadataReader, ContentReader, ResourceReader, CompositeReader
 # Specialized HTML readers
 from pyWebLayout.io.readers.html_metadata import HTMLMetadataReader
 from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.io.readers.html_resources import HTMLResourceReader
 # Specialized EPUB readers
 from pyWebLayout.io.readers.epub_metadata import EPUBMetadataReader
 # Convenience functions using the new architecture
 def read_document(source, format_hint=None, **options):
    """
    Read a document using the appropriate reader based on format detection.
    An explicit ``format_hint`` of ``'html'`` or ``'epub'`` selects the reader
    directly. Without a hint the source is probed, HTML first and then EPUB.
    If neither applies (including an unrecognised hint), the HTML reader is
    tried once more as a last resort.
    Args:
        source: The source to read (file path, URL, or content)
        format_hint: Optional hint about the format ('html', 'epub', etc.)
        **options: Additional options for reading; forwarded to the HTML
            reader only, the EPUB reader is called with the source alone
    Returns:
        Document: The parsed document
    Raises:
        ValueError: If no reader could handle the source. When the fallback
            HTML attempt itself failed, that error is attached as the cause.
    """
    if format_hint == 'html' or (not format_hint and _is_html_source(source)):
        reader = HTMLReader()
        return reader.read(source, **options)
    if format_hint == 'epub' or (not format_hint and _is_epub_source(source)):
        # Use legacy EPUB reader for now
        return read_epub(source)
    # Try HTML reader as fallback
    fallback_error = None
    try:
        reader = HTMLReader()
        if reader.can_read(source):
            return reader.read(source, **options)
    except Exception as exc:
        # Best effort only: remember the failure so it is not silently lost.
        # KeyboardInterrupt/SystemExit are deliberately not caught here.
        fallback_error = exc
    raise ValueError(f"Cannot determine format for source: {source}") from fallback_error
 def _is_html_source(source):
    """Report whether a freshly created ``HTMLReader`` says it can read ``source``."""
    return HTMLReader().can_read(source)
 def _is_epub_source(source):
    """
    Check if source appears to be EPUB.
    Only string sources are considered; the test is a case-insensitive
    match on the ``.epub`` suffix.
    """
    return isinstance(source, str) and source.lower().endswith('.epub')
--- a/pyWebLayout/io/readers/init.py
+++ b/pyWebLayout/io/readers/init.py
@ -0,0 +1,36 @@
 """
 Readers module for pyWebLayout.
 This module provides specialized readers for different document formats
 using a decomposed architecture pattern.
 """
 # Base classes for the decomposed architecture
 from .base import BaseReader, MetadataReader, ContentReader, ResourceReader, CompositeReader
 # HTML readers (decomposed)
 from .html import HTMLReader, read_html, read_html_file, parse_html_string
 from .html_metadata import HTMLMetadataReader
 from .html_content import HTMLContentReader
 from .html_resources import HTMLResourceReader
 # HTML processing components (supporting modules)
 from .html_style import HTMLStyleManager
 from .html_text import HTMLTextProcessor
 from .html_elements import BlockElementHandler, ListElementHandler, TableElementHandler, InlineElementHandler
 # EPUB readers
 from .epub_reader import read_epub  # Legacy
 from .epub_metadata import EPUBMetadataReader  # New decomposed
 # Names exported by the readers package.
 # NOTE(review): HTMLStyleManager, HTMLTextProcessor and the *ElementHandler
 # classes are imported in this module but not listed here - confirm that
 # leaving them out of the exported names is intentional.
 __all__ = [
    # Base classes
    'BaseReader', 'MetadataReader', 'ContentReader', 'ResourceReader', 'CompositeReader',
    # HTML readers
    'HTMLReader', 'read_html', 'read_html_file', 'parse_html_string',
    'HTMLMetadataReader', 'HTMLContentReader', 'HTMLResourceReader',
    # EPUB readers
    'read_epub', 'EPUBMetadataReader',
 ]
--- a/pyWebLayout/io/readers/base.py
+++ b/pyWebLayout/io/readers/base.py
@ -0,0 +1,229 @@
 """
 Base classes for document readers in pyWebLayout.
 This module provides the foundational classes that all readers inherit from,
 similar to how the abstract module provides base classes for document elements.
 """
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional, Union
 from pyWebLayout.abstract.document import Document
 class BaseReader(ABC):
    """
    Common contract shared by every document reader.
    Subclasses must implement ``can_read`` and ``read``; storage and lookup
    of reader options is provided here.
    """
    def __init__(self):
        """Start with no document and an empty option table."""
        self._options = {}
        self._document = None
    @abstractmethod
    def can_read(self, source: Union[str, bytes]) -> bool:
        """
        Tell whether this reader is able to handle ``source``.
        Args:
            source: The source to check (file path, URL, or content)
        Returns:
            True if this reader can handle the source, False otherwise
        """
    @abstractmethod
    def read(self, source: Union[str, bytes], **options) -> Document:
        """
        Parse ``source`` and return the resulting Document.
        Args:
            source: The source to read (file path, URL, or content)
            **options: Additional options for reading
        Returns:
            The parsed Document
        """
    def set_option(self, key: str, value: Any):
        """
        Store ``value`` under ``key``, replacing any earlier value.
        Args:
            key: The option name
            value: The option value
        """
        self._options.update({key: value})
    def get_option(self, key: str, default: Any = None) -> Any:
        """
        Look up a previously stored option.
        Args:
            key: The option name
            default: Value returned when the option has not been set
        Returns:
            The option value or default
        """
        if key in self._options:
            return self._options[key]
        return default
 class MetadataReader(ABC):
    """
    Interface for components that pull metadata out of a source.
    Metadata here means document-level information such as title or author.
    """
    @abstractmethod
    def extract_metadata(self, source: Any, document: Document) -> Dict[str, Any]:
        """
        Read metadata from ``source`` and record it on ``document``.
        Args:
            source: The source data
            document: The document to populate with metadata
        Returns:
            Dictionary of extracted metadata
        """
 class StructureReader(ABC):
    """
    Interface for components that pull structural information out of a source.
    Structure here means elements such as headings and sections.
    """
    @abstractmethod
    def extract_structure(self, source: Any, document: Document) -> List[Any]:
        """
        Read structure from ``source`` and record it on ``document``.
        Args:
            source: The source data
            document: The document to populate with structure
        Returns:
            List of structural elements
        """
 class ContentReader(ABC):
    """
    Interface for components that pull the actual content out of a source.
    Content here means the text and its formatting.
    """
    @abstractmethod
    def extract_content(self, source: Any, document: Document) -> Any:
        """
        Read content from ``source`` and record it on ``document``.
        Args:
            source: The source data
            document: The document to populate with content
        Returns:
            The extracted content
        """
 class ResourceReader(ABC):
    """
    Interface for components that pull auxiliary resources out of a source.
    Resources here means items such as images and stylesheets.
    """
    @abstractmethod
    def extract_resources(self, source: Any, document: Document) -> Dict[str, Any]:
        """
        Read resources from ``source`` and record them on ``document``.
        Args:
            source: The source data
            document: The document to populate with resources
        Returns:
            Dictionary of extracted resources
        """
 class CompositeReader(BaseReader):
    """
    Reader assembled from optional, specialised sub-readers.
    Metadata, structure, content and resource readers can each be plugged
    in; ``read`` runs whichever ones are configured against a new Document.
    """
    def __init__(self):
        """Create a composite reader with no sub-readers configured."""
        super().__init__()
        self._metadata_reader: Optional[MetadataReader] = None
        self._structure_reader: Optional[StructureReader] = None
        self._content_reader: Optional[ContentReader] = None
        self._resource_reader: Optional[ResourceReader] = None
    def set_metadata_reader(self, reader: MetadataReader):
        """Use ``reader`` for the metadata stage."""
        self._metadata_reader = reader
    def set_structure_reader(self, reader: StructureReader):
        """Use ``reader`` for the structure stage."""
        self._structure_reader = reader
    def set_content_reader(self, reader: ContentReader):
        """Use ``reader`` for the content stage."""
        self._content_reader = reader
    def set_resource_reader(self, reader: ResourceReader):
        """Use ``reader`` for the resource stage."""
        self._resource_reader = reader
    def read(self, source: Union[str, bytes], **options) -> Document:
        """
        Build a Document by running every configured sub-reader over ``source``.
        Stages run in a fixed order: metadata, structure, content, resources.
        Stages without a configured reader are skipped. The given options are
        merged into this reader's stored options before any stage runs.
        Args:
            source: The source to read
            **options: Additional options for reading
        Returns:
            The parsed Document
        """
        document = Document()
        self._options.update(options)
        # (attribute holding the sub-reader, method to invoke on it), in run order
        stages = (
            ('_metadata_reader', 'extract_metadata'),
            ('_structure_reader', 'extract_structure'),
            ('_content_reader', 'extract_content'),
            ('_resource_reader', 'extract_resources'),
        )
        for reader_attr, method_name in stages:
            # Looked up just before use, so each stage sees the current reader
            stage_reader = getattr(self, reader_attr)
            if stage_reader:
                getattr(stage_reader, method_name)(source, document)
        return document
--- a/pyWebLayout/io/readers/epub_metadata.py
+++ b/pyWebLayout/io/readers/epub_metadata.py
@ -0,0 +1,352 @@
 """
 EPUB metadata reader for pyWebLayout.
 This module provides specialized functionality for extracting metadata
 from EPUB documents, following the decomposed architecture pattern.
 """
 import os
 import zipfile
 import tempfile
 from typing import Dict, Any, Optional, List
 import xml.etree.ElementTree as ET
 from pyWebLayout.abstract.document import Document, MetadataType
 from pyWebLayout.io.readers.base import MetadataReader
 # XML namespaces used in EPUB files, keyed by their conventional prefix.
 # The URIs are spliced into ElementTree "{uri}tag" names when searching
 # the package document.
 NAMESPACES = {
    # OPF package elements and attributes (metadata, meta, role, file-as, ...)
    'opf': 'http://www.idpf.org/2007/opf',
    # Dublin Core elements (title, creator, identifier, ...)
    'dc': 'http://purl.org/dc/elements/1.1/',
    # Dublin Core terms
    'dcterms': 'http://purl.org/dc/terms/',
 }
 class EPUBMetadataReader(MetadataReader):
    """
    Specialized reader for extracting metadata from EPUB documents.
    This class handles EPUB package document metadata including
    Dublin Core elements and custom metadata.
    """
    def __init__(self) -> None:
        """
        Initialize the EPUB metadata reader.
        All state starts empty; extract_metadata() resets it again at the
        start of every extraction.
        """
        # Metadata collected from the package document, keyed by name
        self._metadata: Dict[str, Any] = {}
        # Temporary directory the EPUB archive is unpacked into
        self._temp_dir: Optional[str] = None
        # Path of the package (.opf) document once it has been located
        self._package_path: Optional[str] = None
    def extract_metadata(self, epub_path: str, document: Document) -> Dict[str, Any]:
        """
        Read the metadata of an EPUB file and copy it onto a document.
        The archive is unpacked into a temporary directory that is removed
        again before this method returns, whether or not parsing succeeded.
        Args:
            epub_path: Path to the EPUB file
            document: The document to populate with metadata
        Returns:
            Dictionary of extracted metadata
        """
        # Start from a clean slate so earlier extractions cannot leak in
        self._reset()
        try:
            self._extract_epub(epub_path)
            self._find_package_document()
            # Without a package document there is nothing to parse
            if self._package_path:
                self._parse_package_metadata()
            self._populate_document(document)
            extracted = self._metadata
        finally:
            # Always remove the unpacked files
            self._cleanup()
        return extracted
    def _reset(self) -> None:
        """
        Reset internal state for a new extraction.
        Clears the collected metadata and forgets the temporary directory
        and package path; nothing is removed from disk here.
        """
        self._metadata = {}
        self._temp_dir = None
        self._package_path = None
    def _extract_epub(self, epub_path: str):
        """
        Extract EPUB file to temporary directory.
        Args:
            epub_path: Path to the EPUB file
        """
        self._temp_dir = tempfile.mkdtemp()
        with zipfile.ZipFile(epub_path, 'r') as zip_ref:
            zip_ref.extractall(self._temp_dir)
    def _find_package_document(self):
        """Find the package document (content.opf) in the extracted EPUB."""
        # First, try to find it via META-INF/container.xml
        container_path = os.path.join(self._temp_dir, 'META-INF', 'container.xml')
        if os.path.exists(container_path):
            try:
                tree = ET.parse(container_path)
                root = tree.getroot()
                # Find rootfile element
                for rootfile in root.findall('.//{urn:oasis:names:tc:opendocument:xmlns:container}rootfile'):
                    full_path = rootfile.get('full-path')
                    if full_path:
                        self._package_path = os.path.join(self._temp_dir, full_path)
                        if os.path.exists(self._package_path):
                            return
            except ET.ParseError:
                pass
        # Fallback: search for .opf files
        for root, dirs, files in os.walk(self._temp_dir):
            for file in files:
                if file.endswith('.opf'):
                    self._package_path = os.path.join(root, file)
                    return
    def _parse_package_metadata(self):
        """
        Parse the <metadata> section of the package document.
        Does nothing when no package document is known, the file is gone, or
        the document has no metadata element. XML syntax errors are reported
        on stdout and otherwise ignored.
        """
        package_path = self._package_path
        if not (package_path and os.path.exists(package_path)):
            return
        try:
            package_root = ET.parse(package_path).getroot()
            metadata_tag = '{{{0}}}metadata'.format(NAMESPACES['opf'])
            metadata_elem = package_root.find('.//' + metadata_tag)
            if metadata_elem is not None:
                # Dublin Core first, then the OPF-specific <meta> entries
                self._parse_dublin_core(metadata_elem)
                self._parse_opf_metadata(metadata_elem)
        except ET.ParseError as e:
            print(f"Error parsing package document: {e}")
    def _parse_dublin_core(self, metadata_elem: ET.Element):
        """
        Collect the Dublin Core elements found under the metadata element.
        A name that occurs once is stored as a stripped string, together
        with a few well-known attributes (``creator_role``,
        ``creator_file_as``, ``identifier_scheme``, ``identifier_id``,
        ``date_event``). A name that occurs several times is stored as a
        list of stripped strings and its attributes are not recorded.
        Elements without text are ignored.
        Args:
            metadata_elem: The metadata XML element
        """
        dc_ns = NAMESPACES['dc']
        opf_ns = NAMESPACES['opf']
        dc_names = (
            'title', 'creator', 'subject', 'description', 'publisher',
            'contributor', 'date', 'type', 'format', 'identifier',
            'source', 'language', 'relation', 'coverage', 'rights',
        )
        # Per element name: (XML attribute, metadata key suffix) pairs that
        # are recorded when the element occurs exactly once
        extra_attributes = {
            'creator': (
                ('{{{0}}}role'.format(opf_ns), 'role'),
                ('{{{0}}}file-as'.format(opf_ns), 'file_as'),
            ),
            'identifier': (
                ('{{{0}}}scheme'.format(opf_ns), 'scheme'),
                ('id', 'id'),
            ),
            'date': (
                ('{{{0}}}event'.format(opf_ns), 'event'),
            ),
        }
        for dc_name in dc_names:
            found = metadata_elem.findall('.//{{{0}}}{1}'.format(dc_ns, dc_name))
            if not found:
                continue
            if len(found) > 1:
                # Repeated element: keep every non-empty text as a list
                texts = [node.text.strip() for node in found if node.text]
                if texts:
                    self._metadata[dc_name] = texts
                continue
            only = found[0]
            if not only.text:
                continue
            self._metadata[dc_name] = only.text.strip()
            for attr_name, suffix in extra_attributes.get(dc_name, ()):
                attr_value = only.get(attr_name)
                if attr_value:
                    self._metadata[f'{dc_name}_{suffix}'] = attr_value
    def _parse_opf_metadata(self, metadata_elem: ET.Element):
        """
        Collect OPF ``<meta>`` entries and ``<x-metadata>`` children.
        ``<meta name="N" content="C">`` is stored as ``meta_N``; each child
        of an ``<x-metadata>`` element that has text is stored as
        ``x_meta_<local tag name>``.
        Args:
            metadata_elem: The metadata XML element
        """
        opf_ns = NAMESPACES['opf']
        for meta in metadata_elem.findall('.//{{{0}}}meta'.format(opf_ns)):
            name, content = meta.get('name'), meta.get('content')
            if name and content:
                self._metadata[f'meta_{name}'] = content
        # Custom metadata wrapped in x-metadata
        for container in metadata_elem.findall('.//{{{0}}}x-metadata'.format(opf_ns)):
            for child in container:
                if not (child.tag and child.text):
                    continue
                # Keep only the part after the "{namespace}" prefix, if any
                local_name = child.tag.rsplit('}', 1)[-1]
                self._metadata[f'x_meta_{local_name}'] = child.text.strip()
    def _populate_document(self, document: Document):
        """
        Copy the extracted metadata onto the document.
        Known EPUB keys are mapped to their MetadataType; repeated subjects
        are joined with ", " while any other repeated value contributes only
        its first entry. The cover reference and the complete raw metadata
        dictionary are stored as well.
        Args:
            document: The document to populate
        """
        field_types = {
            'title': MetadataType.TITLE,
            'creator': MetadataType.AUTHOR,
            'description': MetadataType.DESCRIPTION,
            'subject': MetadataType.KEYWORDS,
            'language': MetadataType.LANGUAGE,
            'date': MetadataType.PUBLICATION_DATE,
            'publisher': MetadataType.PUBLISHER,
            'identifier': MetadataType.IDENTIFIER,
        }
        for epub_key, doc_type in field_types.items():
            if epub_key not in self._metadata:
                continue
            value = self._metadata[epub_key]
            if isinstance(value, list):
                # Subjects become one keyword string; everything else keeps
                # only the first of the repeated values
                value = ', '.join(value) if epub_key == 'subject' else value[0]
            document.set_metadata(doc_type, value)
        cover = self._metadata.get('meta_cover')
        if cover:
            document.set_metadata(MetadataType.COVER_IMAGE, cover)
        # Keep the untouched EPUB metadata around for reference
        document.set_metadata(MetadataType.CUSTOM, {'epub_metadata': self._metadata})
    def _cleanup(self):
        """Clean up temporary files."""
        if self._temp_dir:
            try:
                import shutil
                shutil.rmtree(self._temp_dir, ignore_errors=True)
            except:
                pass
            self._temp_dir = None
    def get_unique_identifier(self) -> Optional[str]:
        """
        Get the unique identifier from the EPUB metadata.
        Returns:
            The unique identifier string, or None if not found
        """
        # Look for identifier with specific ID
        for key, value in self._metadata.items():
            if key.startswith('identifier') and key.endswith('_id'):
                return self._metadata.get('identifier')
        # Fallback to any identifier
        return self._metadata.get('identifier')
    def get_cover_id(self) -> Optional[str]:
        """
        Get the cover image ID from metadata.
        The value is the ``content`` attribute of the ``<meta name="cover">``
        entry, which _parse_opf_metadata() stores under ``meta_cover``
        (presumably a manifest item id - confirm against the caller).
        Returns:
            The cover image ID, or None if not found
        """
        return self._metadata.get('meta_cover')
    def get_creators(self) -> List[Dict[str, str]]:
        """
        Get creator information with roles.
        Returns:
            List of creator dictionaries with name, role, and file-as info
        """
        creators = []
        creator_value = self._metadata.get('creator')
        if creator_value:
            if isinstance(creator_value, list):
                # Multiple creators - this is simplified, real implementation
                # would need to correlate with role and file-as attributes
                for creator in creator_value:
                    creators.append({'name': creator})
            else:
                # Single creator
                creator_info = {'name': creator_value}
                # Add role if available
                role = self._metadata.get('creator_role')
                if role:
                    creator_info['role'] = role
                # Add file-as if available
                file_as = self._metadata.get('creator_file_as')
                if file_as:
                    creator_info['file_as'] = file_as
                creators.append(creator_info)
        return creators
--- a/pyWebLayout/io/readers/epub_reader.py
+++ b/pyWebLayout/io/readers/epub_reader.py
@ -0,0 +1,400 @@
 """
 EPUB reader for pyWebLayout.
 This module provides functionality for reading EPUB documents and converting them
 to pyWebLayout's abstract document model.
 """
 import os
 import zipfile
 import tempfile
 from typing import Dict, List, Optional, Any, Tuple
 import xml.etree.ElementTree as ET
 import re
 import urllib.parse
 from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
 from pyWebLayout.io.readers.html import parse_html_string as parse_html, read_html_file as html_to_document
 # XML namespaces used in EPUB files, keyed by their conventional prefix.
 # The URIs are spliced into ElementTree "{uri}tag" names when searching
 # the package document and the NCX table of contents.
 NAMESPACES = {
    # OPF package elements (metadata, manifest, item, spine, itemref)
    'opf': 'http://www.idpf.org/2007/opf',
    # Dublin Core elements (title, creator, identifier, ...)
    'dc': 'http://purl.org/dc/elements/1.1/',
    # Dublin Core terms
    'dcterms': 'http://purl.org/dc/terms/',
    # XHTML content documents
    'xhtml': 'http://www.w3.org/1999/xhtml',
    # NCX table of contents (navMap, navPoint, navLabel, text, content)
    'ncx': 'http://www.daisy.org/z3986/2005/ncx/',
 }
 class EPUBReader:
    """
    Reader for EPUB documents.
    This class extracts content from EPUB files and converts it to
    pyWebLayout's abstract document model.
    """
    def __init__(self, epub_path: str):
        """
        Initialize an EPUB reader.
        Args:
            epub_path: Path to the EPUB file
        """
        self.epub_path = epub_path
        self.book = Book()
        # Temporary extraction directory; set by read()
        self.temp_dir = None
        # Directory that holds the package document; set by _extract_epub()
        self.content_dir = None
        # Path of the NCX table of contents. Declared here so the attribute
        # always exists; _parse_spine()/_parse_toc() fill it in when found
        self.toc_path = None
        self.metadata = {}
        self.toc = []
        self.spine = []
        self.manifest = {}
    def read(self) -> Book:
        """
        Parse the EPUB file into a Book.
        The archive is unpacked into a temporary directory which is removed
        again before returning, even when a parsing step fails.
        Returns:
            Book: The parsed book
        """
        try:
            self.temp_dir = tempfile.mkdtemp()
            # Each step builds on the state left behind by the previous one
            pipeline = (
                self._extract_epub,
                self._parse_package_document,
                self._parse_toc,
                self._create_book,
                self._add_chapters,
            )
            for step in pipeline:
                step()
            return self.book
        finally:
            # Drop the unpacked files whatever happened above
            if self.temp_dir:
                from shutil import rmtree
                rmtree(self.temp_dir, ignore_errors=True)
    def _extract_epub(self):
        """Extract the EPUB file to a temporary directory."""
        with zipfile.ZipFile(self.epub_path, 'r') as zip_ref:
            zip_ref.extractall(self.temp_dir)
        # Find the content directory (typically OEBPS or OPS)
        container_path = os.path.join(self.temp_dir, 'META-INF', 'container.xml')
        if os.path.exists(container_path):
            tree = ET.parse(container_path)
            root = tree.getroot()
            # Get the path to the package document (content.opf)
            for rootfile in root.findall('.//{urn:oasis:names:tc:opendocument:xmlns:container}rootfile'):
                full_path = rootfile.get('full-path')
                if full_path:
                    self.content_dir = os.path.dirname(os.path.join(self.temp_dir, full_path))
                    return
        # Fallback: look for common content directories
        for content_dir in ['OEBPS', 'OPS', 'Content']:
            if os.path.exists(os.path.join(self.temp_dir, content_dir)):
                self.content_dir = os.path.join(self.temp_dir, content_dir)
                return
        # If no content directory found, use the root
        self.content_dir = self.temp_dir
    def _parse_package_document(self):
        """Parse the package document (content.opf)."""
        # Find the package document
        opf_path = None
        for root, dirs, files in os.walk(self.content_dir):
            for file in files:
                if file.endswith('.opf'):
                    opf_path = os.path.join(root, file)
                    break
            if opf_path:
                break
        if not opf_path:
            raise ValueError("No package document (.opf) found in EPUB")
        # Parse the package document
        tree = ET.parse(opf_path)
        root = tree.getroot()
        # Parse metadata
        self._parse_metadata(root)
        # Parse manifest
        self._parse_manifest(root)
        # Parse spine
        self._parse_spine(root)
    def _parse_metadata(self, root: ET.Element):
        """
        Parse Dublin Core metadata from the package document.
        Every direct ``dc:*`` child of the metadata element is stored in
        ``self.metadata`` under its local name (a later element of the same
        name overwrites an earlier one). Subjects are the exception: they
        are collected into the ``subjects`` list, skipping empty elements so
        the list only ever contains strings that can be joined later.
        Args:
            root: Root element of the package document
        """
        metadata_elem = root.find('.//{{{0}}}metadata'.format(NAMESPACES['opf']))
        if metadata_elem is None:
            return
        dc_prefix = '{{{0}}}'.format(NAMESPACES['dc'])
        for elem in metadata_elem:
            if not elem.tag.startswith(dc_prefix):
                continue
            # Local name without the "{namespace}" prefix
            name = elem.tag[len(dc_prefix):]
            value = elem.text
            if name == 'subject':
                # An empty <dc:subject/> has text None; appending it would
                # make ', '.join(subjects) in _create_book() raise TypeError
                if value and value.strip():
                    self.metadata.setdefault('subjects', []).append(value)
            else:
                self.metadata[name] = value
    def _parse_manifest(self, root: ET.Element):
        """
        Record every manifest item of the package document.
        Items are stored in ``self.manifest`` by id, each with its
        percent-decoded href, the normalised filesystem path below the
        content directory, and its media type. Items lacking an id or href
        are skipped.
        Args:
            root: Root element of the package document
        """
        opf_ns = NAMESPACES['opf']
        manifest_elem = root.find('.//{{{0}}}manifest'.format(opf_ns))
        if manifest_elem is None:
            return
        for item in manifest_elem.findall('.//{{{0}}}item'.format(opf_ns)):
            item_id = item.get('id')
            raw_href = item.get('href')
            media_type = item.get('media-type')
            if not (item_id and raw_href):
                continue
            # hrefs are URL-encoded; decode before building a file path
            href = urllib.parse.unquote(raw_href)
            self.manifest[item_id] = {
                'href': href,
                'path': os.path.normpath(os.path.join(self.content_dir, href)),
                'media_type': media_type,
            }
    def _parse_spine(self, root: ET.Element):
        """
        Read the reading order (spine) from the package document.
        Also remembers the path of the NCX file when the spine's ``toc``
        attribute names a known manifest item. Only itemrefs that point at
        known manifest items are appended to ``self.spine``.
        Args:
            root: Root element of the package document
        """
        opf_ns = NAMESPACES['opf']
        spine_elem = root.find('.//{{{0}}}spine'.format(opf_ns))
        if spine_elem is None:
            return
        # The toc attribute holds the manifest id of the NCX file
        toc_id = spine_elem.get('toc')
        if toc_id and toc_id in self.manifest:
            self.toc_path = self.manifest[toc_id]['path']
        itemrefs = spine_elem.findall('.//{{{0}}}itemref'.format(opf_ns))
        referenced_ids = (itemref.get('idref') for itemref in itemrefs)
        self.spine.extend(
            idref for idref in referenced_ids
            if idref and idref in self.manifest
        )
    def _parse_toc(self):
        """Parse the table of contents."""
        if not hasattr(self, 'toc_path') or not self.toc_path or not os.path.exists(self.toc_path):
            # Try to find the toc.ncx file
            for root, dirs, files in os.walk(self.content_dir):
                for file in files:
                    if file.endswith('.ncx'):
                        self.toc_path = os.path.join(root, file)
                        break
                if hasattr(self, 'toc_path') and self.toc_path:
                    break
        if not hasattr(self, 'toc_path') or not self.toc_path or not os.path.exists(self.toc_path):
            # No TOC found
            return
        # Parse the NCX file
        tree = ET.parse(self.toc_path)
        root = tree.getroot()
        # Parse navMap
        nav_map = root.find('.//{{{0}}}navMap'.format(NAMESPACES['ncx']))
        if nav_map is None:
            return
        # Parse navPoints
        self._parse_nav_points(nav_map, [])
    def _parse_nav_points(self, parent: ET.Element, path: List[Dict[str, Any]]):
        """
        Recursively parse navPoints from the NCX file.
        Only the direct navPoint children of ``parent`` are handled at each
        level; their own children are reached through the recursive call.
        This keeps the hierarchy intact and adds every navPoint exactly
        once.
        Args:
            parent: Parent element containing navPoints
            path: Current path in the TOC hierarchy; the new entries become
                children of its last element, or top-level entries of
                ``self.toc`` when it is empty
        """
        ncx_ns = NAMESPACES['ncx']
        # No './/' here: that would match every descendant and, combined
        # with the recursion below, duplicate nested entries
        for nav_point in parent.findall('{{{0}}}navPoint'.format(ncx_ns)):
            nav_id = nav_point.get('id')
            play_order = nav_point.get('playOrder')
            # Use this navPoint's own label and content, not a descendant's
            nav_label = nav_point.find('{{{0}}}navLabel'.format(ncx_ns))
            # Compare with None: an Element without children is falsy
            text_elem = (
                nav_label.find('.//{{{0}}}text'.format(ncx_ns))
                if nav_label is not None else None
            )
            label = text_elem.text if text_elem is not None else ""
            content = nav_point.find('{{{0}}}content'.format(ncx_ns))
            src = content.get('src') if content is not None else ""
            entry = {
                'id': nav_id,
                'label': label,
                'src': src,
                'play_order': play_order,
                'children': []
            }
            if path:
                path[-1]['children'].append(entry)
            else:
                self.toc.append(entry)
            # Descend into the nested navPoints of this entry
            self._parse_nav_points(nav_point, path + [entry])
    def _create_book(self):
        """
        Copy the parsed metadata onto the Book.
        Only keys that are present in ``self.metadata`` are applied. The
        collected subjects are joined with ", " into a single keywords
        value.
        """
        meta = self.metadata
        if 'title' in meta:
            self.book.set_title(meta['title'])
        # (metadata key, target type) in the order they are applied
        typed_fields = (
            ('creator', MetadataType.AUTHOR),
            ('language', MetadataType.LANGUAGE),
            ('description', MetadataType.DESCRIPTION),
            ('subjects', MetadataType.KEYWORDS),
            ('date', MetadataType.PUBLICATION_DATE),
            ('identifier', MetadataType.IDENTIFIER),
            ('publisher', MetadataType.PUBLISHER),
        )
        for key, metadata_type in typed_fields:
            if key not in meta:
                continue
            value = meta[key]
            if key == 'subjects':
                value = ', '.join(value)
            self.book.set_metadata(metadata_type, value)
    def _add_chapters(self):
        """
        Add one chapter per spine item to the book.
        Chapters are numbered from 1 in spine order. A chapter takes its
        title from the first TOC entry that points at its file, or gets no
        title when the TOC does not mention it. If a chapter's HTML cannot
        be read or parsed, the error is printed and a paragraph describing
        the error is added to the chapter instead.
        """
        # Map each content file to the TOC entry that names it
        toc_map = {}
        def add_to_toc_map(entries):
            for entry in entries:
                if entry['src']:
                    # Drop the fragment, then decode percent-escapes so the
                    # key matches the manifest hrefs, which are decoded too
                    src_path = urllib.parse.unquote(entry['src'].split('#', 1)[0])
                    # First entry wins: a chapter heading comes before the
                    # section entries that point into the same file
                    toc_map.setdefault(src_path, entry)
                if entry['children']:
                    add_to_toc_map(entry['children'])
        add_to_toc_map(self.toc)
        # Process spine items
        for number, idref in enumerate(self.spine, start=1):
            item = self.manifest.get(idref)
            if item is None:
                continue
            path = item['path']
            href = item['href']
            toc_entry = toc_map.get(href)
            chapter_title = toc_entry['label'] if toc_entry is not None else None
            chapter = self.book.create_chapter(chapter_title, number)
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    html = f.read()
                base_url = os.path.dirname(path)
                # parse_html_string() takes its options by keyword only; a
                # second positional argument raises TypeError
                document = parse_html(html, base_url=base_url)
                for block in document.blocks:
                    chapter.add_block(block)
            except Exception as e:
                print(f"Error parsing chapter {number}: {str(e)}")
                # Add an error message block
                # NOTE(review): 'Parapgraph' looks like a misspelling of
                # 'Paragraph'; if the block module does not export exactly
                # this name, the import fails inside this handler - confirm
                from pyWebLayout.abstract.block import Parapgraph
                from pyWebLayout.abstract.inline import Word
                error_para = Parapgraph()
                error_para.add_word(Word(f"Error loading chapter: {str(e)}"))
                chapter.add_block(error_para)
 def read_epub(epub_path: str) -> Book:
    """
    Parse the EPUB file at the given path into a Book.
    A fresh EPUBReader is created for every call.
    Args:
        epub_path: Path to the EPUB file
    Returns:
        Book: The parsed book
    """
    return EPUBReader(epub_path).read()
--- a/pyWebLayout/io/readers/html.py
+++ b/pyWebLayout/io/readers/html.py
@ -0,0 +1,190 @@
 """
 Modern HTML reader for pyWebLayout.
 This module provides a decomposed HTML reader that uses specialized
 readers for metadata, content, and resources, following the pattern
 established in the abstract module.
 """
 import os
 from typing import Union, Optional
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.io.readers.base import CompositeReader
 from pyWebLayout.io.readers.html_metadata import HTMLMetadataReader
 from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.io.readers.html_resources import HTMLResourceReader
 class HTMLReader(CompositeReader):
    """
    Modern HTML reader using decomposed architecture.
    This reader combines specialized readers for metadata, content,
    and resources to provide a complete HTML parsing solution.
    """
    def __init__(self) -> None:
        """
        Initialize the HTML reader with all specialized readers.
        Metadata, content and resource readers are installed; no structure
        reader is set here.
        """
        super().__init__()
        # Set up specialized readers
        self.set_metadata_reader(HTMLMetadataReader())
        self.set_content_reader(HTMLContentReader())
        self.set_resource_reader(HTMLResourceReader())
    def can_read(self, source: Union[str, bytes]) -> bool:
        """
        Check if this reader can handle the given source.
        A string naming an existing file is judged by its extension alone
        (.html, .htm, .xhtml). Any other string, and any bytes value
        (decoded as UTF-8, ignoring undecodable bytes), is treated as
        content and accepted when it starts with a doctype or ``<html`` tag
        or contains ``<html`` within its first 200 characters.
        Args:
            source: The source to check (file path or content)
        Returns:
            True if this reader can handle the source, False otherwise
        """
        if isinstance(source, str):
            if os.path.isfile(source):
                return source.lower().endswith(('.html', '.htm', '.xhtml'))
            text = source
        elif isinstance(source, bytes):
            # errors='ignore' cannot fail on bad bytes, so no try/except
            text = source.decode('utf-8', errors='ignore')
        else:
            return False
        # Very basic content sniffing, shared by str and bytes input
        probe = text.lower().strip()
        return (probe.startswith(('<!doctype html', '<html')) or
                '<html' in probe[:200])
    def read(self, source: Union[str, bytes], **options) -> Document:
        """
        Parse an HTML source into a Document.
        Args:
            source: The HTML source to read (file path or content)
            **options: Additional options for reading
                - base_url: Base URL for resolving relative links; defaults
                  to a file:// URL of the source's directory when the
                  source is a file
                - encoding: Character encoding (default: 'utf-8')
                - extract_metadata: Whether to extract metadata (default: True)
                - extract_resources: Whether to extract resources (default: True)
        Returns:
            The parsed Document
        """
        base_url = options.get('base_url')
        encoding = options.get('encoding', 'utf-8')
        want_metadata = options.get('extract_metadata', True)
        want_resources = options.get('extract_resources', True)
        # Load the markup before anything else
        html_content = self._read_html_content(source, encoding)
        if not base_url:
            # Derive a base URL from the file location when none was given
            if isinstance(source, str) and os.path.isfile(source):
                parent_dir = os.path.dirname(os.path.abspath(source))
                base_url = f"file://{parent_dir}/"
        # Hand the base URL to the content reader if it supports one
        if self._content_reader and hasattr(self._content_reader, 'set_base_url'):
            self._content_reader.set_base_url(base_url)
        document = Document()
        if want_metadata and self._metadata_reader:
            self._metadata_reader.extract_metadata(html_content, document)
        # Content is always extracted when a content reader is present
        if self._content_reader:
            self._content_reader.extract_content(html_content, document)
        if want_resources and self._resource_reader:
            self._resource_reader.extract_resources(html_content, document)
        return document
    def _read_html_content(self, source: Union[str, bytes], encoding: str = 'utf-8') -> str:
        """
        Read HTML content from various sources.
        Args:
            source: The source to read from
            encoding: Character encoding to use
        Returns:
            The HTML content as a string
        """
        if isinstance(source, bytes):
            # Source is already bytes, decode it
            return source.decode(encoding, errors='replace')
        elif isinstance(source, str):
            # Check if it's a file path
            if os.path.isfile(source):
                with open(source, 'r', encoding=encoding, errors='replace') as f:
                    return f.read()
            else:
                # Assume it's HTML content
                return source
        else:
            raise ValueError(f"Unsupported source type: {type(source)}")
 def read_html(source: Union[str, bytes], **options) -> Document:
    """
    Parse HTML with a freshly created HTMLReader.
    Args:
        source: The HTML source to read (file path or content)
        **options: Additional options, passed through to HTMLReader.read()
    Returns:
        The parsed Document
    """
    return HTMLReader().read(source, **options)
 def read_html_file(file_path: str, **options) -> Document:
    """
    Convenience function to read HTML from a file.
    Args:
        file_path: Path to the HTML file
        **options: Additional options for reading
    Returns:
        The parsed Document
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"HTML file not found: {file_path}")
    reader = HTMLReader()
    return reader.read(file_path, **options)
 def parse_html_string(html_content: str, **options) -> Document:
    """
    Parse HTML markup given as a string.
    All options must be passed by keyword (for example ``base_url=...``).
    Args:
        html_content: The HTML content as a string
        **options: Additional options, passed through to HTMLReader.read()
    Returns:
        The parsed Document
    """
    return HTMLReader().read(html_content, **options)
--- a/pyWebLayout/io/readers/html_content.py
+++ b/pyWebLayout/io/readers/html_content.py
@ -0,0 +1,269 @@
 """
 Modern HTML content reader for pyWebLayout.
 This module provides a decomposed HTML content reader that uses specialized
 handlers and managers for different aspects of HTML parsing.
 """
 from html.parser import HTMLParser as BaseHTMLParser
 from typing import Dict, List, Optional, Tuple, Union, Any
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.io.readers.base import ContentReader
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 from pyWebLayout.io.readers.html_text import HTMLTextProcessor
 from pyWebLayout.io.readers.html_elements import (
    BlockElementHandler, ListElementHandler, TableElementHandler, InlineElementHandler
 )
 class HTMLContentReader(ContentReader, BaseHTMLParser):
    """
    Modern HTML content reader using decomposed architecture.
    This class orchestrates specialized handlers to parse HTML content
    and convert it to pyWebLayout's abstract document model.
    Style bookkeeping: every start tag that reaches the handlers pushes one
    style, and exactly one pop balances it, either at the matching end tag
    or, for void elements such as <br>, right after the start tag. The
    head, body, script and style tags never push, so they never pop.
    """
    # Elements without content or closing tag. html.parser reports an end
    # tag for them only when they are written self-closed (<br/>), so their
    # style cannot be popped from handle_endtag.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'source', 'track', 'wbr',
    ))
    def __init__(self):
        """Initialize the HTML content reader."""
        BaseHTMLParser.__init__(self)
        # Initialize managers and processors
        self.style_manager = HTMLStyleManager()
        self.text_processor = HTMLTextProcessor(self.style_manager)
        # Initialize element handlers
        self.block_handler = BlockElementHandler(self.style_manager, self.text_processor)
        self.list_handler = ListElementHandler(self.text_processor)
        self.table_handler = TableElementHandler(self.text_processor)
        self.inline_handler = InlineElementHandler(self.text_processor)
        # Document and parsing state
        self._document: Optional[Document] = None
        self._in_head = False
        self._in_script = False
        self._in_style = False
    def extract_content(self, html_content: str, document: Document) -> Any:
        """
        Extract content from HTML.
        Args:
            html_content: The HTML content to parse
            document: The document to populate with content
        Returns:
            The document with populated content
        """
        self._document = document
        self._reset_state()
        # Discard input a previous parse may have left in the parser buffer
        BaseHTMLParser.reset(self)
        # Parse the HTML content
        self.feed(html_content)
        # Force processing of input that feed() is still holding back
        BaseHTMLParser.close(self)
        # Flush any remaining text
        self.text_processor.flush_text()
        return document
    def set_base_url(self, base_url: str):
        """Set the base URL for resolving relative links."""
        self.inline_handler.set_base_url(base_url)
    def _reset_state(self):
        """Reset all parser state for new content."""
        # Reset managers and processors
        self.style_manager.reset()
        self.text_processor.reset()
        # Reset element handlers
        self.block_handler.reset()
        self.list_handler.reset()
        self.table_handler.reset()
        self.inline_handler.reset()
        # Reset parser flags
        self._in_head = False
        self._in_script = False
        self._in_style = False
    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        """Handle the start of an HTML tag."""
        tag = tag.lower()
        # html.parser reports valueless attributes (<a href>) as None, while
        # the handlers treat every value as a string; normalise to ''.
        attrs_dict = {name: ('' if value is None else value) for name, value in attrs}
        # Skip content in head, script, style (except body)
        if self._should_skip_content(tag):
            return
        # Handle special section markers
        if self._handle_special_sections_start(tag):
            return
        # Apply styles for this element
        style = self.style_manager.apply_style_to_element(tag, attrs_dict)
        self.style_manager.push_style(style)
        # Delegate to appropriate handler
        self._delegate_start_tag(tag, attrs_dict)
        if tag in self._VOID_ELEMENTS:
            # No end tag will follow to balance the push above
            self.style_manager.pop_style()
    def handle_endtag(self, tag: str):
        """Handle the end of an HTML tag."""
        tag = tag.lower()
        # Handle special section markers
        if self._handle_special_sections_end(tag):
            return
        # Skip content in head, script, style
        if self._in_head or self._in_script or self._in_style:
            return
        # Flush any accumulated text
        self.text_processor.flush_text()
        if tag in self._VOID_ELEMENTS:
            # Self-closed or stray end tag: the style was popped at the start tag
            return
        # Delegate to appropriate handler
        self._delegate_end_tag(tag)
        # Pop the style pushed by the matching start tag
        self.style_manager.pop_style()
    def handle_data(self, data: str):
        """Handle text data."""
        if self._in_head or self._in_script or self._in_style:
            return
        self.text_processor.add_text(data)
    def handle_entityref(self, name: str):
        """
        Handle an HTML entity reference.
        html.parser calls this only when ``convert_charrefs`` is off; with
        it on, references arrive already decoded through handle_data.
        """
        if self._in_head or self._in_script or self._in_style:
            return
        self.text_processor.add_entity_reference(name)
    def handle_charref(self, name: str):
        """
        Handle a character reference.
        html.parser calls this only when ``convert_charrefs`` is off; with
        it on, references arrive already decoded through handle_data.
        """
        if self._in_head or self._in_script or self._in_style:
            return
        self.text_processor.add_character_reference(name)
    def _should_skip_content(self, tag: str) -> bool:
        """Check if we should skip content based on current state."""
        if not (self._in_head or self._in_script or self._in_style):
            return False
        # head/script/style/body are left to the special section handlers
        return tag not in ('head', 'script', 'style', 'body')
    def _handle_special_sections_start(self, tag: str) -> bool:
        """Handle special section start tags. Returns True if handled."""
        if tag == 'head':
            self._in_head = True
            return True
        elif tag == 'body':
            self._in_head = False
            return True
        elif tag == 'script':
            self._in_script = True
            return True
        elif tag == 'style':
            self._in_style = True
            return True
        return False
    def _handle_special_sections_end(self, tag: str) -> bool:
        """
        Handle special section end tags. Returns True if handled.
        None of these tags pushed a style when they opened, so none is
        popped here.
        """
        if tag == 'head':
            self._in_head = False
            return True
        elif tag == 'script':
            self._in_script = False
            return True
        elif tag == 'style':
            self._in_style = False
            return True
        elif tag == 'body':
            # Text placed directly in <body> still has to reach the document
            if not (self._in_head or self._in_script or self._in_style):
                self.text_processor.flush_text()
            return True
        return False
    def _delegate_start_tag(self, tag: str, attrs: Dict[str, str]):
        """Delegate start tag handling to appropriate handler."""
        # Block elements
        if tag == 'p':
            self.block_handler.handle_paragraph_start(self._document)
        elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            self.block_handler.handle_heading_start(tag, self._document)
        elif tag == 'div':
            self.block_handler.handle_div_start(self._document)
        elif tag == 'blockquote':
            self.block_handler.handle_blockquote_start(self._document)
        elif tag == 'pre':
            self.block_handler.handle_pre_start(self._document)
        elif tag == 'code':
            self.block_handler.handle_code_start(attrs, self._document)
        # List elements
        elif tag in ('ul', 'ol', 'dl'):
            self.list_handler.handle_list_start(tag, self.block_handler, self._document)
        elif tag == 'li':
            self.list_handler.handle_list_item_start(self.block_handler)
        elif tag in ('dt', 'dd'):
            self.list_handler.handle_definition_start(tag, self.block_handler)
        # Table elements
        elif tag == 'table':
            self.table_handler.handle_table_start(attrs, self.block_handler, self._document)
        elif tag in ('thead', 'tbody', 'tfoot'):
            self.table_handler.handle_table_section_start(tag)
        elif tag == 'tr':
            self.table_handler.handle_table_row_start()
        elif tag in ('td', 'th'):
            self.table_handler.handle_table_cell_start(tag, attrs, self.block_handler)
        # Inline elements
        elif tag == 'a':
            self.inline_handler.handle_link_start(attrs)
        elif tag == 'img':
            self.inline_handler.handle_image(attrs, self.block_handler, self._document)
        elif tag == 'br':
            self.inline_handler.handle_line_break(self.block_handler)
        elif tag == 'hr':
            self.inline_handler.handle_horizontal_rule(self.block_handler, self._document)
        # Style-only elements (no special handling needed, just styling)
        elif tag in ('b', 'strong', 'i', 'em', 'u', 'span'):
            pass  # Styles are already applied by style manager
    def _delegate_end_tag(self, tag: str):
        """Delegate end tag handling to appropriate handler."""
        # Block elements
        if tag in ('p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'pre', 'code'):
            self.block_handler.handle_block_end()
        # List elements
        elif tag in ('ul', 'ol', 'dl'):
            self.list_handler.handle_list_end(self.block_handler)
        elif tag in ('li', 'dt', 'dd'):
            self.list_handler.handle_list_item_end(self.block_handler)
        # Table elements
        elif tag == 'table':
            self.table_handler.handle_table_end(self.block_handler)
        elif tag in ('thead', 'tbody', 'tfoot'):
            self.table_handler.handle_table_section_end()
        elif tag == 'tr':
            self.table_handler.handle_table_row_end()
        elif tag in ('td', 'th'):
            self.table_handler.handle_table_cell_end(self.block_handler)
        # Inline elements
        elif tag == 'a':
            self.inline_handler.handle_link_end()
        # Style-only elements (no special handling needed)
        elif tag in ('b', 'strong', 'i', 'em', 'u', 'span'):
            pass  # Styles are handled by style manager
--- a/pyWebLayout/io/readers/html_elements.py
+++ b/pyWebLayout/io/readers/html_elements.py
@ -0,0 +1,472 @@
 """
 HTML element handlers for pyWebLayout.
 This module provides specialized handlers for different types of HTML elements,
 using composition and delegation to handle specific element types.
 """
 from typing import Dict, List, Optional, Any
 import urllib.parse
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.abstract.block import (
    Block, Parapgraph, Heading, HeadingLevel, Quote, CodeBlock,
    HList, ListStyle, ListItem, Table, TableRow, TableCell, 
    HorizontalRule, LineBreak, Image
 )
 from pyWebLayout.abstract.functional import Link, LinkType
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 from pyWebLayout.io.readers.html_text import HTMLTextProcessor
 class BlockElementHandler:
    """
    Handles block-level HTML elements like paragraphs, headings, divs.
    Open blocks are kept on ``block_stack``; ``current_block`` mirrors its
    top and ``current_paragraph`` is the block that receives text (or
    None). The list and table handlers of this module push to and pop from
    the same stack.
    Every start handler leaves the stack so that the single pop done by
    ``handle_block_end`` for the matching end tag restores it.
    """
    def __init__(self, style_manager: HTMLStyleManager, text_processor: HTMLTextProcessor):
        self.style_manager = style_manager
        self.text_processor = text_processor
        self.block_stack: List[Block] = []
        self.current_block: Optional[Block] = None
        self.current_paragraph: Optional[Parapgraph] = None
        # Paragraphs opened for <pre>; only these may be turned into a
        # CodeBlock by a nested <code>.
        self._pre_paragraphs: List[Parapgraph] = []
    def reset(self):
        """Reset the handler state."""
        self.block_stack = []
        self.current_block = None
        self.current_paragraph = None
        self._pre_paragraphs = []
    def add_block_to_document_or_parent(self, block: Block, document: Document):
        """Add a block to the document or current parent block."""
        if self.current_block and hasattr(self.current_block, 'add_block'):
            self.current_block.add_block(block)
        else:
            document.add_block(block)
    def _open_block(self, block: Block, document: Document, paragraph: Optional[Parapgraph]):
        """Attach ``block``, push it and make ``paragraph`` the text target."""
        self.add_block_to_document_or_parent(block, document)
        self.block_stack.append(block)
        self.current_block = block
        self.current_paragraph = paragraph
        self.text_processor.set_current_paragraph(paragraph)
    def handle_paragraph_start(self, document: Document):
        """Handle the start of a paragraph element."""
        self.text_processor.flush_text()
        paragraph = Parapgraph()
        self._open_block(paragraph, document, paragraph)
    def handle_heading_start(self, tag: str, document: Document):
        """Handle the start of a heading element (``tag`` is 'h1'..'h6')."""
        self.text_processor.flush_text()
        level_map = {
            'h1': HeadingLevel.H1, 'h2': HeadingLevel.H2, 'h3': HeadingLevel.H3,
            'h4': HeadingLevel.H4, 'h5': HeadingLevel.H5, 'h6': HeadingLevel.H6
        }
        heading = Heading(level=level_map[tag])
        # The heading itself receives the text
        self._open_block(heading, document, heading)
    def handle_div_start(self, document: Document):
        """Handle the start of a div element (represented as a paragraph)."""
        self.text_processor.flush_text()
        div_para = Parapgraph()
        self._open_block(div_para, document, div_para)
    def handle_blockquote_start(self, document: Document):
        """Handle the start of a blockquote element."""
        self.text_processor.flush_text()
        # A quote holds blocks, so there is no text target until one opens
        self._open_block(Quote(), document, None)
    def handle_pre_start(self, document: Document):
        """Handle the start of a pre element (represented as a paragraph)."""
        self.text_processor.flush_text()
        pre_para = Parapgraph()
        self._pre_paragraphs.append(pre_para)
        self._open_block(pre_para, document, pre_para)
    @staticmethod
    def _language_from_class(class_attr: Optional[str]) -> str:
        """Return the language named by a ``language-xxx`` class token, or ''."""
        prefix = 'language-'
        for token in (class_attr or '').split():
            if token.startswith(prefix):
                return token[len(prefix):]
        return ""
    @staticmethod
    def _swap_block(old: Block, new: Block, document: Document):
        """Put ``new`` where ``old`` sits in its parent block or in the document."""
        parent = old.parent
        if parent:
            if hasattr(parent, '_blocks'):
                for index, candidate in enumerate(parent._blocks):
                    if candidate is old:
                        parent._blocks[index] = new
                        new.parent = parent
                        break
        else:
            for index, candidate in enumerate(document.blocks):
                if candidate is old:
                    document.blocks[index] = new
                    break
    def handle_code_start(self, attrs: Dict[str, str], document: Document):
        """
        Handle the start of a code element.
        Directly inside a <pre>, the paragraph created for the <pre> is
        replaced by a CodeBlock. Anywhere else the element is inline and the
        enclosing block is kept. In both cases one stack entry is added so
        that the pop done for </code> leaves the enclosing block open.
        Args:
            attrs: Attributes of the <code> tag; ``class`` may name the language
            document: The document being populated
        """
        if not self.block_stack:
            # Nothing is open, so the pop done for </code> is already a no-op
            return
        enclosing = self.block_stack[-1]
        if not any(enclosing is pre for pre in self._pre_paragraphs):
            # Inline <code>: re-push the enclosing block as a placeholder
            self.block_stack.append(enclosing)
            return
        self._pre_paragraphs = [p for p in self._pre_paragraphs if p is not enclosing]
        code_block = CodeBlock(language=self._language_from_class(attrs.get('class')))
        self._swap_block(enclosing, code_block, document)
        # One entry is closed by </code>, the other by </pre>
        self.block_stack[-1] = code_block
        self.block_stack.append(code_block)
        self.current_block = code_block
        self.current_paragraph = None
        self.text_processor.set_current_paragraph(None)
    def handle_block_end(self):
        """Handle the end of a block element."""
        if self.block_stack:
            closed = self.block_stack.pop()
            self._pre_paragraphs = [p for p in self._pre_paragraphs if p is not closed]
            if self.block_stack and self.block_stack[-1] is closed:
                # A <code> placeholder was closed: the enclosing block is
                # still open and stays the current target.
                return
        if self.block_stack:
            self.current_block = self.block_stack[-1]
            # Update current paragraph based on block type
            if isinstance(self.current_block, Parapgraph):
                self.current_paragraph = self.current_block
            else:
                self.current_paragraph = None
        else:
            self.current_block = None
            self.current_paragraph = None
        self.text_processor.set_current_paragraph(self.current_paragraph)
 class ListElementHandler:
    """
    Handles list-related HTML elements (ul, ol, dl, li, dt, dd).
    Lists and their items are pushed on the block handler's ``block_stack``.
    End tags remove only what the matching start tag pushed: stray end tags
    are ignored, and items whose closing tag was omitted are closed when the
    next item starts or the list ends.
    """
    def __init__(self, text_processor: HTMLTextProcessor):
        self.text_processor = text_processor
        self.list_stack: List[HList] = []
    def reset(self):
        """Reset the handler state."""
        self.list_stack = []
    def _sync_with_stack(self, block_handler: BlockElementHandler):
        """Point the block handler at the stack top and clear the text target."""
        stack = block_handler.block_stack
        block_handler.current_block = stack[-1] if stack else None
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)
    def _close_open_item(self, block_handler: BlockElementHandler) -> bool:
        """
        Remove the innermost open item of the current list from the stack.
        Everything opened inside that item is removed with it. The search
        stops at the first list found, so items of outer lists are never
        touched. Returns True if an item was closed.
        """
        stack = block_handler.block_stack
        for index in range(len(stack) - 1, -1, -1):
            entry = stack[index]
            if isinstance(entry, ListItem):
                del stack[index:]
                return True
            if isinstance(entry, HList):
                break
        return False
    def _open_item(self, list_item: ListItem, block_handler: BlockElementHandler):
        """Push ``list_item`` and start a fresh paragraph in it for its text."""
        block_handler.block_stack.append(list_item)
        block_handler.current_block = list_item
        item_para = Parapgraph()
        list_item.add_block(item_para)
        block_handler.current_paragraph = item_para
        self.text_processor.set_current_paragraph(item_para)
    def handle_list_start(self, tag: str, block_handler: BlockElementHandler, document: Document):
        """Handle the start of a list element (``tag`` is 'ul', 'ol' or 'dl')."""
        self.text_processor.flush_text()
        style_map = {
            'ul': ListStyle.UNORDERED,
            'ol': ListStyle.ORDERED,
            'dl': ListStyle.DEFINITION
        }
        list_block = HList(style=style_map[tag])
        block_handler.add_block_to_document_or_parent(list_block, document)
        block_handler.block_stack.append(list_block)
        self.list_stack.append(list_block)
        block_handler.current_block = list_block
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)
    def handle_list_item_start(self, block_handler: BlockElementHandler):
        """Handle the start of a list item; ignored outside of any list."""
        if not self.list_stack:
            return
        self.text_processor.flush_text()
        # <li> may omit its closing tag: close the previous item first
        self._close_open_item(block_handler)
        list_item = ListItem()
        self.list_stack[-1].add_item(list_item)
        self._open_item(list_item, block_handler)
    def handle_definition_start(self, tag: str, block_handler: BlockElementHandler):
        """
        Handle the start of definition terms or descriptions.
        A <dt> creates a new item. A <dd> adds a paragraph to the most
        recent item and re-opens that item, so that </dd> closes exactly
        what <dd> opened. Both are ignored outside a definition list, and a
        <dd> with no preceding <dt> is ignored.
        """
        if not self.list_stack or self.list_stack[-1].style != ListStyle.DEFINITION:
            return
        self.text_processor.flush_text()
        current_list = self.list_stack[-1]
        # <dt>/<dd> may omit their closing tag: close the previous one first
        closed_previous = self._close_open_item(block_handler)
        if tag == 'dt':
            list_item = ListItem(term="")
            current_list.add_item(list_item)
            self._open_item(list_item, block_handler)
        elif tag == 'dd' and current_list._items:
            self._open_item(current_list._items[-1], block_handler)
        elif closed_previous:
            # Nothing was opened, but the stack changed underneath
            self._sync_with_stack(block_handler)
    def handle_list_end(self, block_handler: BlockElementHandler):
        """Handle the end of a list; a stray end tag is ignored."""
        if not self.list_stack:
            return
        list_block = self.list_stack.pop()
        stack = block_handler.block_stack
        # Remove the list together with anything left open inside it
        for index in range(len(stack) - 1, -1, -1):
            if stack[index] is list_block:
                del stack[index:]
                break
        self._sync_with_stack(block_handler)
    def handle_list_item_end(self, block_handler: BlockElementHandler):
        """Handle the end of a list item; a stray end tag is ignored."""
        if self._close_open_item(block_handler):
            self._sync_with_stack(block_handler)
 class TableElementHandler:
    """
    Handles table-related HTML elements (table, tr, td, th, thead, tbody, tfoot).
    Tables and cells are pushed on the block handler's ``block_stack``. End
    tags remove only what the matching start tag pushed: stray end tags are
    ignored, and cells whose closing tag was omitted are closed when the
    next cell starts or the table ends. The current row and section of an
    enclosing table are saved while a nested table is open.
    """
    def __init__(self, text_processor: HTMLTextProcessor):
        self.text_processor = text_processor
        self.table_stack: List[Table] = []
        self.current_table_row: Optional[TableRow] = None
        self.current_table_section = "body"
        # (row, section) that was current when each open table started
        self._outer_contexts: List[tuple] = []
    def reset(self):
        """Reset the handler state."""
        self.table_stack = []
        self.current_table_row = None
        self.current_table_section = "body"
        self._outer_contexts = []
    @staticmethod
    def _parse_span(value: Optional[str]) -> int:
        """Parse a colspan/rowspan attribute; missing or invalid values give 1."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 1
    def _sync_with_stack(self, block_handler: BlockElementHandler):
        """Point the block handler at the stack top and clear the text target."""
        stack = block_handler.block_stack
        block_handler.current_block = stack[-1] if stack else None
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)
    def _close_open_cell(self, block_handler: BlockElementHandler) -> bool:
        """
        Remove the innermost open cell of the current table from the stack.
        Everything opened inside that cell is removed with it. The search
        stops at the first table found, so cells of outer tables are never
        touched. Returns True if a cell was closed.
        """
        stack = block_handler.block_stack
        for index in range(len(stack) - 1, -1, -1):
            entry = stack[index]
            if isinstance(entry, TableCell):
                del stack[index:]
                return True
            if isinstance(entry, Table):
                break
        return False
    def handle_table_start(self, attrs: Dict[str, str], block_handler: BlockElementHandler, document: Document):
        """Handle the start of a table element; ``summary`` becomes the caption."""
        self.text_processor.flush_text()
        caption = attrs.get('summary')
        table = Table(caption=caption)
        block_handler.add_block_to_document_or_parent(table, document)
        block_handler.block_stack.append(table)
        # Remember the enclosing table's position and start this one clean
        self._outer_contexts.append((self.current_table_row, self.current_table_section))
        self.table_stack.append(table)
        self.current_table_row = None
        self.current_table_section = "body"
        block_handler.current_block = table
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)
    def handle_table_section_start(self, tag: str):
        """Handle the start of a table section ('thead', 'tbody' or 'tfoot')."""
        self.current_table_section = tag
    def handle_table_row_start(self):
        """Handle the start of a table row; ignored outside of any table."""
        if not self.table_stack:
            return
        self.text_processor.flush_text()
        row = TableRow()
        # Anything that is not thead/tfoot counts as the table body
        section = {'thead': "header", 'tfoot': "footer"}.get(self.current_table_section, "body")
        self.table_stack[-1].add_row(row, section=section)
        self.current_table_row = row
    def handle_table_cell_start(self, tag: str, attrs: Dict[str, str], block_handler: BlockElementHandler):
        """
        Handle the start of a table cell; ignored when no row is open.
        Args:
            tag: 'td' or 'th'; 'th' marks the cell as a header
            attrs: Attributes of the cell; colspan and rowspan are parsed
                independently and each falls back to 1 when unusable
            block_handler: Handler whose block stack receives the cell
        """
        if not self.current_table_row:
            return
        self.text_processor.flush_text()
        # <td>/<th> may omit their closing tag: close the previous cell first
        self._close_open_cell(block_handler)
        cell = TableCell(
            is_header=(tag == 'th'),
            colspan=self._parse_span(attrs.get('colspan')),
            rowspan=self._parse_span(attrs.get('rowspan')),
        )
        self.current_table_row.add_cell(cell)
        block_handler.block_stack.append(cell)
        block_handler.current_block = cell
        # Create a paragraph for the cell content
        cell_para = Parapgraph()
        cell.add_block(cell_para)
        block_handler.current_paragraph = cell_para
        self.text_processor.set_current_paragraph(cell_para)
    def handle_table_end(self, block_handler: BlockElementHandler):
        """Handle the end of a table; a stray end tag is ignored."""
        if not self.table_stack:
            return
        table = self.table_stack.pop()
        stack = block_handler.block_stack
        # Remove the table together with anything left open inside it
        for index in range(len(stack) - 1, -1, -1):
            if stack[index] is table:
                del stack[index:]
                break
        self._sync_with_stack(block_handler)
        saved = self._outer_contexts.pop() if self._outer_contexts else (None, "body")
        if self.table_stack:
            # Back inside the enclosing table: continue where it left off
            self.current_table_row, self.current_table_section = saved
        else:
            self.current_table_row = None
            self.current_table_section = "body"
    def handle_table_section_end(self):
        """Handle the end of a table section."""
        self.current_table_section = "body"
    def handle_table_row_end(self):
        """Handle the end of a table row."""
        self.current_table_row = None
    def handle_table_cell_end(self, block_handler: BlockElementHandler):
        """Handle the end of a table cell; a stray end tag is ignored."""
        if self._close_open_cell(block_handler):
            self._sync_with_stack(block_handler)
 class InlineElementHandler:
    """
    Handles inline and special HTML elements (a, img, br, hr).
    Attribute values that are missing, None (valueless attribute) or empty
    are all treated as absent.
    """
    def __init__(self, text_processor: HTMLTextProcessor, base_url: Optional[str] = None):
        self.text_processor = text_processor
        self.base_url = base_url
        self.in_link = False
        self.current_link: Optional[Link] = None
    def reset(self):
        """Reset the handler state (the base URL is kept)."""
        self.in_link = False
        self.current_link = None
    def set_base_url(self, base_url: Optional[str]):
        """Set the base URL for resolving relative links."""
        self.base_url = base_url
    @staticmethod
    def _parse_dimension(value: Optional[str]) -> Optional[int]:
        """Parse a width/height attribute; missing or non-integer values give None."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None
    def _resolve_source(self, src: str) -> str:
        """Resolve ``src`` against the base URL unless it is empty or already http(s)."""
        if not src or not self.base_url or src.startswith(('http://', 'https://')):
            return src
        return urllib.parse.urljoin(self.base_url, src)
    def handle_link_start(self, attrs: Dict[str, str]):
        """
        Handle the start of a link element.
        The link type follows from the href prefix: http(s) is external,
        ``javascript:`` is a function, ``api:`` is an API call (stored
        without the prefix), anything else is internal. Only internal
        hrefs other than ``#`` anchors are resolved against the base URL.
        """
        self.text_processor.flush_text()
        href = attrs.get('href') or ''
        title = attrs.get('title') or ''
        # Determine link type
        link_type = LinkType.INTERNAL
        if href.startswith(('http://', 'https://')):
            link_type = LinkType.EXTERNAL
        elif href.startswith('javascript:'):
            link_type = LinkType.FUNCTION
        elif href.startswith('api:'):
            link_type = LinkType.API
            # The remainder names the API target and is not a URL to resolve
            href = href[4:]
        elif self.base_url and not href.startswith('#'):
            # Resolve relative URLs
            href = urllib.parse.urljoin(self.base_url, href)
        self.current_link = Link(
            location=href,
            link_type=link_type,
            title=title if title else None
        )
        self.in_link = True
    def handle_link_end(self):
        """Handle the end of a link element."""
        self.in_link = False
        self.current_link = None
    def handle_image(self, attrs: Dict[str, str], block_handler: BlockElementHandler, document: Document):
        """
        Handle an image element.
        Width and height are parsed independently, so one unusable value
        does not discard the other.
        """
        # Emit pending text first so it keeps its place before the image
        self.text_processor.flush_text()
        src = self._resolve_source(attrs.get('src') or '')
        alt = attrs.get('alt') or ''
        width = self._parse_dimension(attrs.get('width'))
        height = self._parse_dimension(attrs.get('height'))
        image = Image(source=src, alt_text=alt, width=width, height=height)
        block_handler.add_block_to_document_or_parent(image, document)
    def handle_line_break(self, block_handler: BlockElementHandler):
        """Handle a line break element."""
        # Emit pending text first so the break follows the text it ends
        self.text_processor.flush_text()
        paragraph = block_handler.current_paragraph
        if paragraph and hasattr(paragraph, 'add_block'):
            paragraph.add_block(LineBreak())
    def handle_horizontal_rule(self, block_handler: BlockElementHandler, document: Document):
        """Handle a horizontal rule element."""
        self.text_processor.flush_text()
        hr = HorizontalRule()
        block_handler.add_block_to_document_or_parent(hr, document)
--- a/pyWebLayout/io/readers/html_metadata.py
+++ b/pyWebLayout/io/readers/html_metadata.py
@ -0,0 +1,426 @@
 """
 HTML metadata reader for pyWebLayout.
 This module provides specialized functionality for extracting metadata
 from HTML documents, following the decomposed architecture pattern.
 """
 from typing import Dict, Any, Optional
 import re
 from pyWebLayout.abstract.document import Document, MetadataType
 from pyWebLayout.io.readers.base import MetadataReader
 class HTMLMetadataReader(MetadataReader):
    """
    Specialized reader for extracting metadata from HTML documents.
    This class handles HTML meta tags, title elements, and other metadata
    sources like Open Graph tags and JSON-LD structured data.
    """
    def __init__(self):
        """Initialize the HTML metadata reader."""
        self._title = None
        self._meta_tags = {}
        self._og_tags = {}
        self._twitter_tags = {}
        self._json_ld = {}
    def extract_metadata(self, html_content: str, document: Document) -> Dict[str, Any]:
        """
        Extract metadata from HTML content.
        Args:
            html_content: The HTML content to parse
            document: The document to populate with metadata
        Returns:
            Dictionary of extracted metadata
        """
        # Reset internal state
        self._reset()
        # Extract title
        self._extract_title(html_content)
        # Extract meta tags
        self._extract_meta_tags(html_content)
        # Extract Open Graph tags
        self._extract_open_graph(html_content)
        # Extract Twitter Card tags
        self._extract_twitter_cards(html_content)
        # Extract JSON-LD structured data
        self._extract_json_ld(html_content)
        # Populate document with extracted metadata
        self._populate_document(document)
        # Return all extracted metadata
        return {
            'title': self._title,
            'meta_tags': self._meta_tags,
            'open_graph': self._og_tags,
            'twitter_cards': self._twitter_tags,
            'json_ld': self._json_ld
        }
    def _reset(self):
        """Reset internal state for a new extraction."""
        self._title = None
        self._meta_tags = {}
        self._og_tags = {}
        self._twitter_tags = {}
        self._json_ld = {}
    def _extract_title(self, html_content: str):
        """
        Extract the title from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Look for title tag
        title_match = re.search(r'<title[^>]*>(.*?)</title>', html_content, re.IGNORECASE | re.DOTALL)
        if title_match:
            # Clean up the title text
            self._title = self._clean_text(title_match.group(1))
    def _extract_meta_tags(self, html_content: str):
        """
        Extract meta tags from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Regular expression to match meta tags
        meta_pattern = r'<meta\s+([^>]+)>'
        for match in re.finditer(meta_pattern, html_content, re.IGNORECASE):
            attrs = self._parse_attributes(match.group(1))
            # Get name and content
            name = attrs.get('name', '').lower()
            content = attrs.get('content', '')
            # Handle different types of meta tags
            if name and content:
                self._meta_tags[name] = content
            # Handle http-equiv meta tags
            http_equiv = attrs.get('http-equiv', '').lower()
            if http_equiv and content:
                self._meta_tags[f'http-equiv:{http_equiv}'] = content
            # Handle charset meta tags
            charset = attrs.get('charset', '')
            if charset:
                self._meta_tags['charset'] = charset
    def _extract_open_graph(self, html_content: str):
        """
        Extract Open Graph meta tags from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Regular expression to match Open Graph meta tags
        og_pattern = r'<meta\s+property="og:([^"]+)"\s+content="([^"]*)"[^>]*>'
        for match in re.finditer(og_pattern, html_content, re.IGNORECASE):
            property_name = match.group(1)
            content = match.group(2)
            self._og_tags[property_name] = content
    def _extract_twitter_cards(self, html_content: str):
        """
        Extract Twitter Card meta tags from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Regular expression to match Twitter Card meta tags
        twitter_pattern = r'<meta\s+name="twitter:([^"]+)"\s+content="([^"]*)"[^>]*>'
        for match in re.finditer(twitter_pattern, html_content, re.IGNORECASE):
            property_name = match.group(1)
            content = match.group(2)
            self._twitter_tags[property_name] = content
    def _extract_json_ld(self, html_content: str):
        """
        Extract JSON-LD structured data from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Regular expression to match JSON-LD script tags
        json_ld_pattern = r'<script[^>]*type="application/ld\+json"[^>]*>(.*?)</script>'
        for match in re.finditer(json_ld_pattern, html_content, re.IGNORECASE | re.DOTALL):
            try:
                import json
                json_content = match.group(1).strip()
                data = json.loads(json_content)
                # Store JSON-LD data by type if available
                if isinstance(data, dict) and '@type' in data:
                    type_name = data['@type']
                    if type_name not in self._json_ld:
                        self._json_ld[type_name] = []
                    self._json_ld[type_name].append(data)
                elif isinstance(data, list):
                    # Handle arrays of structured data
                    for item in data:
                        if isinstance(item, dict) and '@type' in item:
                            type_name = item['@type']
                            if type_name not in self._json_ld:
                                self._json_ld[type_name] = []
                            self._json_ld[type_name].append(item)
            except (json.JSONDecodeError, ImportError):
                # Skip invalid JSON-LD
                continue
    def _populate_document(self, document: Document):
        """
        Populate the document with extracted metadata.
        Args:
            document: The document to populate
        """
        # Set title
        title = self._get_best_title()
        if title:
            document.set_metadata(MetadataType.TITLE, title)
        # Set description
        description = self._get_best_description()
        if description:
            document.set_metadata(MetadataType.DESCRIPTION, description)
        # Set author
        author = self._get_best_author()
        if author:
            document.set_metadata(MetadataType.AUTHOR, author)
        # Set keywords
        keywords = self._get_keywords()
        if keywords:
            document.set_metadata(MetadataType.KEYWORDS, keywords)
        # Set language
        language = self._get_language()
        if language:
            document.set_metadata(MetadataType.LANGUAGE, language)
        # Set cover image
        cover_image = self._get_cover_image()
        if cover_image:
            document.set_metadata(MetadataType.COVER_IMAGE, cover_image)
        # Set publisher
        publisher = self._get_publisher()
        if publisher:
            document.set_metadata(MetadataType.PUBLISHER, publisher)
        # Set publication date
        pub_date = self._get_publication_date()
        if pub_date:
            document.set_metadata(MetadataType.PUBLICATION_DATE, pub_date)
    def _get_best_title(self) -> Optional[str]:
        """Get the best available title from all sources."""
        # Priority order: Open Graph > Twitter > JSON-LD > meta > HTML title
        # Check Open Graph
        if 'title' in self._og_tags:
            return self._og_tags['title']
        # Check Twitter Cards
        if 'title' in self._twitter_tags:
            return self._twitter_tags['title']
        # Check JSON-LD
        for type_name, items in self._json_ld.items():
            for item in items:
                if 'name' in item:
                    return item['name']
                elif 'headline' in item:
                    return item['headline']
        # Check meta tags
        for key in ['title', 'og:title', 'twitter:title']:
            if key in self._meta_tags:
                return self._meta_tags[key]
        # Fall back to HTML title
        return self._title
    def _get_best_description(self) -> Optional[str]:
        """Get the best available description from all sources."""
        # Priority order: Open Graph > Twitter > meta description > JSON-LD
        # Check Open Graph
        if 'description' in self._og_tags:
            return self._og_tags['description']
        # Check Twitter Cards
        if 'description' in self._twitter_tags:
            return self._twitter_tags['description']
        # Check meta description
        if 'description' in self._meta_tags:
            return self._meta_tags['description']
        # Check JSON-LD
        for type_name, items in self._json_ld.items():
            for item in items:
                if 'description' in item:
                    return item['description']
        return None
    def _get_best_author(self) -> Optional[str]:
        """Get the best available author from all sources."""
        # Check meta tags
        if 'author' in self._meta_tags:
            return self._meta_tags['author']
        # Check JSON-LD
        for type_name, items in self._json_ld.items():
            for item in items:
                if 'author' in item:
                    author = item['author']
                    if isinstance(author, dict) and 'name' in author:
                        return author['name']
                    elif isinstance(author, str):
                        return author
                elif 'creator' in item:
                    creator = item['creator']
                    if isinstance(creator, dict) and 'name' in creator:
                        return creator['name']
                    elif isinstance(creator, str):
                        return creator
        return None
    def _get_keywords(self) -> Optional[str]:
        """Get keywords from meta tags."""
        return self._meta_tags.get('keywords')
    def _get_language(self) -> Optional[str]:
        """Get language from meta tags or HTML lang attribute."""
        # Check meta tags first
        if 'language' in self._meta_tags:
            return self._meta_tags['language']
        # Could also extract from html lang attribute if needed
        return None
    def _get_cover_image(self) -> Optional[str]:
        """Get the best available cover image from all sources."""
        # Check Open Graph
        if 'image' in self._og_tags:
            return self._og_tags['image']
        # Check Twitter Cards
        if 'image' in self._twitter_tags:
            return self._twitter_tags['image']
        # Check JSON-LD
        for type_name, items in self._json_ld.items():
            for item in items:
                if 'image' in item:
                    image = item['image']
                    if isinstance(image, dict) and 'url' in image:
                        return image['url']
                    elif isinstance(image, str):
                        return image
        return None
    def _get_publisher(self) -> Optional[str]:
        """Get publisher from JSON-LD or other sources."""
        # Check JSON-LD
        for type_name, items in self._json_ld.items():
            for item in items:
                if 'publisher' in item:
                    publisher = item['publisher']
                    if isinstance(publisher, dict) and 'name' in publisher:
                        return publisher['name']
                    elif isinstance(publisher, str):
                        return publisher
        return None
    def _get_publication_date(self) -> Optional[str]:
        """Get publication date from JSON-LD or other sources."""
        # Check JSON-LD
        for type_name, items in self._json_ld.items():
            for item in items:
                if 'datePublished' in item:
                    return item['datePublished']
                elif 'publishDate' in item:
                    return item['publishDate']
        return None
    def _parse_attributes(self, attr_string: str) -> Dict[str, str]:
        """
        Parse HTML attributes from a string.
        Args:
            attr_string: String containing HTML attributes
        Returns:
            Dictionary of attribute name-value pairs
        """
        attrs = {}
        # Regular expression to match attribute="value" or attribute='value'
        attr_pattern = r'(\w+)=(?:"([^"]*)"|\'([^\']*)|([^\s>]+))'
        for match in re.finditer(attr_pattern, attr_string):
            name = match.group(1).lower()
            value = match.group(2) or match.group(3) or match.group(4) or ''
            attrs[name] = value
        # Handle standalone attributes (like charset)
        standalone_pattern = r'\b(\w+)(?!=)'
        for match in re.finditer(standalone_pattern, attr_string):
            attr_name = match.group(1).lower()
            if attr_name not in attrs:
                attrs[attr_name] = ''
        return attrs
    def _clean_text(self, text: str) -> str:
        """
        Clean up text content by removing extra whitespace and HTML entities.
        Args:
            text: The text to clean
        Returns:
            Cleaned text
        """
        # Remove extra whitespace
        cleaned = re.sub(r'\s+', ' ', text).strip()
        # Decode common HTML entities
        entities = {
            '&lt;': '<',
            '&gt;': '>',
            '&amp;': '&',
            '&quot;': '"',
            '&apos;': "'",
            '&nbsp;': ' ',
        }
        for entity, char in entities.items():
            cleaned = cleaned.replace(entity, char)
        return cleaned
--- a/pyWebLayout/io/readers/html_resources.py
+++ b/pyWebLayout/io/readers/html_resources.py
@ -0,0 +1,483 @@
 """
 HTML resources reader for pyWebLayout.
 This module provides specialized functionality for extracting resources
 from HTML documents, such as stylesheets, scripts, and external files.
 """
 from typing import Dict, Any, Optional, List
 import re
 import urllib.parse
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.io.readers.base import ResourceReader
 class HTMLResourceReader(ResourceReader):
    """
    Specialized reader for extracting resources from HTML documents.
    This class handles CSS stylesheets, JavaScript files, images,
    and other external resources referenced in HTML.
    """
    def __init__(self):
        """Initialize the HTML resource reader."""
        self._stylesheets = []
        self._scripts = []
        self._external_resources = {}
        self._inline_styles = {}
        self._inline_scripts = []
    def extract_resources(self, html_content: str, document: Document) -> Dict[str, Any]:
        """
        Extract resources from HTML content.
        Args:
            html_content: The HTML content to parse
            document: The document to populate with resources
        Returns:
            Dictionary of extracted resources
        """
        # Reset internal state
        self._reset()
        # Extract stylesheets
        self._extract_stylesheets(html_content)
        # Extract scripts
        self._extract_scripts(html_content)
        # Extract other external resources
        self._extract_external_resources(html_content)
        # Extract inline styles
        self._extract_inline_styles(html_content)
        # Extract inline scripts
        self._extract_inline_scripts(html_content)
        # Populate document with extracted resources
        self._populate_document(document)
        # Return all extracted resources
        return {
            'stylesheets': self._stylesheets,
            'scripts': self._scripts,
            'external_resources': self._external_resources,
            'inline_styles': self._inline_styles,
            'inline_scripts': self._inline_scripts
        }
    def _reset(self):
        """Reset internal state for a new extraction."""
        self._stylesheets = []
        self._scripts = []
        self._external_resources = {}
        self._inline_styles = {}
        self._inline_scripts = []
    def _extract_stylesheets(self, html_content: str):
        """
        Extract CSS stylesheet references from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Regular expression to match link tags for stylesheets
        link_pattern = r'<link\s+([^>]+)>'
        for match in re.finditer(link_pattern, html_content, re.IGNORECASE):
            attrs = self._parse_attributes(match.group(1))
            # Check if this is a stylesheet
            rel = attrs.get('rel', '').lower()
            if rel == 'stylesheet':
                href = attrs.get('href', '')
                media = attrs.get('media', 'all')
                type_attr = attrs.get('type', 'text/css')
                if href:
                    stylesheet = {
                        'type': 'external',
                        'href': href,
                        'media': media,
                        'content_type': type_attr
                    }
                    self._stylesheets.append(stylesheet)
            # Handle other link types
            elif rel in ('icon', 'shortcut icon', 'apple-touch-icon'):
                href = attrs.get('href', '')
                if href:
                    self._external_resources[f'icon_{len(self._external_resources)}'] = {
                        'type': 'icon',
                        'rel': rel,
                        'href': href,
                        'sizes': attrs.get('sizes', ''),
                        'content_type': attrs.get('type', '')
                    }
            elif rel == 'preload':
                href = attrs.get('href', '')
                if href:
                    self._external_resources[f'preload_{len(self._external_resources)}'] = {
                        'type': 'preload',
                        'href': href,
                        'as': attrs.get('as', ''),
                        'content_type': attrs.get('type', '')
                    }
    def _extract_scripts(self, html_content: str):
        """
        Extract script references from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Regular expression to match script tags
        script_pattern = r'<script\s*([^>]*)>(.*?)</script>'
        for match in re.finditer(script_pattern, html_content, re.IGNORECASE | re.DOTALL):
            attrs_str = match.group(1)
            content = match.group(2).strip()
            attrs = self._parse_attributes(attrs_str)
            src = attrs.get('src', '')
            script_type = attrs.get('type', 'text/javascript')
            if src:
                # External script
                script = {
                    'type': 'external',
                    'src': src,
                    'content_type': script_type,
                    'async': 'async' in attrs,
                    'defer': 'defer' in attrs,
                    'integrity': attrs.get('integrity', ''),
                    'crossorigin': attrs.get('crossorigin', '')
                }
                self._scripts.append(script)
            elif content:
                # Inline script
                script = {
                    'type': 'inline',
                    'content': content,
                    'content_type': script_type
                }
                self._scripts.append(script)
    def _extract_external_resources(self, html_content: str):
        """
        Extract other external resources from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Extract images
        img_pattern = r'<img\s+([^>]+)>'
        for match in re.finditer(img_pattern, html_content, re.IGNORECASE):
            attrs = self._parse_attributes(match.group(1))
            src = attrs.get('src', '')
            if src:
                self._external_resources[f'image_{len(self._external_resources)}'] = {
                    'type': 'image',
                    'src': src,
                    'alt': attrs.get('alt', ''),
                    'width': attrs.get('width', ''),
                    'height': attrs.get('height', ''),
                    'loading': attrs.get('loading', ''),
                    'srcset': attrs.get('srcset', '')
                }
        # Extract audio
        audio_pattern = r'<audio\s+([^>]+)>'
        for match in re.finditer(audio_pattern, html_content, re.IGNORECASE):
            attrs = self._parse_attributes(match.group(1))
            src = attrs.get('src', '')
            if src:
                self._external_resources[f'audio_{len(self._external_resources)}'] = {
                    'type': 'audio',
                    'src': src,
                    'controls': 'controls' in attrs,
                    'autoplay': 'autoplay' in attrs,
                    'loop': 'loop' in attrs,
                    'muted': 'muted' in attrs
                }
        # Extract video
        video_pattern = r'<video\s+([^>]+)>'
        for match in re.finditer(video_pattern, html_content, re.IGNORECASE):
            attrs = self._parse_attributes(match.group(1))
            src = attrs.get('src', '')
            if src:
                self._external_resources[f'video_{len(self._external_resources)}'] = {
                    'type': 'video',
                    'src': src,
                    'controls': 'controls' in attrs,
                    'autoplay': 'autoplay' in attrs,
                    'loop': 'loop' in attrs,
                    'muted': 'muted' in attrs,
                    'width': attrs.get('width', ''),
                    'height': attrs.get('height', ''),
                    'poster': attrs.get('poster', '')
                }
        # Extract embed/object resources
        embed_pattern = r'<embed\s+([^>]+)>'
        for match in re.finditer(embed_pattern, html_content, re.IGNORECASE):
            attrs = self._parse_attributes(match.group(1))
            src = attrs.get('src', '')
            if src:
                self._external_resources[f'embed_{len(self._external_resources)}'] = {
                    'type': 'embed',
                    'src': src,
                    'content_type': attrs.get('type', ''),
                    'width': attrs.get('width', ''),
                    'height': attrs.get('height', '')
                }
        # Extract iframe sources
        iframe_pattern = r'<iframe\s+([^>]+)>'
        for match in re.finditer(iframe_pattern, html_content, re.IGNORECASE):
            attrs = self._parse_attributes(match.group(1))
            src = attrs.get('src', '')
            if src:
                self._external_resources[f'iframe_{len(self._external_resources)}'] = {
                    'type': 'iframe',
                    'src': src,
                    'width': attrs.get('width', ''),
                    'height': attrs.get('height', ''),
                    'loading': attrs.get('loading', ''),
                    'sandbox': attrs.get('sandbox', '')
                }
    def _extract_inline_styles(self, html_content: str):
        """
        Extract inline CSS styles from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # Extract style blocks
        style_pattern = r'<style\s*([^>]*)>(.*?)</style>'
        for i, match in enumerate(re.finditer(style_pattern, html_content, re.IGNORECASE | re.DOTALL)):
            attrs_str = match.group(1)
            content = match.group(2).strip()
            attrs = self._parse_attributes(attrs_str)
            if content:
                style_block = {
                    'content': content,
                    'media': attrs.get('media', 'all'),
                    'content_type': attrs.get('type', 'text/css')
                }
                self._inline_styles[f'style_block_{i}'] = style_block
        # Extract inline style attributes (this would be more complex
        # as it requires parsing all elements with style attributes)
        style_attr_pattern = r'<[^>]+style\s*=\s*["\']([^"\']+)["\'][^>]*>'
        for i, match in enumerate(re.finditer(style_attr_pattern, html_content, re.IGNORECASE)):
            style_content = match.group(1)
            if style_content:
                style_attr = {
                    'content': style_content,
                    'type': 'attribute'
                }
                self._inline_styles[f'style_attr_{i}'] = style_attr
    def _extract_inline_scripts(self, html_content: str):
        """
        Extract inline JavaScript from HTML content.
        Args:
            html_content: The HTML content to parse
        """
        # This is already handled in _extract_scripts, but we keep this
        # method for consistency and potential future extensions
        pass
    def _populate_document(self, document: Document):
        """
        Populate the document with extracted resources.
        Args:
            document: The document to populate
        """
        # Add stylesheets
        for stylesheet in self._stylesheets:
            document.add_stylesheet(stylesheet)
        # Add scripts
        for script in self._scripts:
            if script['type'] == 'inline':
                document.add_script(script['content'])
            else:
                # For external scripts, we store them as resources
                script_name = f"script_{len(document._resources)}"
                document.add_resource(script_name, script)
        # Add external resources
        for name, resource in self._external_resources.items():
            document.add_resource(name, resource)
        # Add inline styles as stylesheets
        for name, style in self._inline_styles.items():
            if style.get('type') != 'attribute':  # Don't add individual style attributes
                parsed_style = self._parse_css(style['content'])
                if parsed_style:
                    document.add_stylesheet({
                        'type': 'inline',
                        'content': style['content'],
                        'parsed': parsed_style,
                        'media': style.get('media', 'all')
                    })
    def _parse_attributes(self, attr_string: str) -> Dict[str, str]:
        """
        Parse HTML attributes from a string.
        Args:
            attr_string: String containing HTML attributes
        Returns:
            Dictionary of attribute name-value pairs
        """
        attrs = {}
        # Regular expression to match attribute="value" or attribute='value'
        attr_pattern = r'(\w+)=(?:"([^"]*)"|\'([^\']*)|([^\s>]+))'
        for match in re.finditer(attr_pattern, attr_string):
            name = match.group(1).lower()
            value = match.group(2) or match.group(3) or match.group(4) or ''
            attrs[name] = value
        # Handle standalone attributes (like async, defer)
        standalone_pattern = r'\b(\w+)(?!=)'
        for match in re.finditer(standalone_pattern, attr_string):
            attr_name = match.group(1).lower()
            if attr_name not in attrs:
                attrs[attr_name] = ''
        return attrs
    def _parse_css(self, css_str: str) -> Dict[str, Dict[str, str]]:
        """
        Parse a CSS stylesheet.
        Args:
            css_str: CSS stylesheet string
        Returns:
            Dictionary of selectors and their style properties
        """
        stylesheet = {}
        # Remove comments
        css_str = re.sub(r'/\*.*?\*/', '', css_str, flags=re.DOTALL)
        # Split into rule sets
        rule_sets = css_str.split('}')
        for rule_set in rule_sets:
            # Split into selector and declarations
            parts = rule_set.split('{', 1)
            if len(parts) != 2:
                continue
            selector = parts[0].strip()
            declarations = parts[1].strip()
            # Parse declarations
            style = self._parse_css_declarations(declarations)
            # Add to stylesheet
            if selector and style:
                stylesheet[selector] = style
        return stylesheet
    def _parse_css_declarations(self, declarations_str: str) -> Dict[str, str]:
        """
        Parse CSS declarations.
        Args:
            declarations_str: CSS declarations string
        Returns:
            Dictionary of CSS properties and values
        """
        declarations = {}
        # Split the declarations string into individual declarations
        decl_list = [d.strip() for d in declarations_str.split(';') if d.strip()]
        for declaration in decl_list:
            # Split into property and value
            parts = declaration.split(':', 1)
            if len(parts) != 2:
                continue
            prop = parts[0].strip().lower()
            value = parts[1].strip()
            # Store the declaration
            declarations[prop] = value
        return declarations
    def resolve_url(self, url: str, base_url: Optional[str] = None) -> str:
        """
        Resolve a relative URL against a base URL.

        URLs starting with ``http://``, ``https://``, ``//`` or ``data:`` are
        returned unchanged, as is any URL when no base is given.

        Args:
            url: The URL to resolve
            base_url: The base URL to resolve against
        Returns:
            The resolved URL
        """
        already_absolute = url.startswith(('http://', 'https://', '//', 'data:')) if base_url else True
        if already_absolute:
            return url
        return urllib.parse.urljoin(base_url, url)
    def get_resource_dependencies(self, resource: Dict[str, Any]) -> List[str]:
        """
        List the URLs/modules a resource's content refers to.

        Only resources of type ``external`` that carry a ``content`` entry are
        analysed. For ``text/css`` content the ``@import`` targets are
        returned; for ``text/javascript`` content the ES6 ``import ... from``
        targets followed by the CommonJS ``require(...)`` targets. Anything
        else yields an empty list.

        Args:
            resource: The resource to analyze
        Returns:
            List of dependency URLs
        """
        if resource.get('type') != 'external' or 'content' not in resource:
            return []
        content = resource['content']
        content_type = resource.get('content_type', '')
        if content_type.startswith('text/css'):
            css_imports = re.finditer(
                r'@import\s+(?:url\()?["\']?([^"\'()]+)["\']?\)?',
                content,
                re.IGNORECASE
            )
            return [found.group(1) for found in css_imports]
        if content_type.startswith('text/javascript'):
            # Basic detection only: ES6 imports first, then CommonJS requires.
            es6_imports = re.finditer(r'import\s+.*?\s+from\s+["\']([^"\']+)["\']', content)
            requires = re.finditer(r'require\(\s*["\']([^"\']+)["\']\s*\)', content)
            found_modules = [found.group(1) for found in es6_imports]
            found_modules.extend(found.group(1) for found in requires)
            return found_modules
        return []
--- a/pyWebLayout/io/readers/html_style.py
+++ b/pyWebLayout/io/readers/html_style.py
@ -0,0 +1,281 @@
 """
 HTML style management for pyWebLayout.
 This module provides specialized functionality for handling CSS styles,
 style stacks, and style parsing in HTML documents.
 """
 from typing import Dict, List, Any, Optional, Tuple
 import re
 from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
 class HTMLStyleManager:
    """
    Manages CSS styles and style stacks during HTML parsing.

    The manager holds one "current" style dictionary and a stack of the
    dictionaries that were current before each ``push_style`` call, so that
    nested elements can override properties and restore them afterwards.
    """

    # Named CSS colours understood by parse_color, as RGB triples.
    _NAMED_COLORS = {
        'black': (0, 0, 0),
        'white': (255, 255, 255),
        'red': (255, 0, 0),
        'green': (0, 128, 0),
        'blue': (0, 0, 255),
        'yellow': (255, 255, 0),
        'cyan': (0, 255, 255),
        'magenta': (255, 0, 255),
        'gray': (128, 128, 128),
        'grey': (128, 128, 128),
        'silver': (192, 192, 192),
        'maroon': (128, 0, 0),
        'olive': (128, 128, 0),
        'navy': (0, 0, 128),
        'purple': (128, 0, 128),
        'teal': (0, 128, 128),
        'lime': (0, 255, 0),
        'aqua': (0, 255, 255),
        'fuchsia': (255, 0, 255),
    }

    # '#RGB', '#RGBA', '#RRGGBB' or '#RRGGBBAA'; matched against lower-cased
    # input. An explicit digit class is used because int(x, 16) would also
    # accept sign characters such as '+f'.
    _HEX_COLOR_RE = re.compile(r'#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})')

    def __init__(self):
        """Initialize the style manager with an empty stack and the default style."""
        self._style_stack: List[Dict[str, Any]] = []
        self._current_style = self._get_default_style()

    def _get_default_style(self) -> Dict[str, Any]:
        """Get the default style settings (a fresh dictionary on every call)."""
        return {
            'font_size': 12,
            'font_weight': FontWeight.NORMAL,
            'font_style': FontStyle.NORMAL,
            'decoration': TextDecoration.NONE,
            'color': (0, 0, 0),
            'background': None,
            'language': 'en_US'
        }

    def reset(self):
        """Reset the style manager to initial state."""
        self._style_stack = []
        self._current_style = self._get_default_style()

    def push_style(self, style: Dict[str, Any]):
        """
        Push a new style onto the style stack.

        The current style is saved on the stack and then overlaid with the
        given properties; properties not mentioned in ``style`` are inherited.

        Args:
            style: The style properties to apply on top of the current style
        """
        # Save a snapshot so pop_style can restore it unchanged.
        self._style_stack.append(self._current_style.copy())
        self._current_style.update(style)

    def pop_style(self):
        """Restore the style saved by the matching push_style; no-op on an empty stack."""
        if self._style_stack:
            self._current_style = self._style_stack.pop()

    def get_current_style(self) -> Dict[str, Any]:
        """Get a (shallow) copy of the current style."""
        return self._current_style.copy()

    def get_tag_style(self, tag: str) -> Dict[str, Any]:
        """
        Get the default style for a tag.

        Args:
            tag: The tag name

        Returns:
            A new dictionary of style properties (empty for unknown tags);
            callers may mutate it freely.
        """
        # NOTE: 'font_family' is emitted for code/pre but create_font does
        # not read it.
        tag_styles = {
            'h1': {'font_size': 24, 'font_weight': FontWeight.BOLD},
            'h2': {'font_size': 20, 'font_weight': FontWeight.BOLD},
            'h3': {'font_size': 18, 'font_weight': FontWeight.BOLD},
            'h4': {'font_size': 16, 'font_weight': FontWeight.BOLD},
            'h5': {'font_size': 14, 'font_weight': FontWeight.BOLD},
            'h6': {'font_size': 12, 'font_weight': FontWeight.BOLD},
            'b': {'font_weight': FontWeight.BOLD},
            'strong': {'font_weight': FontWeight.BOLD},
            'i': {'font_style': FontStyle.ITALIC},
            'em': {'font_style': FontStyle.ITALIC},
            'u': {'decoration': TextDecoration.UNDERLINE},
            'a': {'decoration': TextDecoration.UNDERLINE, 'color': (0, 0, 255)},
            'code': {'font_family': 'monospace', 'background': (240, 240, 240, 255)},
            'pre': {'font_family': 'monospace'},
        }
        return tag_styles.get(tag, {})

    def create_font(self) -> Font:
        """
        Create a Font object from the current style.

        Returns:
            Font: A font object with the current style settings
        """
        style = self._current_style
        return Font(
            font_size=style['font_size'],
            colour=style['color'],
            weight=style['font_weight'],
            style=style['font_style'],
            decoration=style['decoration'],
            background=style['background'],
            # The keyword is spelled 'langauge' in Font.__init__; keep in sync.
            langauge=style['language']
        )

    @staticmethod
    def _parse_font_size(value: str) -> Optional[int]:
        """
        Parse a lower-cased CSS font-size such as '14px' or '10.5pt'.

        'px' and 'pt' are treated as the same unit (no conversion is done).
        Fractional sizes are rounded to the nearest integer.

        Returns:
            The size as a positive int, or None when the value has another
            unit, is not a number, or is not greater than zero.
        """
        if not value.endswith(('px', 'pt')):
            return None
        try:
            size = int(round(float(value[:-2])))
        except (ValueError, OverflowError):
            # Not a number, or 'nan' / 'inf'.
            return None
        return size if size > 0 else None

    def parse_inline_style(self, style_str: str) -> Dict[str, Any]:
        """
        Parse inline CSS style string.

        Recognised properties are font-size, font-weight, font-style,
        text-decoration, color and background-color. Unknown properties and
        unsupported values are ignored. Keyword values are matched
        case-insensitively.

        Args:
            style_str: CSS style string (``prop: value; prop: value``)

        Returns:
            Dictionary of style properties keyed like the current-style dict
        """
        if not style_str:
            return {}
        style_dict: Dict[str, Any] = {}
        for declaration in style_str.split(';'):
            prop, separator, value = declaration.partition(':')
            if not separator:
                # Empty chunk or a declaration without a value.
                continue
            prop = prop.strip().lower()
            keyword = value.strip().lower()
            if prop == 'font-size':
                size = self._parse_font_size(keyword)
                if size is not None:
                    style_dict['font_size'] = size
            elif prop == 'font-weight':
                if keyword == 'bold':
                    style_dict['font_weight'] = FontWeight.BOLD
                elif keyword == 'normal':
                    style_dict['font_weight'] = FontWeight.NORMAL
            elif prop == 'font-style':
                if keyword == 'italic':
                    style_dict['font_style'] = FontStyle.ITALIC
                elif keyword == 'normal':
                    style_dict['font_style'] = FontStyle.NORMAL
            elif prop == 'text-decoration':
                if keyword == 'underline':
                    style_dict['decoration'] = TextDecoration.UNDERLINE
                elif keyword == 'line-through':
                    style_dict['decoration'] = TextDecoration.STRIKETHROUGH
                elif keyword == 'none':
                    style_dict['decoration'] = TextDecoration.NONE
            elif prop == 'color':
                color = self.parse_color(keyword)
                if color:
                    style_dict['color'] = color
            elif prop == 'background-color':
                color = self.parse_color(keyword)
                if color:
                    # Backgrounds are RGBA; parsed colours are fully opaque.
                    style_dict['background'] = color + (255,)
        return style_dict

    def parse_color(self, color_str: str) -> Optional[Tuple[int, int, int]]:
        """
        Parse a CSS color string.

        Supported forms: the named colours in ``_NAMED_COLORS``, hex
        notation ('#RGB', '#RGBA', '#RRGGBB', '#RRGGBBAA'; alpha is ignored),
        ``rgb(r, g, b)`` and ``rgba(r, g, b, a)`` with integer channels
        (alpha is ignored). Matching is case-insensitive.

        Out-of-range channels are treated differently by the two functional
        forms: ``rgb()`` is rejected (None), ``rgba()`` is clamped to 255.

        Args:
            color_str: CSS color string

        Returns:
            RGB tuple or None if parsing fails
        """
        color_str = color_str.lower().strip()

        named = self._NAMED_COLORS.get(color_str)
        if named is not None:
            return named

        hex_match = self._HEX_COLOR_RE.fullmatch(color_str)
        if hex_match:
            digits = hex_match.group(1)
            if len(digits) <= 4:
                # Short form: each digit is doubled ('#abc' -> 'aabbcc').
                digits = ''.join(digit * 2 for digit in digits)
            return (int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16))

        # The groups are \d+, so int() cannot fail and values cannot be negative.
        rgb_match = re.match(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
        if rgb_match:
            r, g, b = (int(group) for group in rgb_match.groups())
            if max(r, g, b) > 255:
                return None  # Invalid color values
            return (r, g, b)

        rgba_match = re.match(
            r'rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)', color_str
        )
        if rgba_match:
            r, g, b = (min(255, int(group)) for group in rgba_match.groups())
            return (r, g, b)

        # Failed to parse color
        return None

    def apply_style_to_element(self, tag: str, attrs: Dict[str, str]) -> Dict[str, Any]:
        """
        Apply combined styles (tag defaults + inline styles) for an element.

        Args:
            tag: The HTML tag name
            attrs: Dictionary of tag attributes; only 'style' is read

        Returns:
            Combined style dictionary (inline values override tag defaults)
        """
        # get_tag_style returns a fresh dict, so updating it is safe.
        style = self.get_tag_style(tag)
        if 'style' in attrs:
            style.update(self.parse_inline_style(attrs['style']))
        return style
--- a/pyWebLayout/io/readers/html_text.py
+++ b/pyWebLayout/io/readers/html_text.py
@ -0,0 +1,163 @@
 """
 HTML text processing for pyWebLayout.
 This module provides specialized functionality for handling text content,
 entity references, and word creation in HTML documents.
 """
 from typing import Optional
 from pyWebLayout.abstract.inline import Word
 from pyWebLayout.abstract.block import Parapgraph
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 class HTMLTextProcessor:
    """
    Processes text content during HTML parsing.

    Text, entity references and character references are accumulated in a
    buffer; ``flush_text`` splits the buffer on whitespace and appends one
    styled ``Word`` per token to the current paragraph.
    """

    def __init__(self, style_manager: HTMLStyleManager):
        """
        Initialize the text processor.

        Args:
            style_manager: The style manager for creating styled words
        """
        self._style_manager = style_manager
        self._text_buffer = ""
        self._current_paragraph: Optional[Parapgraph] = None

    def reset(self):
        """Reset the text processor state (empty buffer, no paragraph)."""
        self._text_buffer = ""
        self._current_paragraph = None

    def set_current_paragraph(self, paragraph: Optional[Parapgraph]):
        """
        Set the current paragraph for text output.

        Args:
            paragraph: The paragraph to receive text, or None
        """
        self._current_paragraph = paragraph

    def add_text(self, text: str):
        """
        Add text to the buffer.

        Args:
            text: The text to add
        """
        self._text_buffer += text

    def add_entity_reference(self, name: str):
        """
        Add an HTML entity reference to the buffer.

        The name is looked up in the standard library's HTML5 entity table;
        unknown names are kept literally as ``&name;``.

        Args:
            name: The entity name without '&' and ';' (e.g., 'lt', 'gt', 'amp')
        """
        # Local import keeps this block self-contained. Keys in the html5
        # table include the trailing semicolon (e.g. 'gt;').
        from html.entities import html5
        self._text_buffer += html5.get(f'{name};', f'&{name};')

    def add_character_reference(self, name: str):
        """
        Add a character reference to the buffer.

        Args:
            name: The character reference without '&#' and ';' - decimal
                digits, or 'x'/'X' followed by hexadecimal digits.
                Invalid references are kept literally as ``&#name;``.
        """
        try:
            if name.startswith(('x', 'X')):
                # Hexadecimal reference; HTML allows either case for the marker.
                char = chr(int(name[1:], 16))
            else:
                # Decimal reference
                char = chr(int(name))
        except (ValueError, OverflowError):
            # Not a number, or outside the valid code point range.
            char = f'&#{name};'
        self._text_buffer += char

    def flush_text(self) -> bool:
        """
        Flush the text buffer, creating words as needed.

        The buffer is emptied in every case, including when there is no
        current paragraph (the pending text is then discarded).

        Returns:
            True if at least one word was added to the current paragraph,
            False otherwise
        """
        paragraph = self._current_paragraph
        # Compare with None explicitly: a paragraph object that happens to be
        # falsy (e.g. one defining __len__ while still empty) must still
        # receive text.
        words = self._text_buffer.split() if paragraph is not None else []
        if not words:
            self._text_buffer = ""
            return False
        for word_text in words:
            # One Font per word, so words never share a font instance.
            font = self._style_manager.create_font()
            paragraph.add_word(Word(word_text, font))
        self._text_buffer = ""
        return True

    def has_pending_text(self) -> bool:
        """
        Check if there is pending text in the buffer.

        Returns:
            True if the buffer holds anything other than whitespace
        """
        return bool(self._text_buffer.strip())

    def get_buffer_content(self) -> str:
        """
        Get the current buffer content without flushing.

        Returns:
            The current text buffer content
        """
        return self._text_buffer

    def clear_buffer(self):
        """Clear the text buffer without creating words."""
        self._text_buffer = ""
--- a/pyWebLayout/layout.py
+++ b/pyWebLayout/layout.py
@ -0,0 +1,11 @@
 from enum import Enum
 class Alignment(Enum):
    """
    Alignment constants, each with a distinct integer value (1-6).

    pyWebLayout/table.py imports this enum and uses ``CENTER`` as the default
    for both its ``halign`` and ``valign`` parameters.

    NOTE: pyWebLayout/style/alignment.py declares an enum with the same name
    and members. The two are separate classes, so a member of one does not
    compare equal to the same-named member of the other.
    """
    LEFT = 1
    CENTER = 2
    RIGHT = 3
    TOP = 4
    BOTTOM = 5
    JUSTIFY = 6
--- a/pyWebLayout/localisation.py
+++ b/pyWebLayout/localisation.py
@ -0,0 +1 @@
 ## list languages
--- a/pyWebLayout/style.py
+++ b/pyWebLayout/style.py
@ -0,0 +1,176 @@
 # this should contain classes for how different object can be rendered, e.g. bold, italic, regular
 from PIL import ImageFont
 from enum import Enum
 from typing import Tuple, Union, Optional
 class FontWeight(Enum):
    """Font weight options; each member's value is its lower-case name."""
    NORMAL = "normal"
    BOLD = "bold"
 class FontStyle(Enum):
    """Font style (slant) options; each member's value is its lower-case name."""
    NORMAL = "normal"
    ITALIC = "italic"
 class TextDecoration(Enum):
    """Text decoration options; each member's value is its lower-case name."""
    NONE = "none"
    UNDERLINE = "underline"
    # The value is "strikethrough", not the CSS keyword "line-through".
    STRIKETHROUGH = "strikethrough"
 class Font:
    """
    Bundle of text rendering properties: font face, size, colour, weight,
    style, decoration, background and language.

    The PIL font object is loaded once at construction time. The ``with_*``
    methods return a new Font rather than modifying this one.
    """

    def __init__(self,
                 font_path: Optional[str] = None,
                 font_size: int = 12,
                 colour: Tuple[int, int, int] = (0, 0, 0),
                 weight: FontWeight = FontWeight.NORMAL,
                 style: FontStyle = FontStyle.NORMAL,
                 decoration: TextDecoration = TextDecoration.NONE,
                 background: Optional[Tuple[int, int, int, int]] = None,
                 langauge = "en_EN"):
        """
        Initialize a Font object with the specified properties.

        Args:
            font_path: Path to the font file (.ttf, .otf). If None, uses default font.
            font_size: Size of the font in points.
            colour: RGB color tuple for the text.
            weight: Font weight (normal or bold).
            style: Font style (normal or italic).
            decoration: Text decoration (none, underline, or strikethrough).
            background: RGBA background color for the text. If None (or any
                other falsy value), a transparent white background is used.
            langauge: Language code, stored on the public ``language``
                attribute. The misspelled parameter name is kept because
                callers pass it by keyword.
        """
        self._font_path = font_path
        self._font_size = font_size
        self._colour = colour
        self._weight = weight
        self._style = style
        self._decoration = decoration
        self._background = background if background else (255, 255, 255, 0)
        self.language = langauge
        # Load the font file or use default
        self._load_font()

    def _load_font(self):
        """
        Load the PIL font into ``self._font``.

        Loading is best-effort: on any failure the error is printed and
        PIL's default font is used instead.
        """
        try:
            if self._font_path:
                self._font = ImageFont.truetype(self._font_path, self._font_size)
            else:
                self._font = self._load_default_font()
        except Exception as e:
            print(f"Error loading font: {e}")
            self._font = ImageFont.load_default()

    def _load_default_font(self):
        """Return PIL's default font, scaled to the requested size when PIL supports it."""
        try:
            # Pillow >= 10.1 accepts a size for its bundled default font.
            return ImageFont.load_default(size=self._font_size)
        except TypeError:
            # Older Pillow: load_default() takes no arguments and returns a
            # fixed-size font, so the requested size cannot be honoured.
            return ImageFont.load_default()

    @property
    def font(self):
        """Get the PIL ImageFont object"""
        return self._font

    @property
    def font_size(self):
        """Get the font size"""
        return self._font_size

    @property
    def colour(self):
        """Get the text color"""
        return self._colour

    @property
    def color(self):
        """Alias for colour (American spelling)"""
        return self._colour

    @property
    def background(self):
        """Get the background color"""
        return self._background

    @property
    def weight(self):
        """Get the font weight"""
        return self._weight

    @property
    def style(self):
        """Get the font style"""
        return self._style

    @property
    def decoration(self):
        """Get the text decoration"""
        return self._decoration

    def _copy_with(self, **changes):
        """Build a new Font from this one's settings, overridden by ``changes``."""
        settings = {
            'font_path': self._font_path,
            'font_size': self._font_size,
            'colour': self._colour,
            'weight': self._weight,
            'style': self._style,
            'decoration': self._decoration,
            'background': self._background,
            # Carry the language over; otherwise every derived font would
            # silently fall back to the constructor default.
            'langauge': self.language,
        }
        settings.update(changes)
        return Font(**settings)

    def with_size(self, size: int):
        """Create a new Font object with modified size"""
        return self._copy_with(font_size=size)

    def with_colour(self, colour: Tuple[int, int, int]):
        """Create a new Font object with modified colour"""
        return self._copy_with(colour=colour)

    def with_weight(self, weight: FontWeight):
        """Create a new Font object with modified weight"""
        return self._copy_with(weight=weight)

    def with_style(self, style: FontStyle):
        """Create a new Font object with modified style"""
        return self._copy_with(style=style)

    def with_decoration(self, decoration: TextDecoration):
        """Create a new Font object with modified decoration"""
        return self._copy_with(decoration=decoration)
--- a/pyWebLayout/style/init.py
+++ b/pyWebLayout/style/init.py
@ -0,0 +1,17 @@
 """
 Styling module for the pyWebLayout library.
 This package contains styling-related components including:
 - Font handling and text styling
 - Color management
 - Text decoration and formatting
 - Alignment and positioning properties
 """
 # Import alignment options
 from pyWebLayout.style.alignment import Alignment
 # Import font-related classes
 from pyWebLayout.style.fonts import (
    Font, FontWeight, FontStyle, TextDecoration
 )
--- a/pyWebLayout/style/alignment.py
+++ b/pyWebLayout/style/alignment.py
@ -0,0 +1,16 @@
 """
 Alignment options for text and elements in the pyWebLayout library.
 """
 from enum import Enum
 class Alignment(Enum):
    """
    Enum for alignment options used in layout and rendering.

    Each member has a distinct integer value (1-6).

    NOTE: pyWebLayout/layout.py declares an enum with the same name and
    members. The two are separate classes, so a member of one does not
    compare equal to the same-named member of the other.
    """
    LEFT = 1
    CENTER = 2
    RIGHT = 3
    TOP = 4
    BOTTOM = 5
    JUSTIFY = 6
--- a/pyWebLayout/style/fonts.py
+++ b/pyWebLayout/style/fonts.py
@ -0,0 +1,176 @@
 # this should contain classes for how different object can be rendered, e.g. bold, italic, regular
 from PIL import ImageFont
 from enum import Enum
 from typing import Tuple, Union, Optional
 class FontWeight(Enum):
    """Font weight options; each member's value is its lower-case name."""
    NORMAL = "normal"
    BOLD = "bold"
 class FontStyle(Enum):
    """Font style (slant) options; each member's value is its lower-case name."""
    NORMAL = "normal"
    ITALIC = "italic"
 class TextDecoration(Enum):
    """Text decoration options; each member's value is its lower-case name."""
    NONE = "none"
    UNDERLINE = "underline"
    # The value is "strikethrough", not the CSS keyword "line-through".
    STRIKETHROUGH = "strikethrough"
 class Font:
    """
    Bundle of text rendering properties: font face, size, colour, weight,
    style, decoration, background and language.

    The PIL font object is loaded once at construction time. The ``with_*``
    methods return a new Font rather than modifying this one.
    """

    def __init__(self,
                 font_path: Optional[str] = None,
                 font_size: int = 12,
                 colour: Tuple[int, int, int] = (0, 0, 0),
                 weight: FontWeight = FontWeight.NORMAL,
                 style: FontStyle = FontStyle.NORMAL,
                 decoration: TextDecoration = TextDecoration.NONE,
                 background: Optional[Tuple[int, int, int, int]] = None,
                 langauge = "en_EN"):
        """
        Initialize a Font object with the specified properties.

        Args:
            font_path: Path to the font file (.ttf, .otf). If None, uses default font.
            font_size: Size of the font in points.
            colour: RGB color tuple for the text.
            weight: Font weight (normal or bold).
            style: Font style (normal or italic).
            decoration: Text decoration (none, underline, or strikethrough).
            background: RGBA background color for the text. If None (or any
                other falsy value), a transparent white background is used.
            langauge: Language code, stored on the public ``language``
                attribute. The misspelled parameter name is kept because
                callers pass it by keyword.
        """
        self._font_path = font_path
        self._font_size = font_size
        self._colour = colour
        self._weight = weight
        self._style = style
        self._decoration = decoration
        self._background = background if background else (255, 255, 255, 0)
        self.language = langauge
        # Load the font file or use default
        self._load_font()

    def _load_font(self):
        """
        Load the PIL font into ``self._font``.

        Loading is best-effort: on any failure the error is printed and
        PIL's default font is used instead.
        """
        try:
            if self._font_path:
                self._font = ImageFont.truetype(self._font_path, self._font_size)
            else:
                self._font = self._load_default_font()
        except Exception as e:
            print(f"Error loading font: {e}")
            self._font = ImageFont.load_default()

    def _load_default_font(self):
        """Return PIL's default font, scaled to the requested size when PIL supports it."""
        try:
            # Pillow >= 10.1 accepts a size for its bundled default font.
            return ImageFont.load_default(size=self._font_size)
        except TypeError:
            # Older Pillow: load_default() takes no arguments and returns a
            # fixed-size font, so the requested size cannot be honoured.
            return ImageFont.load_default()

    @property
    def font(self):
        """Get the PIL ImageFont object"""
        return self._font

    @property
    def font_size(self):
        """Get the font size"""
        return self._font_size

    @property
    def colour(self):
        """Get the text color"""
        return self._colour

    @property
    def color(self):
        """Alias for colour (American spelling)"""
        return self._colour

    @property
    def background(self):
        """Get the background color"""
        return self._background

    @property
    def weight(self):
        """Get the font weight"""
        return self._weight

    @property
    def style(self):
        """Get the font style"""
        return self._style

    @property
    def decoration(self):
        """Get the text decoration"""
        return self._decoration

    def _copy_with(self, **changes):
        """Build a new Font from this one's settings, overridden by ``changes``."""
        settings = {
            'font_path': self._font_path,
            'font_size': self._font_size,
            'colour': self._colour,
            'weight': self._weight,
            'style': self._style,
            'decoration': self._decoration,
            'background': self._background,
            # Carry the language over; otherwise every derived font would
            # silently fall back to the constructor default.
            'langauge': self.language,
        }
        settings.update(changes)
        return Font(**settings)

    def with_size(self, size: int):
        """Create a new Font object with modified size"""
        return self._copy_with(font_size=size)

    def with_colour(self, colour: Tuple[int, int, int]):
        """Create a new Font object with modified colour"""
        return self._copy_with(colour=colour)

    def with_weight(self, weight: FontWeight):
        """Create a new Font object with modified weight"""
        return self._copy_with(weight=weight)

    def with_style(self, style: FontStyle):
        """Create a new Font object with modified style"""
        return self._copy_with(style=style)

    def with_decoration(self, decoration: TextDecoration):
        """Create a new Font object with modified decoration"""
        return self._copy_with(decoration=decoration)
--- a/pyWebLayout/table.py
+++ b/pyWebLayout/table.py
@ -0,0 +1,137 @@
 from pyWebLayout.base import Renderable
 from .concrete.box import Box
 from pyWebLayout.layout import Alignment
 import numpy as np
 from PIL import Image, ImageDraw
 from typing import List, Tuple, Optional
 class TableCell(Box):
    """A Box that represents one table cell and draws a 1-pixel black border."""

    def __init__(self, origin, size, content: Optional[Renderable] = None, 
                 callback=None, sheet=None, mode=None, 
                 halign=Alignment.CENTER, valign=Alignment.CENTER,
                 padding: Tuple[int, int, int, int] = (5, 5, 5, 5)):
        """
        Create a table cell.

        ``content`` and ``padding`` are only stored here; ``render`` does not
        use either of them.

        Args:
            origin: Top-left corner coordinates (forwarded to Box)
            size: Width and height of the cell (forwarded to Box)
            content: Optional renderable content held by the cell
            callback: Optional callback function (forwarded to Box)
            sheet: Optional image sheet (forwarded to Box)
            mode: Optional image mode (forwarded to Box)
            halign: Horizontal alignment (forwarded to Box)
            valign: Vertical alignment (forwarded to Box)
            padding: Padding as (top, right, bottom, left)
        """
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
        self._padding = padding  # (top, right, bottom, left)
        self._content = content

    def set_content(self, content: Renderable):
        """Replace the content held by this cell."""
        self._content = content

    def render(self) -> Image:
        """
        Render the cell: the Box canvas with a border outlined on it.

        The stored content is not drawn by this method.
        """
        image = super().render()
        # The last pixel inside the cell is size - 1 on both axes.
        bottom_right = tuple(self._size - np.array([1, 1]))
        ImageDraw.Draw(image).rectangle(
            [(0, 0), bottom_right],
            outline=(0, 0, 0),
            width=1,
        )
        return image
 class Table(Box):
    """A Box divided into a uniform grid of TableCell objects."""

    def __init__(self, rows: int, columns: int, origin, size, 
                 cell_padding: Tuple[int, int, int, int] = (5, 5, 5, 5),
                 callback=None, sheet=None, mode=None, 
                 halign=Alignment.CENTER, valign=Alignment.CENTER):
        """
        Build a table of ``rows`` x ``columns`` equally sized cells.

        Cell width and height are the table size divided (integer division)
        by the column and row counts.

        Args:
            rows: Number of rows in the table
            columns: Number of columns in the table
            origin: Top-left corner coordinates
            size: Width and height of the table
            cell_padding: Padding for each cell as (top, right, bottom, left)
            callback: Optional callback function
            sheet: Optional image sheet
            mode: Optional image mode
            halign: Horizontal alignment
            valign: Vertical alignment
        """
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
        self._rows = rows
        self._columns = columns
        self._cell_padding = cell_padding

        cell_width = size[0] // columns
        cell_height = size[1] // rows

        # Row-major grid: self._cells[row][col]. Each cell gets its own
        # freshly built origin and size arrays.
        self._cells: List[List[TableCell]] = [
            [
                TableCell(
                    origin=np.array([col * cell_width, row * cell_height]),
                    size=np.array([cell_width, cell_height]),
                    sheet=sheet,
                    mode=mode,
                    halign=halign,
                    valign=valign,
                    padding=cell_padding
                )
                for col in range(columns)
            ]
            for row in range(rows)
        ]

    def add_to_cell(self, x: int, y: int, content: Renderable):
        """
        Put content into one cell of the table.

        Args:
            x: Column index (0-based)
            y: Row index (0-based)
            content: Renderable content to add to the cell

        Raises:
            IndexError: If either index lies outside the table
        """
        inside = 0 <= y < self._rows and 0 <= x < self._columns
        if not inside:
            raise IndexError(f"Cell indices ({x}, {y}) out of range. Table is {self._columns}x{self._rows}")
        self._cells[y][x].set_content(content)

    def render(self) -> Image:
        """Render the table by pasting every rendered cell onto the Box canvas."""
        canvas = super().render()
        for row_index, cell_row in enumerate(self._cells):
            top = row_index * (self._size[1] // self._rows)
            for col_index, cell in enumerate(cell_row):
                left = col_index * (self._size[0] // self._columns)
                tile = cell.render()
                # The tile doubles as its own paste mask.
                canvas.paste(tile, (left, top), tile)
        return canvas
--- a/pyWebLayout/typesetting/init.py
+++ b/pyWebLayout/typesetting/init.py
@ -0,0 +1,15 @@
 """
 Typesetting module for the pyWebLayout library.
 This package handles the organization and arrangement of elements for rendering, including:
 - Flow layout algorithms
 - Container management
 - Element positioning and sizing
 - Content wrapping and overflow
 - Coordinate systems and transformations
 - Pagination for book-like content
 """
 from pyWebLayout.typesetting.flow import FlowLayout
 from pyWebLayout.typesetting.pagination import Paginator, PaginationState
 from pyWebLayout.typesetting.document_pagination import DocumentPaginator, DocumentPaginationState
--- a/pyWebLayout/typesetting/document_pagination.py
+++ b/pyWebLayout/typesetting/document_pagination.py
@ -0,0 +1,323 @@
 """
 Document-aware pagination system for pyWebLayout.
 This module provides functionality for paginating Document and Book objects
 across multiple pages, with the ability to stop, save state, and resume pagination.
 """
 from typing import List, Tuple, Dict, Any, Optional, Iterator, Generator
 import copy
 import json
 from pyWebLayout.core import Layoutable, Renderable
 from pyWebLayout.style import Alignment
 from pyWebLayout.abstract.document import Document, Book, Chapter
 from pyWebLayout.abstract.block import Block
 from pyWebLayout.typesetting.pagination import PaginationState, Paginator
 from pyWebLayout.concrete.page import Page
 class DocumentPaginationState(PaginationState):
    """
    Pagination state enriched with document-level bookkeeping.
    On top of the fields inherited from PaginationState this keeps the
    current chapter, the current section and the set of blocks that have
    already been placed on a page.
    """
    def __init__(self):
        """Create a state positioned at the very start of a document."""
        super().__init__()
        self.current_chapter = 0
        self.current_section = 0
        # Identifiers of the blocks that have already been rendered
        self.rendered_blocks = set()
    def save(self) -> Dict[str, Any]:
        """
        Export the state as a plain dictionary.
        Returns:
            The base-class dictionary extended with the document-specific keys
        """
        snapshot = super().save()
        snapshot['current_chapter'] = self.current_chapter
        snapshot['current_section'] = self.current_section
        # A set is not JSON-serializable, so it is stored as a list
        snapshot['rendered_blocks'] = list(self.rendered_blocks)
        return snapshot
    @classmethod
    def load(cls, state_dict: Dict[str, Any]) -> 'DocumentPaginationState':
        """
        Rebuild a state object from a dictionary produced by save().
        Args:
            state_dict: Dictionary containing pagination state; missing keys
                fall back to their initial values
        Returns:
            A DocumentPaginationState object
        """
        restored = super(DocumentPaginationState, cls).load(state_dict)
        for counter_name in ('current_chapter', 'current_section'):
            setattr(restored, counter_name, state_dict.get(counter_name, 0))
        restored.rendered_blocks = set(state_dict.get('rendered_blocks', []))
        return restored
    def to_json(self) -> str:
        """
        Serialize the state to JSON for persistence.
        Returns:
            JSON string representation of the dictionary returned by save()
        """
        as_dict = self.save()
        return json.dumps(as_dict)
    @classmethod
    def from_json(cls, json_str: str) -> 'DocumentPaginationState':
        """
        Rebuild a state object from a JSON string produced by to_json().
        Args:
            json_str: JSON string representation of state
        Returns:
            A DocumentPaginationState object
        """
        decoded = json.loads(json_str)
        return cls.load(decoded)
 class DocumentPaginator:
    """
    Paginator for Document and Book objects.
    The document's blocks are flattened into one list and stacked vertically
    onto Page objects, one page at a time. Progress is kept in a
    DocumentPaginationState so pagination can be paused, saved and resumed.
    """
    def __init__(
        self,
        document: Document,
        page_size: Tuple[int, int],
        margins: Tuple[int, int, int, int] = (20, 20, 20, 20),  # top, right, bottom, left
        spacing: int = 5,
        halign: Alignment = Alignment.LEFT,
    ):
        """
        Initialize a document paginator.
        Args:
            document: The document to paginate
            page_size: Size of each page (width, height)
            margins: Margins for each page (top, right, bottom, left)
            spacing: Vertical spacing inserted after each block
            halign: Horizontal alignment of elements
        """
        self.document = document
        self.page_size = page_size
        self.margins = margins
        self.spacing = spacing
        self.halign = halign
        self.state = DocumentPaginationState()
        # Flatten the document once so pagination can address blocks by index
        self._blocks = self._collect_blocks()
    def _collect_blocks(self) -> List[Block]:
        """
        Collect all blocks from the document in a flat list.
        For Books the blocks of every chapter are concatenated in order, and
        each chapter that has a title is preceded by a generated H1 heading.
        Returns:
            List of blocks from the document
        """
        if not isinstance(self.document, Book):
            # Regular documents expose their blocks directly
            return list(self.document.blocks)
        # Imported locally, mirroring how the rest of this class pulls in
        # heading-related names only when a Book is being processed
        from pyWebLayout.abstract.block import Heading, HeadingLevel
        from pyWebLayout.abstract.inline import Word
        all_blocks = []
        for chapter in self.document.chapters:
            if chapter.title:
                # Synthesize a heading block carrying the chapter title
                heading = Heading(level=HeadingLevel.H1)
                heading.add_word(Word(chapter.title))
                all_blocks.append(heading)
            all_blocks.extend(chapter.blocks)
        return all_blocks
    def paginate(self, max_pages: Optional[int] = None) -> List[Page]:
        """
        Paginate the document into pages, starting from the beginning.
        The pagination state is reset before any page is produced.
        Args:
            max_pages: Maximum number of pages to generate (None for all);
                zero or a negative value yields no pages
        Returns:
            List of Page objects
        """
        pages = []
        # Always start from a clean state
        self.state = DocumentPaginationState()
        if max_pages is not None and max_pages <= 0:
            # Nothing was requested, so do not consume any content
            return pages
        for page in self._paginate_generator():
            pages.append(page)
            if max_pages is not None and len(pages) >= max_pages:
                break
        return pages
    def paginate_next(self) -> Optional[Page]:
        """
        Paginate and return the next page only.
        A fresh generator is created on every call; continuity between calls
        comes from self.state, which the generator updates before yielding.
        Returns:
            The next Page object, or None if no more content
        """
        try:
            return next(self._paginate_generator())
        except StopIteration:
            return None
    def _paginate_generator(self) -> Generator[Page, None, None]:
        """
        Generator that yields one page at a time.
        Blocks are consumed from self.state.current_element_index onwards. A
        block that does not fit on an otherwise empty page is reported and
        skipped, and pagination continues with the following block.
        Yields:
            A Page object for each page in the document
        """
        current_index = self.state.current_element_index
        current_chapter = self.state.current_chapter
        total_blocks = len(self._blocks)
        is_book = isinstance(self.document, Book)
        if is_book:
            from pyWebLayout.abstract.block import Heading
        # Blocks are stacked from the top margin down to the bottom margin.
        # The limit is the absolute y coordinate of the bottom margin, so the
        # top margin is counted exactly once (as Paginator does).
        top_margin = self.margins[0]
        page_bottom = self.page_size[1] - self.margins[2]
        while current_index < total_blocks:
            page = Page(size=self.page_size)
            blocks_on_page = 0
            cursor_y = top_margin
            while current_index < total_blocks:
                block = self._blocks[current_index]
                # Make sure the block is laid out before reading its size
                if hasattr(block, 'layout'):
                    block.layout()
                # Blocks without a size attribute are treated as zero height
                block_height = getattr(block, 'size', (0, 0))[1]
                if cursor_y + block_height > page_bottom:
                    # Block doesn't fit, move to next page
                    break
                page.add_child(block)
                blocks_on_page += 1
                cursor_y += block_height + self.spacing
                # NOTE: id() values are only meaningful inside the current
                # process, so these entries cannot be matched after a reload
                self.state.rendered_blocks.add(id(block))
                current_index += 1
                # NOTE: simplified chapter detection - any Heading that follows
                # is treated as the start of a new chapter and forces a page
                # break, including headings that live inside a chapter
                if (
                    is_book
                    and current_index < total_blocks
                    and isinstance(self._blocks[current_index], Heading)
                ):
                    current_chapter += 1
                    break
            if not blocks_on_page:
                # The first candidate block is taller than an empty page. Skip
                # it and keep going; ending the generator here would silently
                # drop the rest of the document.
                print(f"Warning: Block at index {current_index} is too large to fit on a page")
                current_index += 1
                self.state.current_element_index = current_index
                continue
            # Record progress before yielding so paginate_next() can resume
            # from a brand-new generator on its next call
            self.state.current_page += 1
            self.state.current_element_index = current_index
            self.state.current_chapter = current_chapter
            page.layout()
            yield page
    def get_state(self) -> Dict[str, Any]:
        """
        Get the current pagination state.
        Returns:
            Dictionary representing pagination state
        """
        return self.state.save()
    def set_state(self, state: Dict[str, Any]) -> None:
        """
        Set the pagination state.
        Args:
            state: Dictionary representing pagination state
        """
        self.state = DocumentPaginationState.load(state)
    def is_complete(self) -> bool:
        """
        Check if pagination is complete.
        Returns:
            True if all blocks have been paginated, False otherwise
        """
        return self.state.current_element_index >= len(self._blocks)
    def get_progress(self) -> float:
        """
        Get the pagination progress as a fraction.
        Returns:
            Fraction of blocks that have been paginated (0.0 to 1.0); an empty
            document counts as fully paginated
        """
        if not self._blocks:
            return 1.0
        return self.state.current_element_index / len(self._blocks)
--- a/pyWebLayout/typesetting/flow.py
+++ b/pyWebLayout/typesetting/flow.py
@ -0,0 +1,155 @@
 """
 Flow layout implementation for pyWebLayout.
 This module provides a flow layout algorithm similar to HTML's normal flow,
 where elements are positioned sequentially, wrapping to the next line when
 they exceed the container width.
 """
 from typing import List, Tuple, Optional, Any
 import numpy as np
 from pyWebLayout.core import Layoutable
 from pyWebLayout.style import Alignment
 class FlowLayout:
    """
    Flow layout algorithm for arranging elements in a container.
    Flow layout places elements sequentially from left to right, wrapping to the
    next line when the elements exceed the container's width. It supports various
    alignment options for both horizontal and vertical positioning.
    """
    @staticmethod
    def layout_elements(
        elements: List[Layoutable],
        container_size: Tuple[int, int],
        padding: Tuple[int, int, int, int] = (0, 0, 0, 0),  # top, right, bottom, left
        spacing: int = 0,
        halign: Alignment = Alignment.LEFT,
        valign: Alignment = Alignment.TOP
    ) -> List[Tuple[int, int]]:
        """
        Layout elements in a flow layout within the given container.
        Args:
            elements: List of layoutable elements to arrange
            container_size: (width, height) tuple for the container
            padding: (top, right, bottom, left) padding inside the container
            spacing: Gap between neighbouring elements in a row and between
                consecutive rows
            halign: Horizontal alignment (LEFT, CENTER, RIGHT)
            valign: Vertical alignment (TOP, CENTER, BOTTOM)
        Returns:
            List of (x, y) positions for each element
        """
        top_pad, right_pad, bottom_pad, left_pad = padding[0], padding[1], padding[2], padding[3]
        # Calculate available width and height after padding
        avail_width = container_size[0] - right_pad - left_pad
        avail_height = container_size[1] - top_pad - bottom_pad
        # Ensure elements are properly laid out internally before measuring
        for element in elements:
            if hasattr(element, 'layout'):
                element.layout()
        positions = []
        current_x = left_pad
        current_y = top_pad
        row_height = 0
        row_start_idx = 0
        # First pass - place elements left to right, wrapping into rows
        for i, element in enumerate(elements):
            # Elements without a size attribute are treated as 0x0
            has_size = hasattr(element, 'size')
            element_width = element.size[0] if has_size else 0
            element_height = element.size[1] if has_size else 0
            # Wrap when the element would cross the right edge; the first
            # element of a row is always kept so an over-wide element cannot
            # produce an endless series of empty rows
            if current_x + element_width > left_pad + avail_width and i > row_start_idx:
                FlowLayout._align_row(
                    positions, elements, row_start_idx, i,
                    left_pad, avail_width, halign, spacing
                )
                # Move to next row
                current_x = left_pad
                current_y += row_height + spacing
                row_height = 0
                row_start_idx = i
            positions.append((current_x, current_y))
            current_x += element_width + spacing
            row_height = max(row_height, element_height)
        # Handle the last (possibly only) row
        if row_start_idx < len(elements):
            FlowLayout._align_row(
                positions, elements, row_start_idx, len(elements),
                left_pad, avail_width, halign, spacing
            )
        # Second pass - shift everything down for CENTER / BOTTOM alignment,
        # but only when the content is shorter than the available height
        if valign != Alignment.TOP:
            total_height = current_y + row_height - top_pad
            if total_height < avail_height:
                offset = 0
                if valign == Alignment.CENTER:
                    offset = (avail_height - total_height) // 2
                elif valign == Alignment.BOTTOM:
                    offset = avail_height - total_height
                positions = [(x, y + offset) for x, y in positions]
        return positions
    @staticmethod
    def _align_row(
        positions: List[Tuple[int, int]],
        elements: List[Any],
        start_idx: int,
        end_idx: int,
        left_margin: int,
        avail_width: int,
        halign: Alignment,
        spacing: int = 0
    ) -> None:
        """
        Adjust positions of elements in a row based on horizontal alignment.
        The positions list is modified in place.
        Args:
            positions: List of element positions to adjust
            elements: List of elements
            start_idx: Start index of the row (inclusive)
            end_idx: End index of the row (exclusive)
            left_margin: Left margin of the container (currently unused; the
                positions already include it)
            avail_width: Available width of the container
            halign: Horizontal alignment
            spacing: Gap that was inserted between neighbouring elements when
                the row was laid out
        """
        if halign == Alignment.LEFT:
            # No adjustment needed for left alignment
            return
        element_count = end_idx - start_idx
        # Width actually occupied by the row: the elements themselves ...
        total_width = sum(
            elements[i].size[0] if hasattr(elements[i], 'size') else 0
            for i in range(start_idx, end_idx)
        )
        # ... plus the gaps between them. Leaving the gaps out would push a
        # right-aligned row past the right edge by (count - 1) * spacing.
        if element_count > 1:
            total_width += (element_count - 1) * spacing
        offset = 0
        if halign == Alignment.CENTER:
            offset = (avail_width - total_width) // 2
        elif halign == Alignment.RIGHT:
            offset = avail_width - total_width
        # Shift every element of the row by the same horizontal amount
        for i in range(start_idx, end_idx):
            positions[i] = (positions[i][0] + offset, positions[i][1])
--- a/pyWebLayout/typesetting/pagination.py
+++ b/pyWebLayout/typesetting/pagination.py
@ -0,0 +1,231 @@
 """
 Pagination system for pyWebLayout.
 This module provides functionality for paginating content across multiple pages,
 with the ability to stop, save state, and resume pagination.
 """
 from typing import List, Tuple, Dict, Any, Optional, Iterator, Generator
 import copy
 from pyWebLayout.core import Layoutable
 from pyWebLayout.style import Alignment
 from pyWebLayout.typesetting.flow import FlowLayout
 class PaginationState:
    """
    Class to hold the state of a pagination process.
    This allows pagination to be paused, saved, and resumed later. Saved
    snapshots are independent copies: changing the live state afterwards
    does not alter a snapshot, and vice versa.
    """
    def __init__(self):
        """Initialize a new pagination state."""
        self.current_page = 0
        self.current_element_index = 0
        self.position_in_element = 0  # For elements that might be split across pages
        self.consumed_elements = []
        self.metadata = {}  # For any additional state information
    def save(self) -> Dict[str, Any]:
        """
        Save the current pagination state to a dictionary.
        Returns:
            A dictionary representing the pagination state; the mutable
            entries are deep copies of the live containers
        """
        return {
            'current_page': self.current_page,
            'current_element_index': self.current_element_index,
            'position_in_element': self.position_in_element,
            # Copied so later mutation of this state cannot rewrite a snapshot
            'consumed_elements': copy.deepcopy(self.consumed_elements),
            'metadata': copy.deepcopy(self.metadata),
        }
    @classmethod
    def load(cls, state_dict: Dict[str, Any]) -> 'PaginationState':
        """
        Load pagination state from a dictionary.
        Args:
            state_dict: Dictionary containing pagination state; missing keys
                fall back to their initial values
        Returns:
            A PaginationState object (an instance of cls, so subclasses get
            their own type back)
        """
        state = cls()
        state.current_page = state_dict.get('current_page', 0)
        state.current_element_index = state_dict.get('current_element_index', 0)
        state.position_in_element = state_dict.get('position_in_element', 0)
        # Copied so the restored state does not share containers with the
        # caller's dictionary
        state.consumed_elements = copy.deepcopy(state_dict.get('consumed_elements', []))
        state.metadata = copy.deepcopy(state_dict.get('metadata', {}))
        return state
 class Paginator:
    """
    Class for paginating content across multiple pages.
    Elements are stacked vertically inside the page margins, one element per
    line, and the progress is kept in a PaginationState between pages.
    """
    def __init__(
        self,
        elements: List[Layoutable],
        page_size: Tuple[int, int],
        margins: Tuple[int, int, int, int] = (20, 20, 20, 20),  # top, right, bottom, left
        spacing: int = 5,
        halign: Alignment = Alignment.LEFT,
    ):
        """
        Initialize a paginator.
        Args:
            elements: List of elements to paginate
            page_size: Size of each page (width, height)
            margins: Margins for each page (top, right, bottom, left)
            spacing: Vertical spacing inserted after each element
            halign: Horizontal alignment of elements
        """
        self.elements = elements
        self.page_size = page_size
        self.margins = margins
        self.spacing = spacing
        self.halign = halign
        self.state = PaginationState()
    def paginate(self, max_pages: Optional[int] = None) -> List[List[Tuple[Layoutable, Tuple[int, int]]]]:
        """
        Paginate all content into pages, starting from the beginning.
        The pagination state is reset before any page is produced.
        Args:
            max_pages: Maximum number of pages to generate (None for all);
                zero or a negative value yields no pages
        Returns:
            List of pages, where each page is a list of (element, position) tuples
        """
        pages = []
        # Always start from a clean state
        self.state = PaginationState()
        if max_pages is not None and max_pages <= 0:
            # Nothing was requested, so do not consume any content
            return pages
        for page in self._paginate_generator():
            pages.append(page)
            if max_pages is not None and len(pages) >= max_pages:
                break
        return pages
    def paginate_next(self) -> Optional[List[Tuple[Layoutable, Tuple[int, int]]]]:
        """
        Paginate and return the next page only.
        A fresh generator is created on every call; continuity between calls
        comes from self.state, which the generator updates before yielding.
        Returns:
            A list of (element, position) tuples for the next page, or None if no more content
        """
        try:
            return next(self._paginate_generator())
        except StopIteration:
            return None
    def _paginate_generator(self) -> Generator[List[Tuple[Layoutable, Tuple[int, int]]], None, None]:
        """
        Generator that yields one page at a time.
        Elements are consumed from self.state.current_element_index onwards.
        An element that does not fit on an otherwise empty page is reported
        and skipped, and pagination continues with the following element.
        Yields:
            A list of (element, position) tuples for each page
        """
        top_margin = self.margins[0]
        left_margin = self.margins[3]
        # Usable width between the left and right margins
        avail_width = self.page_size[0] - self.margins[1] - left_margin
        # Absolute y coordinate of the bottom margin
        page_bottom = self.page_size[1] - self.margins[2]
        current_index = self.state.current_element_index
        total_elements = len(self.elements)
        while current_index < total_elements:
            # Start a new page
            page_elements = []
            current_y = top_margin
            while current_index < total_elements:
                element = self.elements[current_index]
                # Ensure element is laid out before reading its size
                if hasattr(element, 'layout'):
                    element.layout()
                # Elements without a size attribute are treated as 0x0
                has_size = hasattr(element, 'size')
                element_width = element.size[0] if has_size else 0
                element_height = element.size[1] if has_size else 0
                if current_y + element_height > page_bottom:
                    # Element doesn't fit, move to next page
                    break
                # Horizontal placement; anything but CENTER/RIGHT is left-aligned
                if self.halign == Alignment.CENTER:
                    element_x = left_margin + (avail_width - element_width) // 2
                elif self.halign == Alignment.RIGHT:
                    element_x = left_margin + (avail_width - element_width)
                else:
                    element_x = left_margin
                page_elements.append((element, (element_x, current_y)))
                current_index += 1
                current_y += element_height + self.spacing
            if not page_elements:
                # The first candidate element is taller than an empty page.
                # Skip it and keep going; ending the generator here would
                # silently drop every element that follows.
                print(f"Warning: Element at index {current_index} is too large to fit on a page")
                current_index += 1
                self.state.current_element_index = current_index
                continue
            # Record progress before yielding so paginate_next() can resume
            # from a brand-new generator on its next call
            self.state.current_page += 1
            self.state.current_element_index = current_index
            yield page_elements
    def get_state(self) -> Dict[str, Any]:
        """
        Get the current pagination state.
        Returns:
            Dictionary representing pagination state
        """
        return self.state.save()
    def set_state(self, state: Dict[str, Any]) -> None:
        """
        Set the pagination state.
        Args:
            state: Dictionary representing pagination state
        """
        self.state = PaginationState.load(state)
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,18 @@
 [build-system]
 requires = ["setuptools>=42", "wheel"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "pyWebLayout"
 description = "A Python library for HTML-like layout and rendering"
 readme = "README.md"
 requires-python = ">=3.6"
 license = {file = "LICENSE"}
 authors = [
    {name = "Duncan Tourolle", email = "duncan@tourolle.paris"}
 ]
 dynamic = ["version"]
 dependencies = [
    "Pillow",
    "numpy",
 ]
--- a/setup.cfg
+++ b/setup.cfg
@ -0,0 +1,23 @@
 [metadata]
 name = pyWebLayout
 version = 0.1.0
 author = Duncan Tourolle
 author_email = duncan@tourolle.paris
 description = A Python library for HTML-like layout and rendering
 long_description = file: README.md
 long_description_content_type = text/markdown
 url = https://gitea.tourolle.paris/pyWebLayout
 classifiers =
    Programming Language :: Python :: 3
    License :: OSI Approved :: MIT License
    Operating System :: OS Independent
 [options]
 packages = find:
 python_requires = >=3.6
 install_requires =
    Pillow
    numpy
 [options.packages.find]
 include = pyWebLayout*
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,32 @@
 from setuptools import setup, find_packages
 # Read the README through a context manager with an explicit encoding so the
 # file handle is closed promptly and the build does not depend on the
 # platform's default locale encoding.
 with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()
 setup(
    name="pyWebLayout",
    version="0.1.0",
    packages=find_packages(),
    install_requires=[
        "Pillow",
        "numpy",
    ],
    extras_require={
        "test": [
            "coverage>=5.0",
        ],
        "dev": [
            "coverage>=5.0",
            "pytest>=6.0",
        ],
    },
    author="Duncan Tourolle",
    author_email="duncan@tourolle.paris",
    description="A Python library for HTML-like layout and rendering",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://gitea.tourolle.paris/pyWebLayout",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
 )
--- a/tests/TESTING_STRATEGY.md
+++ b/tests/TESTING_STRATEGY.md
@ -0,0 +1,299 @@
 # PyWebLayout Testing Strategy
 This document outlines the comprehensive unit testing strategy for the pyWebLayout project.
 ## Testing Philosophy
 The testing strategy follows these principles:
 - **Separation of Concerns**: Each component is tested independently
 - **Comprehensive Coverage**: All public APIs and critical functionality are tested
 - **Integration Testing**: End-to-end workflows are validated
 - **Regression Prevention**: Tests prevent breaking changes
 - **Documentation**: Tests serve as living documentation of expected behavior
 ## Test Organization
 ### Current Test Files (Implemented)
 #### ✅ `test_html_style.py`
 Tests the `HTMLStyleManager` class for CSS parsing and style management.
 **Coverage:**
 - Style initialization and defaults
 - Style stack operations (push/pop)
 - CSS property parsing (font-size, font-weight, colors, etc.)
 - Color parsing (named, hex, rgb, rgba)
 - Tag-specific default styles
 - Inline style parsing
 - Font object creation
 - Style combination (tag + inline styles)
 #### ✅ `test_html_text.py` 
 Tests the `HTMLTextProcessor` class for text buffering and word creation.
 **Coverage:**
 - Text buffer management
 - HTML entity reference handling
 - Character reference processing (decimal/hex)
 - Word creation with styling
 - Paragraph management
 - Text flushing operations
 - Buffer state operations
 #### ✅ `test_html_content.py`
 Integration tests for the `HTMLContentReader` class covering complete HTML parsing.
 **Coverage:**
 - Simple paragraph parsing
 - Heading levels (h1-h6)
 - Styled text (bold, italic)
 - Lists (ul, ol, dl)
 - Tables with headers and cells
 - Blockquotes with nested content
 - Code blocks with language detection
 - HTML entities
 - Nested element structures
 - Complex document parsing
 #### ✅ `test_abstract_blocks.py`
 Tests for the core abstract block element classes.
 **Coverage:**
 - Paragraph word management
 - Heading levels and properties
 - Quote nesting capabilities
 - Code block line management
 - List creation and item handling
 - Table structure (rows, cells, sections)
 - Image properties and scaling
 - Simple elements (hr, br)
 #### ✅ `test_runner.py`
 Test runner script for executing all tests with summary reporting.
 ---
 ## Additional Tests Needed
 ### 🔄 High Priority (Should Implement Next)
 #### `test_abstract_inline.py`
 Tests for inline elements and text formatting.
 **Needed Coverage:**
 - Word creation and properties
 - Word hyphenation functionality
 - FormattedSpan management
 - Word chaining (previous/next relationships)
 - Font style application
 - Language-specific hyphenation
 #### `test_abstract_document.py`
 Tests for document structure and metadata.
 **Needed Coverage:**
 - Document creation and initialization
 - Metadata management (title, author, language, etc.)
 - Block addition and management
 - Anchor creation and resolution
 - Resource management
 - Table of contents generation
 - Chapter and book structures
 #### `test_abstract_functional.py`
 Tests for functional elements (links, buttons, forms).
 **Needed Coverage:**
 - Link creation and type detection
 - Link execution for different types
 - Button functionality and state
 - Form field management
 - Form validation and submission
 - Parameter handling
 #### `test_style_system.py`
 Tests for the style system (fonts, colors, alignment).
 **Needed Coverage:**
 - Font creation and properties
 - Color representation and manipulation
 - Font weight, style, decoration enums
 - Alignment enums and behavior
 - Style inheritance and cascading
 ### 🔧 Medium Priority
 #### `test_html_elements.py`
 Unit tests for the HTML element handlers.
 **Needed Coverage:**
 - BlockElementHandler individual methods
 - ListElementHandler state management
 - TableElementHandler complex scenarios
 - InlineElementHandler link processing
 - Handler coordination and delegation
 - Error handling in handlers
 #### `test_html_metadata.py`
 Tests for HTML metadata extraction.
 **Needed Coverage:**
 - Meta tag parsing
 - Open Graph extraction
 - JSON-LD structured data
 - Title and description extraction
 - Language detection
 - Character encoding handling
 #### `test_html_resources.py`
 Tests for HTML resource extraction.
 **Needed Coverage:**
 - CSS stylesheet extraction
 - JavaScript resource identification
 - Image source collection
 - Media element detection
 - External resource resolution
 - Base URL handling
 #### `test_io_base.py`
 Tests for the base reader architecture.
 **Needed Coverage:**
 - BaseReader interface compliance
 - MetadataReader abstract methods
 - ContentReader abstract methods
 - ResourceReader abstract methods
 - CompositeReader coordination
 ### 🔍 Lower Priority
 #### `test_concrete_elements.py`
 Tests for concrete rendering implementations.
 **Needed Coverage:**
 - Box model calculations
 - Text rendering specifics
 - Image rendering and scaling
 - Page layout management
 - Functional element rendering
 #### `test_typesetting.py`
 Tests for the typesetting system.
 **Needed Coverage:**
 - Flow algorithms
 - Pagination logic
 - Document pagination
 - Line breaking
 - Hyphenation integration
 #### `test_epub_reader.py`
 Tests for EPUB reading functionality.
 **Needed Coverage:**
 - EPUB file structure parsing
 - Manifest processing
 - Chapter extraction
 - Metadata reading
 - Navigation document parsing
 #### `test_integration.py`
 End-to-end integration tests.
 **Needed Coverage:**
 - Complete HTML-to-document workflows
 - EPUB-to-document workflows
 - Style application across parsers
 - Resource resolution chains
 - Error handling scenarios
 ## Testing Infrastructure
 ### Test Dependencies
 ```python
 # Required for testing
 unittest  # Built-in Python testing framework
 unittest.mock  # For mocking and test doubles
 ```
 ### Test Data
 - Create `tests/data/` directory with sample files:
  - `sample.html` - Well-formed HTML document
  - `complex.html` - Complex nested HTML
  - `malformed.html` - Edge cases and error conditions
  - `sample.epub` - Sample EPUB file
  - `test_images/` - Sample images for testing
 ### Continuous Integration
 - Tests should run on Python 3.6+ 
 - All tests must pass before merging
 - Aim for >90% code coverage
 - Performance regression testing for parsing speed
 ## Running Tests
 ### Run All Tests
 ```bash
 python tests/test_runner.py
 ```
 ### Run Specific Test Module
 ```bash
 python tests/test_runner.py html_style
 python -m unittest tests.test_html_style
 ```
 ### Run Individual Test
 ```bash
 python -m unittest tests.test_html_style.TestHTMLStyleManager.test_color_parsing
 ```
 ### Run with Coverage
 ```bash
 pip install coverage
 coverage run -m unittest discover tests/
 coverage report -m
 coverage html  # Generate HTML report
 ```
 ## Test Quality Guidelines
 ### Test Naming
 - Test files: `test_<module_name>.py`
 - Test classes: `Test<ClassName>`
 - Test methods: `test_<specific_functionality>`
 ### Test Structure
 1. **Arrange**: Set up test data and mocks
 2. **Act**: Execute the functionality being tested
 3. **Assert**: Verify the expected behavior
 ### Mock Usage
 - Mock external dependencies (file I/O, network)
 - Mock complex objects when testing units in isolation
 - Prefer real objects for integration tests
 ### Edge Cases
 - Empty inputs
 - Invalid inputs  
 - Boundary conditions
 - Error scenarios
 - Performance edge cases
 ## Success Metrics
 - **Coverage**: >90% line coverage across all modules
 - **Performance**: No test takes longer than 1 second
 - **Reliability**: Tests pass consistently across environments
 - **Maintainability**: Tests are easy to understand and modify
 - **Documentation**: Tests clearly show expected behavior
 ## Implementation Priority
 1. **Week 1**: Complete high-priority abstract tests
 2. **Week 2**: Implement HTML processing component tests
 3. **Week 3**: Add integration and end-to-end tests
 4. **Week 4**: Performance and edge case testing
 This testing strategy ensures comprehensive coverage of the pyWebLayout library while maintaining good separation of concerns and providing clear documentation of expected behavior.
--- a/tests/__init__.py
+++ b/tests/__init__.py
@ -0,0 +1,6 @@
 """
 Test suite for pyWebLayout.
 This package contains comprehensive unit tests for all components of the pyWebLayout library,
 organized by module and functionality.
 """
--- a/tests/test_abstract_blocks.py
+++ b/tests/test_abstract_blocks.py
@ -0,0 +1,275 @@
 """
 Unit tests for abstract block elements.
 Tests the core abstract block classes that form the foundation of the document model.
 """
 import unittest
 from pyWebLayout.abstract.block import (
    Block, BlockType, Parapgraph, Heading, HeadingLevel, Quote, CodeBlock,
    HList, ListStyle, ListItem, Table, TableRow, TableCell, 
    HorizontalRule, LineBreak, Image
 )
 from pyWebLayout.abstract.inline import Word
 from pyWebLayout.style import Font
 class TestBlockElements(unittest.TestCase):
    """Exercise construction, accessors and containment of the abstract block classes."""
    def test_paragraph_creation(self):
        """A new paragraph is empty and parentless; added words are counted and iterable."""
        para = Parapgraph()
        self.assertEqual(para.block_type, BlockType.PARAGRAPH)
        self.assertEqual(para.word_count, 0)
        self.assertIsNone(para.parent)
        # Both words share one default-constructed font.
        shared_font = Font()
        hello, world = Word("Hello", shared_font), Word("World", shared_font)
        for word in (hello, world):
            para.add_word(word)
        self.assertEqual(para.word_count, 2)
        # Each item produced by words() carries the Word object at index 1.
        collected = list(para.words())
        self.assertEqual(len(collected), 2)
        for position, expected_text in enumerate(("Hello", "World")):
            self.assertEqual(collected[position][1].text, expected_text)
    def test_heading_levels(self):
        """Headings report the level they were built with and accept a new level."""
        top = Heading(HeadingLevel.H1)
        middle = Heading(HeadingLevel.H3)
        lowest = Heading(HeadingLevel.H6)
        built = ((top, HeadingLevel.H1), (middle, HeadingLevel.H3), (lowest, HeadingLevel.H6))
        for heading, expected_level in built:
            self.assertEqual(heading.level, expected_level)
        self.assertEqual(top.block_type, BlockType.HEADING)
        # The level attribute is writable after construction.
        top.level = HeadingLevel.H2
        self.assertEqual(top.level, HeadingLevel.H2)
    def test_quote_nesting(self):
        """Blocks added to a quote get it as parent and are iterated in insertion order."""
        container = Quote()
        first_para = Parapgraph()
        second_para = Parapgraph()
        container.add_block(first_para)
        container.add_block(second_para)
        for child in (first_para, second_para):
            self.assertEqual(child.parent, container)
        # blocks() should hand the children back in the order they were added.
        children = list(container.blocks())
        self.assertEqual(len(children), 2)
        for actual, expected in zip(children, (first_para, second_para)):
            self.assertEqual(actual, expected)
    def test_code_block(self):
        """Code blocks track their language and the lines appended to them."""
        snippet = CodeBlock("python")
        self.assertEqual(snippet.language, "python")
        self.assertEqual(snippet.line_count, 0)
        source_lines = ("def hello():", "    print('Hello!')")
        for text in source_lines:
            snippet.add_line(text)
        self.assertEqual(snippet.line_count, 2)
        # Each item produced by lines() carries the line text at index 1.
        stored = list(snippet.lines())
        self.assertEqual(len(stored), 2)
        for entry, text in zip(stored, source_lines):
            self.assertEqual(entry[1], text)
        # The language can be reassigned after construction.
        snippet.language = "javascript"
        self.assertEqual(snippet.language, "javascript")
    def test_list_creation(self):
        """Lists count their items, parent them, and allow the style to be changed."""
        bullets = HList(ListStyle.UNORDERED)
        self.assertEqual(bullets.style, ListStyle.UNORDERED)
        self.assertEqual(bullets.item_count, 0)
        first, second = ListItem(), ListItem()
        bullets.add_item(first)
        bullets.add_item(second)
        self.assertEqual(bullets.item_count, 2)
        for entry in (first, second):
            self.assertEqual(entry.parent, bullets)
        # items() must yield exactly the two entries added above.
        self.assertEqual(len(list(bullets.items())), 2)
        bullets.style = ListStyle.ORDERED
        self.assertEqual(bullets.style, ListStyle.ORDERED)
    def test_definition_list(self):
        """List items in a definition list keep, and allow updating, their term."""
        glossary = HList(ListStyle.DEFINITION)
        python_entry = ListItem(term="Python")
        js_entry = ListItem(term="JavaScript")
        for entry in (python_entry, js_entry):
            glossary.add_item(entry)
        self.assertEqual(python_entry.term, "Python")
        self.assertEqual(js_entry.term, "JavaScript")
        # Terms are writable after construction.
        python_entry.term = "Python 3"
        self.assertEqual(python_entry.term, "Python 3")
    def test_table_structure(self):
        """Tables count rows per section; cells expose header flag and spans."""
        grid = Table(caption="Test Table")
        self.assertEqual(grid.caption, "Test Table")
        self.assertEqual(grid.row_count["total"], 0)
        heading_row = TableRow()
        values_row = TableRow()
        # Two header cells for the first row.
        head_a = TableCell(is_header=True)
        head_b = TableCell(is_header=True)
        heading_row.add_cell(head_a)
        heading_row.add_cell(head_b)
        # Two data cells for the second row; the second one spans two columns.
        plain = TableCell(is_header=False)
        wide = TableCell(is_header=False, colspan=2)
        values_row.add_cell(plain)
        values_row.add_cell(wide)
        grid.add_row(heading_row, "header")
        grid.add_row(values_row, "body")
        # row_count is indexed by section name plus an overall "total".
        for section, expected in (("header", 1), ("body", 1), ("total", 2)):
            self.assertEqual(grid.row_count[section], expected)
        self.assertTrue(head_a.is_header)
        self.assertFalse(plain.is_header)
        self.assertEqual(wide.colspan, 2)
        self.assertEqual(wide.rowspan, 1)  # rowspan was not passed, so this is the default
        for row in (heading_row, values_row):
            self.assertEqual(row.cell_count, 2)
    def test_table_sections(self):
        """Rows are retrievable per section and, labelled, through all_rows()."""
        grid = Table()
        head, body_a, body_b, foot = TableRow(), TableRow(), TableRow(), TableRow()
        placements = ((head, "header"), (body_a, "body"), (body_b, "body"), (foot, "footer"))
        for row, section in placements:
            grid.add_row(row, section)
        in_header = list(grid.header_rows())
        in_body = list(grid.body_rows())
        in_footer = list(grid.footer_rows())
        self.assertEqual(len(in_header), 1)
        self.assertEqual(len(in_body), 2)
        self.assertEqual(len(in_footer), 1)
        # all_rows() yields (section, row) pairs covering every section.
        everything = list(grid.all_rows())
        self.assertEqual(len(everything), 4)
        labels = [label for label, _row in everything]
        self.assertEqual(labels, ["header", "body", "body", "footer"])
    def test_image_loading(self):
        """Image attributes reflect constructor arguments and later assignments."""
        picture = Image("test.jpg", "Test image", 100, 200)
        constructed = (("source", "test.jpg"), ("alt_text", "Test image"), ("width", 100), ("height", 200))
        for attribute, expected in constructed:
            self.assertEqual(getattr(picture, attribute), expected)
        # Overwrite every attribute, then read all of them back.
        reassigned = (("source", "new.png"), ("alt_text", "New image"), ("width", 150), ("height", 300))
        for attribute, value in reassigned:
            setattr(picture, attribute, value)
        for attribute, expected in reassigned:
            self.assertEqual(getattr(picture, attribute), expected)
        # get_dimensions() bundles width and height into one tuple.
        self.assertEqual(picture.get_dimensions(), (150, 300))
    def test_aspect_ratio_calculation(self):
        """Aspect ratio needs both dimensions; scaling keeps the proportions."""
        wide = Image("test.jpg", width=400, height=200)
        self.assertEqual(wide.get_aspect_ratio(), 2.0)  # 400/200
        # With the height missing no ratio is reported.
        width_only = Image("test.jpg", width=300)
        self.assertIsNone(width_only.get_aspect_ratio())
        # Fit the 400x200 image into a 200x150 box.
        fitted = wide.calculate_scaled_dimensions(max_width=200, max_height=150)
        self.assertEqual(fitted[0], 200)  # capped by max_width
        self.assertEqual(fitted[1], 100)  # height follows the 2:1 ratio
    def test_simple_elements(self):
        """Horizontal rules and line breaks report their block type and start parentless."""
        rule = HorizontalRule()
        line_break = LineBreak()
        self.assertEqual(rule.block_type, BlockType.HORIZONTAL_RULE)
        self.assertEqual(line_break.block_type, BlockType.LINE_BREAK)
        # Neither element has been attached to a container.
        self.assertIsNone(rule.parent)
        self.assertIsNone(line_break.parent)
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_html_content.py
+++ b/tests/test_html_content.py
@ -0,0 +1,354 @@
 """
 Unit tests for HTML content reading.
 Tests the HTMLContentReader class for parsing complete HTML documents.
 This is more of an integration test covering the entire parsing pipeline.
 """
 import unittest
 from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.abstract.block import (
    Parapgraph, Heading, HeadingLevel, HList, ListStyle, 
    Table, Quote, CodeBlock, HorizontalRule, LineBreak
 )
 class TestHTMLContentReader(unittest.TestCase):
    """Integration-style tests that feed HTML snippets through HTMLContentReader.

    Every test parses into a fresh Document (see setUp) and inspects the
    blocks that end up in ``document.blocks``.
    """
    def setUp(self):
        """Create a new reader and an empty target document for each test."""
        self.reader = HTMLContentReader()
        self.document = Document()
    def test_simple_paragraph(self):
        """A single <p> becomes one paragraph block holding one Word per token."""
        html = '<p>Hello world!</p>'
        # The return value is not needed; the document is inspected directly.
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], Parapgraph)
        paragraph = self.document.blocks[0]
        # Each item produced by words() carries the Word object at index 1.
        words = list(paragraph.words())
        self.assertEqual(len(words), 2)
        self.assertEqual(words[0][1].text, "Hello")
        self.assertEqual(words[1][1].text, "world!")
    def test_headings(self):
        """<h1>..<h6> tags become Heading blocks with the matching level."""
        html = '''
        <h1>Heading 1</h1>
        <h2>Heading 2</h2>
        <h3>Heading 3</h3>
        <h6>Heading 6</h6>
        '''
        self.reader.extract_content(html, self.document)
        # Should have 4 heading blocks
        headings = [block for block in self.document.blocks if isinstance(block, Heading)]
        self.assertEqual(len(headings), 4)
        # Check heading levels, in document order
        self.assertEqual(headings[0].level, HeadingLevel.H1)
        self.assertEqual(headings[1].level, HeadingLevel.H2)
        self.assertEqual(headings[2].level, HeadingLevel.H3)
        self.assertEqual(headings[3].level, HeadingLevel.H6)
        # Check text content of the first heading
        h1_words = list(headings[0].words())
        self.assertEqual(len(h1_words), 2)
        self.assertEqual(h1_words[0][1].text, "Heading")
        self.assertEqual(h1_words[1][1].text, "1")
    def test_styled_text(self):
        """Inline <b>/<i> markup does not split or drop words of the paragraph."""
        html = '<p>This is <b>bold</b> and <i>italic</i> text.</p>'
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        paragraph = self.document.blocks[0]
        words = list(paragraph.words())
        # Should have words: "This", "is", "bold", "and", "italic", "text."
        self.assertEqual(len(words), 6)
        # The styling information is embedded in the Font objects.
        # Only the word texts are verified here, not the per-word styling.
        word_texts = [word[1].text for word in words]
        self.assertEqual(word_texts, ["This", "is", "bold", "and", "italic", "text."])
    def test_unordered_list(self):
        """A <ul> becomes one unordered HList whose items wrap their text in a paragraph."""
        html = '''
        <ul>
            <li>First item</li>
            <li>Second item</li>
            <li>Third item</li>
        </ul>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], HList)
        list_block = self.document.blocks[0]
        self.assertEqual(list_block.style, ListStyle.UNORDERED)
        items = list(list_block.items())
        self.assertEqual(len(items), 3)
        # Check first item content
        first_item_blocks = list(items[0].blocks())
        self.assertEqual(len(first_item_blocks), 1)
        self.assertIsInstance(first_item_blocks[0], Parapgraph)
    def test_ordered_list(self):
        """An <ol> becomes one ordered list block with one item per <li>."""
        html = '''
        <ol>
            <li>First step</li>
            <li>Second step</li>
        </ol>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        list_block = self.document.blocks[0]
        self.assertEqual(list_block.style, ListStyle.ORDERED)
        items = list(list_block.items())
        self.assertEqual(len(items), 2)
    def test_definition_list(self):
        """A <dl> becomes one definition list with one item per dt/dd pair."""
        html = '''
        <dl>
            <dt>Term 1</dt>
            <dd>Definition 1</dd>
            <dt>Term 2</dt>
            <dd>Definition 2</dd>
        </dl>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        list_block = self.document.blocks[0]
        self.assertEqual(list_block.style, ListStyle.DEFINITION)
        items = list(list_block.items())
        self.assertEqual(len(items), 2)  # Two dt/dd pairs
    def test_table(self):
        """A simple <table> becomes one Table whose cells keep their th/td role."""
        html = '''
        <table>
            <tr>
                <th>Header 1</th>
                <th>Header 2</th>
            </tr>
            <tr>
                <td>Cell 1</td>
                <td>Cell 2</td>
            </tr>
        </table>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], Table)
        table = self.document.blocks[0]
        # The markup has no <thead>, so both rows are expected among the body rows.
        body_rows = list(table.body_rows())
        self.assertEqual(len(body_rows), 2)  # Header row + data row
        # Check first row (header cells)
        first_row_cells = list(body_rows[0].cells())
        self.assertEqual(len(first_row_cells), 2)
        self.assertTrue(first_row_cells[0].is_header)
        self.assertTrue(first_row_cells[1].is_header)
        # Check second row (data cells)
        second_row_cells = list(body_rows[1].cells())
        self.assertEqual(len(second_row_cells), 2)
        self.assertFalse(second_row_cells[0].is_header)
        self.assertFalse(second_row_cells[1].is_header)
    def test_blockquote(self):
        """A <blockquote> becomes one Quote containing its paragraphs as child blocks."""
        html = '''
        <blockquote>
            <p>This is a quoted paragraph.</p>
            <p>Another quoted paragraph.</p>
        </blockquote>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], Quote)
        quote = self.document.blocks[0]
        quote_blocks = list(quote.blocks())
        self.assertEqual(len(quote_blocks), 2)
        self.assertIsInstance(quote_blocks[0], Parapgraph)
        self.assertIsInstance(quote_blocks[1], Parapgraph)
    def test_code_block(self):
        """<pre><code class="language-X"> becomes a CodeBlock with language X."""
        html = '''
        <pre><code class="language-python">
 def hello():
    print("Hello, world!")
        </code></pre>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], CodeBlock)
        code_block = self.document.blocks[0]
        # Only the detected language is checked, not the captured code lines.
        self.assertEqual(code_block.language, "python")
    def test_horizontal_rule(self):
        """An <hr> between two paragraphs yields three blocks in document order."""
        html = '<p>Before</p><hr><p>After</p>'
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 3)
        self.assertIsInstance(self.document.blocks[0], Parapgraph)
        self.assertIsInstance(self.document.blocks[1], HorizontalRule)
        self.assertIsInstance(self.document.blocks[2], Parapgraph)
    def test_html_entities(self):
        """&lt;, &gt; and &amp; are decoded into their literal characters."""
        html = '<p>Less than: &lt; Greater than: &gt; Ampersand: &amp;</p>'
        self.reader.extract_content(html, self.document)
        paragraph = self.document.blocks[0]
        words = list(paragraph.words())
        # Each decoded entity is expected to appear as a word of its own.
        word_texts = [word[1].text for word in words]
        self.assertIn('<', word_texts)
        self.assertIn('>', word_texts)
        self.assertIn('&', word_texts)
    def test_nested_elements(self):
        """Content nested in a <div> still produces heading, paragraph and list blocks."""
        html = '''
        <div>
            <h2>Section Title</h2>
            <p>Section content with <strong>important</strong> text.</p>
            <ul>
                <li>List item 1</li>
                <li>List item 2</li>
            </ul>
        </div>
        '''
        self.reader.extract_content(html, self.document)
        # Should have multiple blocks
        self.assertGreater(len(self.document.blocks), 1)
        # Compare by exact class name so that subclasses are not counted.
        block_types = [type(block).__name__ for block in self.document.blocks]
        # NOTE(review): the original comment attributed this block to the <div>;
        # it may equally come from the inner <p> - confirm against the reader.
        self.assertIn('Parapgraph', block_types)
        self.assertIn('Heading', block_types)
        self.assertIn('HList', block_types)
    def test_empty_elements(self):
        """Elements without content are not dropped by the reader."""
        html = '<p></p><div></div><ul></ul>'
        self.reader.extract_content(html, self.document)
        # Empty elements should still create blocks
        self.assertEqual(len(self.document.blocks), 3)
    def test_whitespace_handling(self):
        """Runs of spaces and newlines separate words but never become words."""
        html = '''
        <p>  Word1    Word2  
        Word3   </p>
        '''
        self.reader.extract_content(html, self.document)
        paragraph = self.document.blocks[0]
        words = list(paragraph.words())
        # Should normalize whitespace and create separate words
        word_texts = [word[1].text for word in words]
        self.assertEqual(word_texts, ["Word1", "Word2", "Word3"])
    def test_base_url_setting(self):
        """set_base_url() forwards the URL to the reader's inline handler."""
        base_url = "https://example.com/path/"
        self.reader.set_base_url(base_url)
        # The base URL should be passed to the inline handler
        self.assertEqual(self.reader.inline_handler.base_url, base_url)
    def test_complex_document(self):
        """A full HTML page yields every major block type at the top level."""
        html = '''
        <!DOCTYPE html>
        <html>
        <head>
            <title>Test Document</title>
            <style>body { font-family: Arial; }</style>
        </head>
        <body>
            <h1>Main Title</h1>
            <p>Introduction paragraph with <em>emphasis</em>.</p>
            <h2>Section 1</h2>
            <p>Content with <a href="link.html">a link</a>.</p>
            <ul>
                <li>Item 1</li>
                <li>Item 2 with <strong>bold text</strong></li>
            </ul>
            <h2>Section 2</h2>
            <blockquote>
                <p>A quoted paragraph.</p>
            </blockquote>
            <table>
                <tr><th>Col1</th><th>Col2</th></tr>
                <tr><td>A</td><td>B</td></tr>
            </table>
        </body>
        </html>
        '''
        self.reader.extract_content(html, self.document)
        # Should have parsed multiple blocks
        self.assertGreater(len(self.document.blocks), 5)
        # Should have different types of content
        block_types = {type(block).__name__ for block in self.document.blocks}
        expected_types = {'Heading', 'Parapgraph', 'HList', 'Quote', 'Table'}
        # Report exactly which types are absent instead of a bare "False is not true".
        missing_types = expected_types - block_types
        self.assertFalse(missing_types, f"Block types not produced: {sorted(missing_types)}")
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_html_style.py
+++ b/tests/test_html_style.py
@ -0,0 +1,182 @@
 """
 Unit tests for HTML style management.
 Tests the HTMLStyleManager class for CSS parsing, style stacks, and font creation.
 """
 import unittest
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 from pyWebLayout.style import FontStyle, FontWeight, TextDecoration
 class TestHTMLStyleManager(unittest.TestCase):
    """Checks for HTMLStyleManager: defaults, style stack, CSS parsing and font creation."""
    def setUp(self):
        """Give every test its own, freshly constructed style manager."""
        self.style_manager = HTMLStyleManager()
    def test_initialization(self):
        """A new manager exposes the expected default style values."""
        defaults = self.style_manager.get_current_style()
        expected_defaults = (
            ('font_size', 12),
            ('font_weight', FontWeight.NORMAL),
            ('font_style', FontStyle.NORMAL),
            ('decoration', TextDecoration.NONE),
            ('color', (0, 0, 0)),
        )
        for key, expected in expected_defaults:
            self.assertEqual(defaults[key], expected)
        self.assertIsNone(defaults['background'])
        self.assertEqual(defaults['language'], 'en_US')
    def test_style_stack_operations(self):
        """Pushing overlays a style on the current one; popping restores the previous one."""
        before_push = self.style_manager.get_current_style()
        overlay = {'font_size': 16, 'font_weight': FontWeight.BOLD}
        self.style_manager.push_style(overlay)
        during_push = self.style_manager.get_current_style()
        self.assertEqual(during_push['font_size'], 16)
        self.assertEqual(during_push['font_weight'], FontWeight.BOLD)
        self.assertEqual(during_push['color'], (0, 0, 0))  # not part of the overlay
        # After the pop the current style must equal the one seen before the push.
        self.style_manager.pop_style()
        after_pop = self.style_manager.get_current_style()
        self.assertEqual(after_pop, before_push)
    def test_tag_styles(self):
        """Known tags carry default styles; unknown tags yield an empty dict."""
        for tag, expected_size in (('h1', 24), ('h6', 12)):
            heading_style = self.style_manager.get_tag_style(tag)
            self.assertEqual(heading_style['font_size'], expected_size)
            self.assertEqual(heading_style['font_weight'], FontWeight.BOLD)
        emphasis_style = self.style_manager.get_tag_style('em')
        self.assertEqual(emphasis_style['font_style'], FontStyle.ITALIC)
        fallback_style = self.style_manager.get_tag_style('unknown')
        self.assertEqual(fallback_style, {})
    def test_inline_style_parsing(self):
        """Inline CSS declarations are translated into style-dict entries."""
        # (declaration, resulting key, expected value) for single-property inputs.
        single_property_cases = (
            ('font-size: 18px', 'font_size', 18),
            ('font-size: 14pt', 'font_size', 14),
            ('font-weight: bold', 'font_weight', FontWeight.BOLD),
            ('font-style: italic', 'font_style', FontStyle.ITALIC),
            ('text-decoration: underline', 'decoration', TextDecoration.UNDERLINE),
        )
        for declaration, key, expected in single_property_cases:
            parsed = self.style_manager.parse_inline_style(declaration)
            self.assertEqual(parsed[key], expected)
        # Several semicolon-separated properties in a single declaration.
        combined = self.style_manager.parse_inline_style(
            'font-size: 20px; font-weight: bold; color: red'
        )
        self.assertEqual(combined['font_size'], 20)
        self.assertEqual(combined['font_weight'], FontWeight.BOLD)
        self.assertEqual(combined['color'], (255, 0, 0))
    def test_color_parsing(self):
        """Named, hex, rgb() and rgba() colours parse to RGB tuples; bad input gives None."""
        valid_cases = (
            # Named colors
            ('red', (255, 0, 0)),
            ('blue', (0, 0, 255)),
            ('white', (255, 255, 255)),
            ('gray', (128, 128, 128)),
            ('grey', (128, 128, 128)),
            # Hex colors, long and short form
            ('#ff0000', (255, 0, 0)),
            ('#00ff00', (0, 255, 0)),
            ('#f00', (255, 0, 0)),
            ('#0f0', (0, 255, 0)),
            # RGB colors, including extra whitespace
            ('rgb(255, 0, 0)', (255, 0, 0)),
            ('rgb(128, 128, 128)', (128, 128, 128)),
            ('rgb( 255 , 255 , 255 )', (255, 255, 255)),
            # RGBA colors (alpha ignored)
            ('rgba(255, 0, 0, 0.5)', (255, 0, 0)),
        )
        for css_value, expected_rgb in valid_cases:
            self.assertEqual(self.style_manager.parse_color(css_value), expected_rgb)
        # Unknown name, malformed hex and out-of-range channel all return None.
        for css_value in ('invalid', '#gg0000', 'rgb(300, 0, 0)'):
            self.assertIsNone(self.style_manager.parse_color(css_value))
    def test_color_clamping(self):
        """Out-of-range rgb() channels are rejected with None, not clamped."""
        rejected = self.style_manager.parse_color('rgb(300, -10, 128)')
        self.assertIsNone(rejected)  # 300 and -10 lie outside 0-255
    def test_apply_style_to_element(self):
        """Tag defaults and inline styles are merged, with inline values winning."""
        inline_attrs = {'style': 'color: blue; font-size: 30px'}
        merged = self.style_manager.apply_style_to_element('h1', inline_attrs)
        self.assertEqual(merged['font_size'], 30)  # inline value replaces the h1 size
        self.assertEqual(merged['font_weight'], FontWeight.BOLD)  # kept from h1
        self.assertEqual(merged['color'], (0, 0, 255))  # supplied inline
        # With no style attribute only the tag defaults apply.
        merged = self.style_manager.apply_style_to_element('strong', {})
        self.assertEqual(merged['font_weight'], FontWeight.BOLD)
    def test_reset(self):
        """reset() discards pushed styles and restores the defaults."""
        for pushed in ({'font_size': 20}, {'color': (255, 0, 0)}):
            self.style_manager.push_style(pushed)
        self.style_manager.reset()
        restored = self.style_manager.get_current_style()
        self.assertEqual(restored['font_size'], 12)
        self.assertEqual(restored['color'], (0, 0, 0))
        # Peeks at the private stack to confirm nothing is left on it.
        self.assertEqual(len(self.style_manager._style_stack), 0)
    def test_font_creation(self):
        """create_font() builds a Font that mirrors the current style."""
        requested_style = {
            'font_size': 16,
            'font_weight': FontWeight.BOLD,
            'font_style': FontStyle.ITALIC,
            'decoration': TextDecoration.UNDERLINE,
            'color': (255, 0, 0),
            'background': (255, 255, 0, 255)
        }
        self.style_manager.push_style(requested_style)
        created = self.style_manager.create_font()
        # Font attribute names differ from the style keys (e.g. colour vs. color).
        self.assertEqual(created.font_size, 16)
        self.assertEqual(created.weight, FontWeight.BOLD)
        self.assertEqual(created.style, FontStyle.ITALIC)
        self.assertEqual(created.decoration, TextDecoration.UNDERLINE)
        self.assertEqual(created.colour, (255, 0, 0))
        self.assertEqual(created.background, (255, 255, 0, 255))
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_html_text.py
+++ b/tests/test_html_text.py
@ -0,0 +1,247 @@
 """
 Unit tests for HTML text processing.
 Tests the HTMLTextProcessor class for text buffering, entity handling, and word creation.
 """
 import unittest
 from unittest.mock import Mock, MagicMock
 from pyWebLayout.io.readers.html_text import HTMLTextProcessor
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 from pyWebLayout.abstract.block import Parapgraph
 from pyWebLayout.abstract.inline import Word
 class TestHTMLTextProcessor(unittest.TestCase):
    """Test cases for HTMLTextProcessor.
    Exercises text buffering, entity and character reference handling,
    paragraph tracking and the flushing of buffered text into words.
    """
    def setUp(self):
        """Create a real style manager, a text processor and a mock paragraph."""
        self.style_manager = HTMLStyleManager()
        self.text_processor = HTMLTextProcessor(self.style_manager)
        # Mock paragraph limited to the Parapgraph interface; add_word is
        # replaced explicitly so each test can inspect the words passed to it
        self.mock_paragraph = Mock(spec=Parapgraph)
        self.mock_paragraph.add_word = Mock()
    def test_initialization(self):
        """Test proper initialization of text processor."""
        self.assertEqual(self.text_processor._text_buffer, "")
        self.assertIsNone(self.text_processor._current_paragraph)
        self.assertEqual(self.text_processor._style_manager, self.style_manager)
    def test_add_text(self):
        """Test that successive add_text calls append to the buffer."""
        self.text_processor.add_text("Hello")
        self.assertEqual(self.text_processor.get_buffer_content(), "Hello")
        self.text_processor.add_text(" World")
        self.assertEqual(self.text_processor.get_buffer_content(), "Hello World")
    def test_entity_references(self):
        """Test HTML entity reference handling."""
        # (entity name, text expected in the buffer)
        test_cases = [
            ('lt', '<'),
            ('gt', '>'),
            ('amp', '&'),
            ('quot', '"'),
            ('apos', "'"),
            ('nbsp', ' '),
            ('copy', '©'),
            ('reg', '®'),
            ('trade', '™'),
            ('mdash', '—'),
            ('ndash', '–'),
            ('hellip', '…'),
            ('euro', '€'),
            ('unknown', '&unknown;')  # Unknown entities should be preserved
        ]
        for entity, expected in test_cases:
            with self.subTest(entity=entity):
                # Start from an empty buffer so each case is checked in isolation
                self.text_processor.clear_buffer()
                self.text_processor.add_entity_reference(entity)
                self.assertEqual(self.text_processor.get_buffer_content(), expected)
    def test_character_references(self):
        """Test numeric character reference handling."""
        # Decimal character references
        self.text_processor.clear_buffer()
        self.text_processor.add_character_reference('65')  # 'A'
        self.assertEqual(self.text_processor.get_buffer_content(), 'A')
        # Hexadecimal character references
        self.text_processor.clear_buffer()
        self.text_processor.add_character_reference('x41')  # 'A'
        self.assertEqual(self.text_processor.get_buffer_content(), 'A')
        # Unicode character
        self.text_processor.clear_buffer()
        self.text_processor.add_character_reference('8364')  # Euro symbol
        self.assertEqual(self.text_processor.get_buffer_content(), '€')
        # Invalid character reference is kept in its literal form
        self.text_processor.clear_buffer()
        self.text_processor.add_character_reference('invalid')
        self.assertEqual(self.text_processor.get_buffer_content(), '&#invalid;')
        # Out of range character
        self.text_processor.clear_buffer()
        self.text_processor.add_character_reference('99999999999')
        self.assertTrue(self.text_processor.get_buffer_content().startswith('&#'))
    def test_buffer_operations(self):
        """Test buffer state operations."""
        # Test has_pending_text
        self.assertFalse(self.text_processor.has_pending_text())
        self.text_processor.add_text("Some text")
        self.assertTrue(self.text_processor.has_pending_text())
        # Test clear_buffer
        self.text_processor.clear_buffer()
        self.assertFalse(self.text_processor.has_pending_text())
        self.assertEqual(self.text_processor.get_buffer_content(), "")
        # Test with whitespace only
        self.text_processor.add_text("   \n\t  ")
        self.assertFalse(self.text_processor.has_pending_text())  # Should ignore whitespace
    def test_paragraph_management(self):
        """Test setting and clearing the current paragraph."""
        # Initially no paragraph
        self.assertIsNone(self.text_processor._current_paragraph)
        # Set paragraph
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        self.assertEqual(self.text_processor._current_paragraph, self.mock_paragraph)
        # Clear paragraph
        self.text_processor.set_current_paragraph(None)
        self.assertIsNone(self.text_processor._current_paragraph)
    def test_flush_text_with_paragraph(self):
        """Test flushing text when paragraph is set."""
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        self.text_processor.add_text("Hello world test")
        # Mock the style manager to return a specific font
        mock_font = Mock()
        self.style_manager.create_font = Mock(return_value=mock_font)
        result = self.text_processor.flush_text()
        # Should return True (text was flushed)
        self.assertTrue(result)
        # Should have created words
        self.assertEqual(self.mock_paragraph.add_word.call_count, 3)  # "Hello", "world", "test"
        # Verify the words were created with correct text
        # (each recorded call's first positional argument is the word object)
        calls = self.mock_paragraph.add_word.call_args_list
        word_texts = [recorded[0][0].text for recorded in calls]
        self.assertEqual(word_texts, ["Hello", "world", "test"])
        # Buffer should be empty after flush
        self.assertEqual(self.text_processor.get_buffer_content(), "")
    def test_flush_text_without_paragraph(self):
        """Test flushing text when no paragraph is set."""
        self.text_processor.add_text("Hello world")
        result = self.text_processor.flush_text()
        # Should return False (no paragraph to flush to)
        self.assertFalse(result)
        # Buffer should be cleared anyway
        self.assertEqual(self.text_processor.get_buffer_content(), "")
    def test_flush_empty_buffer(self):
        """Test flushing when buffer is empty."""
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        result = self.text_processor.flush_text()
        # Should return False (nothing to flush)
        self.assertFalse(result)
        # No words should be added
        self.mock_paragraph.add_word.assert_not_called()
    def test_flush_whitespace_only(self):
        """Test flushing when buffer contains only whitespace."""
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        self.text_processor.add_text("   \n\t  ")
        result = self.text_processor.flush_text()
        # Should return False (no meaningful content)
        self.assertFalse(result)
        # No words should be added
        self.mock_paragraph.add_word.assert_not_called()
    def test_word_creation_with_styling(self):
        """Test that words are created with proper styling."""
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        self.text_processor.add_text("styled text")
        # Set up style manager to return specific font
        mock_font = Mock()
        mock_font.font_size = 16
        mock_font.weight = "bold"
        self.style_manager.create_font = Mock(return_value=mock_font)
        self.text_processor.flush_text()
        # Verify font was created
        self.style_manager.create_font.assert_called()
        # Guard against a vacuous pass: the loop below checks nothing when
        # no word was ever handed to the paragraph
        self.mock_paragraph.add_word.assert_called()
        # Verify words were created with the font
        calls = self.mock_paragraph.add_word.call_args_list
        for recorded in calls:
            word = recorded[0][0]
            self.assertEqual(word.style, mock_font)
    def test_reset(self):
        """Test resetting the text processor."""
        # Set up some state
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        self.text_processor.add_text("Some text")
        # Reset
        self.text_processor.reset()
        # Should be back to initial state
        self.assertEqual(self.text_processor._text_buffer, "")
        self.assertIsNone(self.text_processor._current_paragraph)
    def test_complex_text_processing(self):
        """Test processing text that mixes plain text, an entity and a character reference."""
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        # Mock font creation
        mock_font = Mock()
        self.style_manager.create_font = Mock(return_value=mock_font)
        # Add mixed content
        self.text_processor.add_text("Hello ")
        self.text_processor.add_entity_reference('amp')
        self.text_processor.add_text(" world")
        self.text_processor.add_character_reference('33')  # '!'
        # Should have "Hello & world!"
        expected_content = "Hello & world!"
        self.assertEqual(self.text_processor.get_buffer_content(), expected_content)
        # Flush and verify words
        self.text_processor.flush_text()
        calls = self.mock_paragraph.add_word.call_args_list
        word_texts = [recorded[0][0].text for recorded in calls]
        self.assertEqual(word_texts, ["Hello", "&", "world!"])
 # Allow this test module to be executed directly as a script
 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@ -0,0 +1,84 @@
 """
 Test runner for pyWebLayout.
 This script runs all unit tests and provides a summary of results.
 """
 import unittest
 import sys
 import os
 # Add the project root to the Python path
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 def run_all_tests():
    """Discover and run every ``test_*.py`` module in this script's directory.
    The runner's verbose output goes to stdout, followed by a summary with
    the counts and the names of the failing and erroring tests.
    Returns:
        bool: True when the run was successful (no failures, no errors and
        no unexpected successes), False otherwise.
    """
    # Discover all tests that live next to this file
    loader = unittest.TestLoader()
    start_dir = os.path.dirname(os.path.abspath(__file__))
    suite = loader.discover(start_dir, pattern='test_*.py')
    # Run tests with detailed output
    runner = unittest.TextTestRunner(
        verbosity=2,
        stream=sys.stdout,
        descriptions=True,
        failfast=False
    )
    result = runner.run(suite)
    # Print summary
    print("\n" + "="*70)
    print("TEST SUMMARY")
    print("="*70)
    print(f"Tests run: {result.testsRun}")
    print(f"Failures: {len(result.failures)}")
    print(f"Errors: {len(result.errors)}")
    print(f"Skipped: {len(result.skipped)}")
    # failures/errors hold (test, formatted traceback) pairs; only the test
    # is listed here because the runner already printed the tracebacks
    if result.failures:
        print(f"\nFAILURES ({len(result.failures)}):")
        for test, _ in result.failures:
            print(f"- {test}")
    if result.errors:
        print(f"\nERRORS ({len(result.errors)}):")
        for test, _ in result.errors:
            print(f"- {test}")
    # Tests marked as expected failures that passed also make the run
    # unsuccessful, so list them to explain a FAILED result
    if result.unexpectedSuccesses:
        print(f"\nUNEXPECTED SUCCESSES ({len(result.unexpectedSuccesses)}):")
        for test in result.unexpectedSuccesses:
            print(f"- {test}")
    # wasSuccessful() covers failures, errors and unexpected successes
    success = result.wasSuccessful()
    print(f"\nResult: {'PASSED' if success else 'FAILED'}")
    print("="*70)
    return success
 def run_specific_test(test_module):
    """Run the tests of a single module and report whether they passed.
    Args:
        test_module: Dotted name understood by
            ``unittest.TestLoader.loadTestsFromName`` (for example
            ``'test_html_text'``).
    Returns:
        bool: True when the run was successful (no failures, no errors and
        no unexpected successes), False otherwise. A name that cannot be
        imported is reported by the loader as an erroring test, so it
        yields False rather than raising.
    """
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromName(test_module)
    runner = unittest.TextTestRunner(verbosity=2)
    result = runner.run(suite)
    # wasSuccessful() also accounts for unexpected successes, which a plain
    # failures/errors check would silently treat as a pass
    return result.wasSuccessful()
 # Script entry point: an optional first argument selects a single test
 # module; without it the whole suite is run
 if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if cli_args:
        requested = cli_args[0]
        # Accept "html_text", "test_html_text" or "test_html_text.py" and
        # normalise all of them to the importable module name
        module_name = requested[:-3] if requested.endswith('.py') else requested
        if not requested.startswith('test_'):
            module_name = f'test_{module_name}'
        success = run_specific_test(module_name)
    else:
        success = run_all_tests()
    # Exit status 0 on success, 1 otherwise
    exit_code = 0 if success else 1
    sys.exit(exit_code)