all tests passing

2025-06-07 15:20:42 +02:00 · 2025-06-07 15:20:42 +02:00 · ad0ac238f3
commit ad0ac238f3
parent ab84691278
15 changed files with 499 additions and 2004 deletions
--- a/pyWebLayout/abstract/block.py
+++ b/pyWebLayout/abstract/block.py
@ -1011,14 +1011,246 @@ class Table(Block):
        elif section.lower() == "footer":
            self._footer_rows.append(row)
        else:  # Default to body
-            self._rows
+            self._rows.append(row)
    def create_row(self, section: str = "body", style=None) -> TableRow:
        """
        Build a new TableRow attached to this table.
        The work is delegated to ``TableRow.create_and_add_to`` with this
        table as the target; all arguments are forwarded unchanged.
        Args:
            section: The section that receives the row ("header", "body", or "footer")
            style: Optional style override. If None, inherits from table
        Returns:
            The TableRow returned by ``TableRow.create_and_add_to``
        """
        new_row = TableRow.create_and_add_to(self, section, style)
        return new_row
    def header_rows(self) -> Iterator[TableRow]:
        """
        Walk the rows stored in the header section, in stored order.
        Yields:
            Every TableRow held in ``self._header_rows``
        """
        yield from self._header_rows
    def body_rows(self) -> Iterator[TableRow]:
        """
        Walk the rows stored in the body section, in stored order.
        Yields:
            Every TableRow held in ``self._rows``
        """
        yield from self._rows
    def footer_rows(self) -> Iterator[TableRow]:
        """
        Walk the rows stored in the footer section, in stored order.
        Yields:
            Every TableRow held in ``self._footer_rows``
        """
        yield from self._footer_rows
    def all_rows(self) -> Iterator[Tuple[str, TableRow]]:
        """
        Walk every row of the table, tagging each with its section name.
        Sections are visited in the fixed order header, body, footer.
        Yields:
            ``(section, row)`` tuples, where section is "header", "body" or "footer"
        """
        section_attrs = (
            ("header", "_header_rows"),
            ("body", "_rows"),
            ("footer", "_footer_rows"),
        )
        for label, attr_name in section_attrs:
            # Look the list up only when its section is reached
            for row in getattr(self, attr_name):
                yield (label, row)
    @property
    def row_count(self) -> Dict[str, int]:
        """Report how many rows each section holds, plus the overall total."""
        header_n = len(self._header_rows)
        body_n = len(self._rows)
        footer_n = len(self._footer_rows)
        counts = {
            "header": header_n,
            "body": body_n,
            "footer": footer_n,
            "total": header_n + body_n + footer_n,
        }
        return counts
 class Image(Block):
    """
    An image element with source, dimensions, and alternative text.
    """
-class Image:
+    def __init__(self, source: str = "", alt_text: str = "", width: Optional[int] = None, height: Optional[int] = None):
        """
        Initialize an image element.
-    pass
+        Args:
            source: The image source URL or path
            alt_text: Alternative text for accessibility
            width: Optional image width in pixels
            height: Optional image height in pixels
        """
        super().__init__(BlockType.IMAGE)
        self._source = source
        self._alt_text = alt_text
        self._width = width
        self._height = height
-class HorizontalRule:
+    @classmethod
    def create_and_add_to(cls, container, source: str = "", alt_text: str = "", 
                         width: Optional[int] = None, height: Optional[int] = None) -> 'Image':
        """
        Create a new Image and add it to a container.
-    pass
+        Args:
            container: The container to add the image to (must have add_block method)
            source: The image source URL or path
            alt_text: Alternative text for accessibility
            width: Optional image width in pixels
            height: Optional image height in pixels
        Returns:
            The newly created Image object
        Raises:
            AttributeError: If the container doesn't have the required add_block method
        """
        # Create the new image
        image = cls(source, alt_text, width, height)
        # Add the image to the container
        if hasattr(container, 'add_block'):
            container.add_block(image)
        else:
            raise AttributeError(f"Container {type(container).__name__} must have an 'add_block' method")
        return image
    @property
    def source(self) -> str:
        """Return the image source held in ``self._source``."""
        return self._source
    @source.setter
    def source(self, source: str) -> None:
        """Replace the stored image source; the value is stored as given, with no validation."""
        self._source = source
    @property
    def alt_text(self) -> str:
        """Return the alternative text held in ``self._alt_text``."""
        return self._alt_text
    @alt_text.setter
    def alt_text(self, alt_text: str) -> None:
        """Replace the stored alternative text; the value is stored as given, with no validation."""
        self._alt_text = alt_text
    @property
    def width(self) -> Optional[int]:
        """Return the stored image width, or None when no width has been set."""
        return self._width
    @width.setter
    def width(self, width: Optional[int]) -> None:
        """Replace the stored image width; None is accepted and no range check is performed."""
        self._width = width
    @property
    def height(self) -> Optional[int]:
        """Return the stored image height, or None when no height has been set."""
        return self._height
    @height.setter
    def height(self, height: Optional[int]) -> None:
        """Replace the stored image height; None is accepted and no range check is performed."""
        self._height = height
    def get_dimensions(self) -> Tuple[Optional[int], Optional[int]]:
        """
        Bundle the stored width and height into one tuple.
        Returns:
            ``(width, height)``; either entry is None when that dimension is unset
        """
        dimensions = (self._width, self._height)
        return dimensions
    def get_aspect_ratio(self) -> Optional[float]:
        """
        Compute width divided by height for this image.
        Returns:
            ``width / height``, or None when either dimension is unset or the
            height is not greater than zero
        """
        w, h = self._width, self._height
        # Bail out early when the ratio is undefined
        if w is None or h is None or not h > 0:
            return None
        return w / h
    def calculate_scaled_dimensions(self, max_width: Optional[int] = None,
                                  max_height: Optional[int] = None) -> Tuple[Optional[int], Optional[int]]:
        """
        Calculate scaled dimensions that fit within the given constraints.
        The image is only ever scaled down. The width constraint is applied
        first, then the height constraint is applied to the result. Scaled
        values are truncated to integers, but a side that was positive is
        never allowed to collapse to 0 when the constraint itself is positive.
        Args:
            max_width: Maximum allowed width, or None for no width limit
            max_height: Maximum allowed height, or None for no height limit
        Returns:
            Tuple of (scaled_width, scaled_height). When either stored
            dimension is None the stored values are returned unchanged.
        """
        if self._width is None or self._height is None:
            return (self._width, self._height)
        width, height = self._width, self._height
        # Scale down to the width limit if needed
        if max_width is not None and width > max_width:
            scaled_height = int(height * max_width / width)
            # int() truncation turns e.g. 1000x3 -> 100x0; keep a real side at >= 1
            if scaled_height == 0 and height > 0 and max_width > 0:
                scaled_height = 1
            width, height = max_width, scaled_height
        # Then scale down to the height limit if still needed
        if max_height is not None and height > max_height:
            scaled_width = int(width * max_height / height)
            if scaled_width == 0 and width > 0 and max_height > 0:
                scaled_width = 1
            width, height = scaled_width, max_height
        return (width, height)
 class HorizontalRule(Block):
    """
    A horizontal rule element (hr tag).
    """
    def __init__(self) -> None:
        """Initialize a horizontal rule element.
        Takes no arguments; the parent initializer is called with
        ``BlockType.HORIZONTAL_RULE``.
        """
        super().__init__(BlockType.HORIZONTAL_RULE)
    @classmethod
    def create_and_add_to(cls, container) -> 'HorizontalRule':
        """
        Build a HorizontalRule and hand it to ``container.add_block``.
        The rule is instantiated before the container is inspected.
        Args:
            container: Target object; it must expose an ``add_block`` method
        Returns:
            The HorizontalRule that was created and added
        Raises:
            AttributeError: If the container has no ``add_block`` attribute
        """
        rule = cls()
        # Reject containers that cannot accept blocks
        if not hasattr(container, 'add_block'):
            raise AttributeError(f"Container {type(container).__name__} must have an 'add_block' method")
        container.add_block(rule)
        return rule
--- a/pyWebLayout/abstract/functional.py
+++ b/pyWebLayout/abstract/functional.py
@ -124,6 +124,11 @@ class Button(Interactable):
        """Enable or disable the button"""
        self._enabled = enabled
    @property
    def params(self) -> Dict[str, Any]:
        """Return the button parameters stored in ``self._params``.
        The stored object itself is returned, not a copy.
        """
        return self._params
    def execute(self) -> Any:
        """
        Execute the button's callback function if the button is enabled.
--- a/pyWebLayout/abstract/inline.py
+++ b/pyWebLayout/abstract/inline.py
@ -2,6 +2,7 @@ from __future__ import annotations
 from pyWebLayout.base import Queriable
 from pyWebLayout.style import Font
 from typing import Tuple, Union, List, Optional, Dict
 import pyphen
 class Word:
@ -157,9 +158,6 @@ class Word:
        Returns:
            bool: True if the word can be hyphenated, False otherwise.
        """
        # Only import pyphen when needed
        import pyphen
        # Use the provided language or fall back to style language
        lang = language if language else self._style.language
        dic = pyphen.Pyphen(lang=lang)
@ -178,9 +176,6 @@ class Word:
        Returns:
            bool: True if the word was hyphenated, False otherwise.
        """
        # Only import pyphen when needed
        import pyphen
        # Use the provided language or fall back to style language
        lang = language if language else self._style.language
        dic = pyphen.Pyphen(lang=lang)
@ -333,5 +328,58 @@ class FormattedSpan:
 class LineBreak:
    """
    A line break element that forces a new line within text content.
    While this is an inline element that can occur within paragraphs,
    it has block-like properties for consistency with the abstract model.
    """
-    pass
+    def __init__(self):
        """Initialize a line break element."""
        # Import here to avoid circular imports
        from .block import BlockType
        self._block_type = BlockType.LINE_BREAK
        self._parent = None
    @property
    def block_type(self):
        """Return the block type recorded for this line break (``self._block_type``)."""
        return self._block_type
    @property
    def parent(self):
        """Return the element recorded as containing this line break, or None if unset."""
        return self._parent
    @parent.setter
    def parent(self, parent) -> None:
        """Record ``parent`` as the containing element; the value is stored as given."""
        self._parent = parent
    @classmethod
    def create_and_add_to(cls, container) -> 'LineBreak':
        """
        Build a LineBreak and attach it to ``container``.
        The first of ``add_line_break``, ``add_element`` and ``add_word`` that
        the container exposes is called with the new line break. When the
        container has none of them, the line break's ``parent`` is set to the
        container instead.
        Args:
            container: The object that should receive the line break
        Returns:
            The LineBreak that was created
        """
        new_break = cls()
        # Preference order matters: the dedicated method wins over generic ones
        for adder_name in ('add_line_break', 'add_element', 'add_word'):
            if hasattr(container, adder_name):
                getattr(container, adder_name)(new_break)
                break
        else:
            # No suitable adder found: record the relationship manually
            new_break.parent = container
        return new_break
--- a/pyWebLayout/io/init.py
+++ b/pyWebLayout/io/init.py
@ -21,9 +21,11 @@ from pyWebLayout.io.readers.base import BaseReader, MetadataReader, ContentReade
 # Specialized HTML readers
 from pyWebLayout.io.readers.html_metadata import HTMLMetadataReader
 from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.io.readers.html_resources import HTMLResourceReader
 # HTML extraction parser (the best approach)
 from pyWebLayout.io.readers.html_extraction import parse_html_string as parse_html_extraction
 # Specialized EPUB readers
 from pyWebLayout.io.readers.epub_metadata import EPUBMetadataReader
--- a/pyWebLayout/io/readers/init.py
+++ b/pyWebLayout/io/readers/init.py
@ -11,13 +11,8 @@ from .base import BaseReader, MetadataReader, ContentReader, ResourceReader, Com
 # HTML readers (decomposed)
 from .html import HTMLReader, read_html, read_html_file, parse_html_string
 from .html_metadata import HTMLMetadataReader
 from .html_content import HTMLContentReader
 from .html_resources import HTMLResourceReader
 # HTML processing components (supporting modules)
 from .html_style import HTMLStyleManager
 from .html_text import HTMLTextProcessor
 from .html_elements import BlockElementHandler, ListElementHandler, TableElementHandler, InlineElementHandler
 # EPUB readers
 from .epub_reader import read_epub  # Legacy
@ -29,7 +24,7 @@ __all__ = [
    # HTML readers
    'HTMLReader', 'read_html', 'read_html_file', 'parse_html_string',
-    'HTMLMetadataReader', 'HTMLContentReader', 'HTMLResourceReader',
+    'HTMLMetadataReader', 'HTMLResourceReader',
    # EPUB readers
    'read_epub', 'EPUBMetadataReader',
--- a/pyWebLayout/io/readers/html.py
+++ b/pyWebLayout/io/readers/html.py
@ -1,36 +1,33 @@
 """
 Modern HTML reader for pyWebLayout.
-This module provides a decomposed HTML reader that uses specialized
+This module provides an HTML reader that uses the html_extraction module
-readers for metadata, content, and resources, following the pattern
+for clean, handler-based parsing using BeautifulSoup.
 established in the abstract module.
 """
 import os
 from typing import Union, Optional
 from pyWebLayout.abstract.document import Document
-from pyWebLayout.io.readers.base import CompositeReader
+from pyWebLayout.io.readers.base import BaseReader
 from pyWebLayout.io.readers.html_metadata import HTMLMetadataReader
 from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.io.readers.html_resources import HTMLResourceReader
 from pyWebLayout.io.readers.html_extraction import parse_html_string
 from pyWebLayout.style import Font
-class HTMLReader(CompositeReader):
+class HTMLReader(BaseReader):
    """
-    Modern HTML reader using decomposed architecture.
+    Modern HTML reader using the html_extraction parser.
-    This reader combines specialized readers for metadata, content,
+    This reader uses the clean, handler-based architecture from html_extraction.py
-    and resources to provide a complete HTML parsing solution.
+    for parsing HTML content into pyWebLayout's abstract document structure.
    """
    def __init__(self):
-        """Initialize the HTML reader with all specialized readers."""
+        """Initialize the HTML reader."""
        super().__init__()
-        
+        self._metadata_reader = HTMLMetadataReader()
-        # Set up specialized readers
+        self._resource_reader = HTMLResourceReader()
        self.set_metadata_reader(HTMLMetadataReader())
        self.set_content_reader(HTMLContentReader())
        self.set_resource_reader(HTMLResourceReader())
    def can_read(self, source: Union[str, bytes]) -> bool:
        """
@ -76,6 +73,7 @@ class HTMLReader(CompositeReader):
                - encoding: Character encoding (default: 'utf-8')
                - extract_metadata: Whether to extract metadata (default: True)
                - extract_resources: Whether to extract resources (default: True)
                - base_font: Base font for styling (default: None)
        Returns:
            The parsed Document
@ -85,6 +83,7 @@ class HTMLReader(CompositeReader):
        encoding = options.get('encoding', 'utf-8')
        extract_metadata = options.get('extract_metadata', True)
        extract_resources = options.get('extract_resources', True)
        base_font = options.get('base_font')
        # Read the HTML content
        html_content = self._read_html_content(source, encoding)
@ -93,10 +92,6 @@ class HTMLReader(CompositeReader):
        if not base_url and isinstance(source, str) and os.path.isfile(source):
            base_url = f"file://{os.path.dirname(os.path.abspath(source))}/"
        # Set base URL in content reader
        if self._content_reader and hasattr(self._content_reader, 'set_base_url'):
            self._content_reader.set_base_url(base_url)
        # Create a new document
        document = Document()
@ -104,9 +99,10 @@ class HTMLReader(CompositeReader):
        if extract_metadata and self._metadata_reader:
            self._metadata_reader.extract_metadata(html_content, document)
-        # Extract content
+        # Parse content using html_extraction
-        if self._content_reader:
+        blocks = parse_html_string(html_content, base_font)
-            self._content_reader.extract_content(html_content, document)
+        for block in blocks:
            document.add_block(block)
        # Extract resources if enabled
        if extract_resources and self._resource_reader:
--- a/pyWebLayout/io/readers/html_content.py
+++ b/pyWebLayout/io/readers/html_content.py
@ -1,269 +0,0 @@
 """
 Modern HTML content reader for pyWebLayout.
 This module provides a decomposed HTML content reader that uses specialized
 handlers and managers for different aspects of HTML parsing.
 """
 from html.parser import HTMLParser as BaseHTMLParser
 from typing import Dict, List, Optional, Tuple, Union, Any
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.io.readers.base import ContentReader
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 from pyWebLayout.io.readers.html_text import HTMLTextProcessor
 from pyWebLayout.io.readers.html_elements import (
    BlockElementHandler, ListElementHandler, TableElementHandler, InlineElementHandler
 )
 class HTMLContentReader(ContentReader, BaseHTMLParser):
    """
    Modern HTML content reader using decomposed architecture.
    This class orchestrates specialized handlers to parse HTML content
    and convert it to pyWebLayout's abstract document model.
    """
    def __init__(self) -> None:
        """Initialize the HTML content reader.
        Creates the style manager, the text processor and the four element
        handlers, then clears the document reference and the section flags.
        """
        # Only the HTMLParser base initializer is invoked explicitly here
        BaseHTMLParser.__init__(self)
        # Initialize managers and processors (the text processor shares the style manager)
        self.style_manager = HTMLStyleManager()
        self.text_processor = HTMLTextProcessor(self.style_manager)
        # Initialize element handlers; all of them share the same text processor
        self.block_handler = BlockElementHandler(self.style_manager, self.text_processor)
        self.list_handler = ListElementHandler(self.text_processor)
        self.table_handler = TableElementHandler(self.text_processor)
        self.inline_handler = InlineElementHandler(self.text_processor)
        # Document and parsing state; the flags track whether the parser is
        # currently inside <head>, <script> or <style>
        self._document: Optional[Document] = None
        self._in_head = False
        self._in_script = False
        self._in_style = False
    def extract_content(self, html_content: str, document: Document) -> Any:
        """
        Extract content from HTML.
        Stores ``document`` as the current target, resets all parser state,
        feeds the markup through the HTMLParser machinery and finally flushes
        any text still buffered in the text processor.
        Args:
            html_content: The HTML content to parse
            document: The document to populate with content
        Returns:
            The same ``document`` object that was passed in
        """
        self._document = document
        self._reset_state()
        # Parse the HTML content (drives the handle_* callbacks)
        self.feed(html_content)
        # Flush any remaining text
        self.text_processor.flush_text()
        return document
    def set_base_url(self, base_url: str) -> None:
        """Set the base URL for resolving relative links.
        The value is forwarded unchanged to ``self.inline_handler.set_base_url``.
        """
        self.inline_handler.set_base_url(base_url)
    def _reset_state(self):
        """Reset all parser state for new content."""
        # Reset managers and processors
        self.style_manager.reset()
        self.text_processor.reset()
        # Reset element handlers
        self.block_handler.reset()
        self.list_handler.reset()
        self.table_handler.reset()
        self.inline_handler.reset()
        # Reset parser flags
        self._in_head = False
        self._in_script = False
        self._in_style = False
    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        """Process an opening tag: filter it, push its style, then dispatch it.
        Args:
            tag: Tag name; it is lower-cased before any further use
            attrs: ``(name, value)`` attribute pairs, converted to a dict
        """
        tag = tag.lower()
        attributes = dict(attrs)
        # Drop tags inside skipped sections, and stop once a section marker
        # (head/body/script/style) has been consumed; neither path pushes a style
        if self._should_skip_content(tag) or self._handle_special_sections_start(tag):
            return
        # NOTE(review): a style is pushed for every tag reaching this point,
        # including br/img/hr; confirm a matching end-tag event always pops it
        self.style_manager.push_style(
            self.style_manager.apply_style_to_element(tag, attributes)
        )
        self._delegate_start_tag(tag, attributes)
    def handle_endtag(self, tag: str):
        """Handle the end of an HTML tag.
        Section markers (head/script/style) are consumed by
        ``_handle_special_sections_end``. End tags seen while inside a skipped
        section are ignored. For everything else the buffered text is flushed,
        the matching element handler is notified and the element's style is
        popped.
        Args:
            tag: Tag name; it is lower-cased before any further use
        """
        tag = tag.lower()
        # Handle special section markers
        if self._handle_special_sections_end(tag):
            return
        # Skip content in head, script, style
        if self._in_head or self._in_script or self._in_style:
            return
        # Flush any accumulated text
        self.text_processor.flush_text()
        # Delegate to appropriate handler
        self._delegate_end_tag(tag)
        # <body> is consumed as a section marker on open and never pushes a
        # style, so popping for </body> would unbalance the style stack
        if tag != 'body':
            self.style_manager.pop_style()
    def handle_data(self, data: str):
        """Forward text data to the text processor unless inside head, script or style."""
        suppressed = self._in_head or self._in_script or self._in_style
        if not suppressed:
            self.text_processor.add_text(data)
    def handle_entityref(self, name: str):
        """Forward an entity reference to the text processor unless inside head, script or style."""
        suppressed = self._in_head or self._in_script or self._in_style
        if not suppressed:
            self.text_processor.add_entity_reference(name)
    def handle_charref(self, name: str):
        """Forward a character reference to the text processor unless inside head, script or style."""
        suppressed = self._in_head or self._in_script or self._in_style
        if not suppressed:
            self.text_processor.add_character_reference(name)
    def _should_skip_content(self, tag: str) -> bool:
        """Check if we should skip content based on current state."""
        if self._in_head or self._in_script or self._in_style:
            if tag in ('head', 'script', 'style'):
                return False  # Let special section handlers deal with these
            if tag != 'body':
                return True
        return False
    def _handle_special_sections_start(self, tag: str) -> bool:
        """Handle special section start tags. Returns True if handled."""
        if tag == 'head':
            self._in_head = True
            return True
        elif tag == 'body':
            self._in_head = False
            return True
        elif tag == 'script':
            self._in_script = True
            return True
        elif tag == 'style':
            self._in_style = True
            return True
        return False
    def _handle_special_sections_end(self, tag: str) -> bool:
        """Handle special section end tags. Returns True if handled."""
        if tag == 'head':
            self._in_head = False
            self.style_manager.pop_style()
            return True
        elif tag == 'script':
            self._in_script = False
            self.style_manager.pop_style()
            return True
        elif tag == 'style':
            self._in_style = False
            self.style_manager.pop_style()
            return True
        return False
    def _delegate_start_tag(self, tag: str, attrs: Dict[str, Optional[str]]) -> None:
        """Delegate start tag handling to appropriate handler.
        Exactly one branch runs per call; tags that match no branch are
        ignored.
        Args:
            tag: Lower-cased tag name
            attrs: Attribute dict built by ``handle_starttag``
        """
        # Block elements
        if tag == 'p':
            self.block_handler.handle_paragraph_start(self._document)
        elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            self.block_handler.handle_heading_start(tag, self._document)
        elif tag == 'div':
            self.block_handler.handle_div_start(self._document)
        elif tag == 'blockquote':
            self.block_handler.handle_blockquote_start(self._document)
        elif tag == 'pre':
            self.block_handler.handle_pre_start(self._document)
        elif tag == 'code':
            self.block_handler.handle_code_start(attrs, self._document)
        # List elements (the list handler is given the block handler to work with)
        elif tag in ('ul', 'ol', 'dl'):
            self.list_handler.handle_list_start(tag, self.block_handler, self._document)
        elif tag == 'li':
            self.list_handler.handle_list_item_start(self.block_handler)
        elif tag in ('dt', 'dd'):
            self.list_handler.handle_definition_start(tag, self.block_handler)
        # Table elements
        elif tag == 'table':
            self.table_handler.handle_table_start(attrs, self.block_handler, self._document)
        elif tag in ('thead', 'tbody', 'tfoot'):
            self.table_handler.handle_table_section_start(tag)
        elif tag == 'tr':
            self.table_handler.handle_table_row_start()
        elif tag in ('td', 'th'):
            self.table_handler.handle_table_cell_start(tag, attrs, self.block_handler)
        # Inline elements (img, br and hr are routed through the inline handler)
        elif tag == 'a':
            self.inline_handler.handle_link_start(attrs)
        elif tag == 'img':
            self.inline_handler.handle_image(attrs, self.block_handler, self._document)
        elif tag == 'br':
            self.inline_handler.handle_line_break(self.block_handler)
        elif tag == 'hr':
            self.inline_handler.handle_horizontal_rule(self.block_handler, self._document)
        # Style-only elements (no special handling needed, just styling)
        elif tag in ('b', 'strong', 'i', 'em', 'u', 'span'):
            pass  # Styles are already applied by style manager
    def _delegate_end_tag(self, tag: str) -> None:
        """Delegate end tag handling to appropriate handler.
        Exactly one branch runs per call; tags that match no branch (for
        example img, br and hr) are ignored.
        Args:
            tag: Lower-cased tag name
        """
        # Block elements all share a single end handler
        if tag in ('p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'pre', 'code'):
            self.block_handler.handle_block_end()
        # List elements
        elif tag in ('ul', 'ol', 'dl'):
            self.list_handler.handle_list_end(self.block_handler)
        elif tag in ('li', 'dt', 'dd'):
            self.list_handler.handle_list_item_end(self.block_handler)
        # Table elements
        elif tag == 'table':
            self.table_handler.handle_table_end(self.block_handler)
        elif tag in ('thead', 'tbody', 'tfoot'):
            self.table_handler.handle_table_section_end()
        elif tag == 'tr':
            self.table_handler.handle_table_row_end()
        elif tag in ('td', 'th'):
            self.table_handler.handle_table_cell_end(self.block_handler)
        # Inline elements
        elif tag == 'a':
            self.inline_handler.handle_link_end()
        # Style-only elements (no special handling needed)
        elif tag in ('b', 'strong', 'i', 'em', 'u', 'span'):
            pass  # Styles are handled by style manager
--- a/pyWebLayout/io/readers/html_elements.py
+++ b/pyWebLayout/io/readers/html_elements.py
@ -1,473 +0,0 @@
 """
 HTML element handlers for pyWebLayout.
 This module provides specialized handlers for different types of HTML elements,
 using composition and delegation to handle specific element types.
 """
 from typing import Dict, List, Optional, Any
 import urllib.parse
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.abstract.block import (
    Block, Paragraph, Heading, HeadingLevel, Quote, CodeBlock,
    HList, ListStyle, ListItem, Table, TableRow, TableCell, 
    HorizontalRule, Image
 )
 from pyWebLayout.abstract.inline import LineBreak
 from pyWebLayout.abstract.functional import Link, LinkType
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 from pyWebLayout.io.readers.html_text import HTMLTextProcessor
 class BlockElementHandler:
    """
    Handles block-level HTML elements like paragraphs, headings, divs.

    Open blocks are tracked on ``block_stack``. ``current_block`` is the block
    new children are attached to, and ``current_paragraph`` is the block that
    receives text (None when the open block does not take text directly).
    """

    def __init__(self, style_manager: HTMLStyleManager, text_processor: HTMLTextProcessor):
        """
        Initialize the handler.

        Args:
            style_manager: Style manager stored on the handler (not used by
                the methods of this class)
            text_processor: Text processor that is flushed and told which
                paragraph receives text
        """
        self.style_manager = style_manager
        self.text_processor = text_processor
        self.block_stack: List[Block] = []
        self.current_block: Optional[Block] = None
        self.current_paragraph: Optional[Paragraph] = None

    def reset(self):
        """Reset the handler state."""
        self.block_stack = []
        self.current_block = None
        self.current_paragraph = None

    def add_block_to_document_or_parent(self, block: Block, document: Document):
        """
        Add a block to the current parent block, or to the document.

        The block goes to ``current_block`` when that block exposes
        ``add_block``; otherwise it is added to the document.

        Args:
            block: The block to attach
            document: The document that receives top-level blocks
        """
        if self.current_block and hasattr(self.current_block, 'add_block'):
            self.current_block.add_block(block)
        else:
            document.add_block(block)

    def _enter_block(self, block: Block, document: Document, text_target: Optional[Paragraph]):
        """Attach ``block``, push it on the stack and make ``text_target`` the text receiver."""
        self.add_block_to_document_or_parent(block, document)
        self.block_stack.append(block)
        self.current_block = block
        self.current_paragraph = text_target
        self.text_processor.set_current_paragraph(text_target)

    @staticmethod
    def _extract_code_language(attrs: Dict[str, str]) -> str:
        """
        Return the language named by a ``language-xxx`` class token.

        The ``class`` attribute may hold several space-separated tokens, so
        each token is inspected; the first ``language-`` token wins.

        Args:
            attrs: Attributes of the ``<code>`` tag

        Returns:
            The language name, or an empty string when none is given
        """
        prefix = 'language-'
        # ``or ''`` also covers a class attribute whose value is None.
        for token in (attrs.get('class') or '').split():
            if token.startswith(prefix):
                return token[len(prefix):]
        return ""

    def handle_paragraph_start(self, document: Document):
        """Handle the start of a paragraph element."""
        self.text_processor.flush_text()
        paragraph = Paragraph()
        self._enter_block(paragraph, document, paragraph)

    def handle_heading_start(self, tag: str, document: Document):
        """
        Handle the start of a heading element.

        Args:
            tag: One of 'h1' .. 'h6'
            document: The document being built

        Raises:
            KeyError: If ``tag`` is not a heading tag
        """
        self.text_processor.flush_text()
        level_map = {
            'h1': HeadingLevel.H1, 'h2': HeadingLevel.H2, 'h3': HeadingLevel.H3,
            'h4': HeadingLevel.H4, 'h5': HeadingLevel.H5, 'h6': HeadingLevel.H6
        }
        heading = Heading(level=level_map[tag])
        # The heading doubles as the text receiver (Heading inherits from Paragraph).
        self._enter_block(heading, document, heading)

    def handle_div_start(self, document: Document):
        """Handle the start of a div element (represented as a Paragraph)."""
        self.text_processor.flush_text()
        div_para = Paragraph()
        self._enter_block(div_para, document, div_para)

    def handle_blockquote_start(self, document: Document):
        """Handle the start of a blockquote element; the quote itself takes no text."""
        self.text_processor.flush_text()
        self._enter_block(Quote(), document, None)

    def handle_pre_start(self, document: Document):
        """Handle the start of a pre element (represented as a Paragraph)."""
        self.text_processor.flush_text()
        pre_para = Paragraph()
        self._enter_block(pre_para, document, pre_para)

    def handle_code_start(self, attrs: Dict[str, str], document: Document):
        """
        Handle the start of a code element.

        When the innermost open block is a Paragraph (as created for
        ``<pre>``), that paragraph is replaced by a CodeBlock in its parent
        or in the document. Otherwise nothing happens.

        Args:
            attrs: Attributes of the ``<code>`` tag; a ``language-xxx`` class
                token selects the code block language
            document: The document being built
        """
        # NOTE(review): paragraphs opened for <p> and <div> are Paragraph
        # instances too, so an inline <code> inside them is converted as
        # well - confirm whether that is intended.
        if not (self.block_stack and isinstance(self.block_stack[-1], Paragraph)):
            return

        pre_para = self.block_stack.pop()
        code_block = CodeBlock(language=self._extract_code_language(attrs))

        if pre_para.parent:
            # Replace the paragraph with the code block in its parent
            parent = pre_para.parent
            if hasattr(parent, '_blocks'):
                for i, block in enumerate(parent._blocks):
                    if block == pre_para:
                        parent._blocks[i] = code_block
                        code_block.parent = parent
                        break
        else:
            # Replace in document blocks
            for i, block in enumerate(document.blocks):
                if block == pre_para:
                    document.blocks[i] = code_block
                    break

        self.block_stack.append(code_block)
        self.current_block = code_block
        self.current_paragraph = None
        self.text_processor.set_current_paragraph(None)

    def handle_block_end(self):
        """
        Handle the end of a block element.

        Pops the innermost open block and makes the new top of the stack
        current; text is routed to it only when it is a Paragraph.
        """
        if self.block_stack:
            self.block_stack.pop()

        if self.block_stack:
            self.current_block = self.block_stack[-1]
            # Update current paragraph based on block type
            if isinstance(self.current_block, Paragraph):
                self.current_paragraph = self.current_block
            else:
                self.current_paragraph = None
        else:
            self.current_block = None
            self.current_paragraph = None
        self.text_processor.set_current_paragraph(self.current_paragraph)
 class ListElementHandler:
    """
    Handles list-related HTML elements (ul, ol, dl, li, dt, dd).

    Lists and list items are pushed onto the block handler's ``block_stack``
    so that nested content is attached to them; open lists are additionally
    tracked on ``list_stack``.
    """

    def __init__(self, text_processor: HTMLTextProcessor):
        """
        Initialize the handler.

        Args:
            text_processor: Text processor that is flushed and told which
                paragraph receives text
        """
        self.text_processor = text_processor
        self.list_stack: List[HList] = []
        # Items this handler pushed onto the block stack and has not popped
        # yet; lets handle_list_item_end ignore end tags whose start tag was
        # ignored, instead of popping an unrelated block.
        self._open_items = 0

    def reset(self):
        """Reset the handler state."""
        self.list_stack = []
        self._open_items = 0

    def _enter_item(self, list_item, block_handler: BlockElementHandler):
        """Push ``list_item`` on the block stack and make it the current block."""
        block_handler.block_stack.append(list_item)
        block_handler.current_block = list_item
        self._open_items += 1

    def _start_item_paragraph(self, list_item, block_handler: BlockElementHandler):
        """Add a fresh paragraph to ``list_item`` and route text to it."""
        paragraph = Paragraph()
        list_item.add_block(paragraph)
        block_handler.current_paragraph = paragraph
        self.text_processor.set_current_paragraph(paragraph)

    def handle_list_start(self, tag: str, block_handler: BlockElementHandler, document: Document):
        """
        Handle the start of a list element.

        Args:
            tag: 'ul', 'ol' or 'dl'
            block_handler: Block handler whose stack receives the list
            document: The document being built

        Raises:
            KeyError: If ``tag`` is not a list tag
        """
        self.text_processor.flush_text()
        style_map = {
            'ul': ListStyle.UNORDERED,
            'ol': ListStyle.ORDERED,
            'dl': ListStyle.DEFINITION
        }
        list_block = HList(style=style_map[tag])
        block_handler.add_block_to_document_or_parent(list_block, document)
        block_handler.block_stack.append(list_block)
        self.list_stack.append(list_block)
        block_handler.current_block = list_block
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)

    def handle_list_item_start(self, block_handler: BlockElementHandler):
        """Handle the start of a list item; ignored when no list is open."""
        if not self.list_stack:
            return
        self.text_processor.flush_text()
        list_item = ListItem()
        self.list_stack[-1].add_item(list_item)
        self._enter_item(list_item, block_handler)
        # Create a paragraph for the list item content
        self._start_item_paragraph(list_item, block_handler)

    def handle_definition_start(self, tag: str, block_handler: BlockElementHandler):
        """
        Handle the start of definition terms or descriptions.

        Ignored unless the innermost open list is a definition list. A
        ``dt`` opens a new item; a ``dd`` adds a description paragraph to the
        most recent item (or to a new item when the list is still empty).

        Args:
            tag: 'dt' or 'dd'
            block_handler: Block handler whose stack receives the item
        """
        if not self.list_stack or self.list_stack[-1].style != ListStyle.DEFINITION:
            return
        self.text_processor.flush_text()
        current_list = self.list_stack[-1]

        if tag == 'dt':
            list_item = ListItem(term="")
            current_list.add_item(list_item)
        elif tag == 'dd':
            if current_list._items:
                list_item = current_list._items[-1]
            else:
                # A description without a preceding term still needs an
                # item to hold its content.
                list_item = ListItem(term="")
                current_list.add_item(list_item)
        else:
            return

        # Both dt and dd are closed through handle_list_item_end, which pops
        # one entry, so both must push one entry here.
        self._enter_item(list_item, block_handler)
        self._start_item_paragraph(list_item, block_handler)

    def handle_list_end(self, block_handler: BlockElementHandler):
        """Handle the end of a list."""
        if block_handler.block_stack:
            block_handler.block_stack.pop()
        if self.list_stack:
            self.list_stack.pop()

        if block_handler.block_stack:
            block_handler.current_block = block_handler.block_stack[-1]
        else:
            block_handler.current_block = None
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)

    def handle_list_item_end(self, block_handler: BlockElementHandler):
        """
        Handle the end of a list item (li, dt or dd).

        End tags without a matching item opened by this handler are ignored
        so that they cannot pop a block that belongs to another element.
        """
        if self._open_items <= 0:
            return
        self._open_items -= 1

        if block_handler.block_stack:
            block_handler.block_stack.pop()

        if block_handler.block_stack:
            block_handler.current_block = block_handler.block_stack[-1]
        else:
            block_handler.current_block = None
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)
 class TableElementHandler:
    """
    Handles table-related HTML elements (table, tr, td, th, thead, tbody, tfoot).

    Tables and cells are pushed onto the block handler's ``block_stack``;
    open tables are additionally tracked on ``table_stack``. The current row
    and section are per table and are saved/restored around nested tables.
    """

    def __init__(self, text_processor: HTMLTextProcessor):
        """
        Initialize the handler.

        Args:
            text_processor: Text processor that is flushed and told which
                paragraph receives text
        """
        self.text_processor = text_processor
        self.table_stack: List[Table] = []
        self.current_table_row: Optional[TableRow] = None
        self.current_table_section = "body"
        # (row, section) of each enclosing table, saved while a table is open
        self._suspended: List[Any] = []

    def reset(self):
        """Reset the handler state."""
        self.table_stack = []
        self.current_table_row = None
        self.current_table_section = "body"
        self._suspended = []

    @staticmethod
    def _parse_span(raw: Any) -> int:
        """
        Parse a colspan/rowspan attribute value.

        Args:
            raw: The attribute value (string, int default, or None)

        Returns:
            The parsed integer, or 1 when the value is missing or not a number
        """
        try:
            return int(raw)
        except (TypeError, ValueError):
            return 1

    def handle_table_start(self, attrs: Dict[str, str], block_handler: BlockElementHandler, document: Document):
        """
        Handle the start of a table element.

        Args:
            attrs: Attributes of the tag; ``summary`` is used as the caption
            block_handler: Block handler whose stack receives the table
            document: The document being built
        """
        self.text_processor.flush_text()

        # Remember where the enclosing table (if any) was, then start clean.
        self._suspended.append((self.current_table_row, self.current_table_section))
        self.current_table_row = None
        self.current_table_section = "body"

        caption = attrs.get('summary')
        table = Table(caption=caption)
        block_handler.add_block_to_document_or_parent(table, document)
        block_handler.block_stack.append(table)
        self.table_stack.append(table)
        block_handler.current_block = table
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)

    def handle_table_section_start(self, tag: str):
        """Handle the start of a table section by recording its tag name."""
        self.current_table_section = tag

    def handle_table_row_start(self):
        """Handle the start of a table row; ignored when no table is open."""
        if not self.table_stack:
            return
        self.text_processor.flush_text()
        row = TableRow()
        # Translate the section tag to the section name expected by add_row.
        section = {'thead': "header", 'tfoot': "footer"}.get(self.current_table_section, "body")
        self.table_stack[-1].add_row(row, section=section)
        self.current_table_row = row

    def handle_table_cell_start(self, tag: str, attrs: Dict[str, str], block_handler: BlockElementHandler):
        """
        Handle the start of a table cell; ignored when no row is open.

        Args:
            tag: 'td' or 'th' ('th' marks the cell as a header cell)
            attrs: Attributes of the tag; ``colspan`` and ``rowspan`` are
                parsed independently and default to 1
            block_handler: Block handler whose stack receives the cell
        """
        if self.current_table_row is None:
            return
        self.text_processor.flush_text()

        cell = TableCell(
            is_header=(tag == 'th'),
            colspan=self._parse_span(attrs.get('colspan', 1)),
            rowspan=self._parse_span(attrs.get('rowspan', 1)),
        )
        self.current_table_row.add_cell(cell)
        block_handler.block_stack.append(cell)
        block_handler.current_block = cell

        # Create a paragraph for the cell content
        cell_para = Paragraph()
        cell.add_block(cell_para)
        block_handler.current_paragraph = cell_para
        self.text_processor.set_current_paragraph(cell_para)

    def handle_table_end(self, block_handler: BlockElementHandler):
        """
        Handle the end of a table.

        Restores the row and section of the enclosing table, if there is
        one, so its remaining cells are still attached to the right row.
        """
        if block_handler.block_stack:
            block_handler.block_stack.pop()
        if self.table_stack:
            self.table_stack.pop()

        if block_handler.block_stack:
            block_handler.current_block = block_handler.block_stack[-1]
        else:
            block_handler.current_block = None
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)

        if self._suspended:
            self.current_table_row, self.current_table_section = self._suspended.pop()
        else:
            self.current_table_row = None
            self.current_table_section = "body"

    def handle_table_section_end(self):
        """Handle the end of a table section."""
        self.current_table_section = "body"

    def handle_table_row_end(self):
        """Handle the end of a table row."""
        self.current_table_row = None

    def handle_table_cell_end(self, block_handler: BlockElementHandler):
        """Handle the end of a table cell."""
        if block_handler.block_stack:
            block_handler.block_stack.pop()

        if block_handler.block_stack:
            block_handler.current_block = block_handler.block_stack[-1]
        else:
            block_handler.current_block = None
        block_handler.current_paragraph = None
        self.text_processor.set_current_paragraph(None)
 class InlineElementHandler:
    """Handles inline and special HTML elements (a, img, br, hr)."""

    # Link targets starting with one of these are never joined onto base_url.
    _NON_RELATIVE_PREFIXES = ('http://', 'https://', 'javascript:', 'api:', '#')

    def __init__(self, text_processor: HTMLTextProcessor, base_url: Optional[str] = None):
        """
        Initialize the handler.

        Args:
            text_processor: Text processor that is flushed around inline elements
            base_url: Optional base URL used to resolve relative links and images
        """
        self.text_processor = text_processor
        self.base_url = base_url
        self.in_link = False
        self.current_link: Optional[Link] = None

    def reset(self):
        """Reset the handler state."""
        self.in_link = False
        self.current_link = None

    def set_base_url(self, base_url: Optional[str]):
        """Set the base URL for resolving relative links."""
        self.base_url = base_url

    def _resolve_href(self, href: str) -> str:
        """Return ``href`` joined onto ``base_url`` when it is a relative target."""
        if self.base_url and not href.startswith(self._NON_RELATIVE_PREFIXES):
            return urllib.parse.urljoin(self.base_url, href)
        return href

    @staticmethod
    def _parse_dimension(raw: Any) -> Optional[int]:
        """Parse an image width/height attribute; None when missing or not an integer."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    def handle_link_start(self, attrs: Dict[str, str]):
        """
        Handle the start of a link element.

        Builds ``current_link`` from the ``href`` and ``title`` attributes.
        ``http(s)://`` targets are external, ``javascript:`` targets are
        function links, ``api:`` targets are API links (stored without the
        prefix); everything else is internal.

        Args:
            attrs: Attributes of the ``<a>`` tag
        """
        self.text_processor.flush_text()
        # ``or ''`` also covers attributes whose value is None.
        href = attrs.get('href') or ''
        title = attrs.get('title') or ''

        # Determine link type
        link_type = LinkType.INTERNAL
        if href.startswith(('http://', 'https://')):
            link_type = LinkType.EXTERNAL
        elif href.startswith('javascript:'):
            link_type = LinkType.FUNCTION
        elif href.startswith('api:'):
            link_type = LinkType.API

        # Resolve relative URLs while the scheme prefix is still present, so
        # that an API target is never mistaken for a relative path.
        href = self._resolve_href(href)
        if link_type == LinkType.API:
            href = href[4:]

        self.current_link = Link(
            location=href,
            link_type=link_type,
            title=title if title else None
        )
        self.in_link = True

    def handle_link_end(self):
        """Handle the end of a link element."""
        self.in_link = False
        self.current_link = None

    def handle_image(self, attrs: Dict[str, str], block_handler: BlockElementHandler, document: Document):
        """
        Handle an image element.

        Args:
            attrs: Attributes of the tag; ``src``, ``alt``, ``width`` and
                ``height`` are used, each dimension being parsed on its own
            block_handler: Block handler that attaches the image
            document: The document being built
        """
        src = attrs.get('src') or ''
        alt = attrs.get('alt') or ''

        # Parse dimensions; one invalid value does not discard the other.
        width = self._parse_dimension(attrs['width']) if 'width' in attrs else None
        height = self._parse_dimension(attrs['height']) if 'height' in attrs else None

        # Resolve relative URLs
        if self.base_url and not src.startswith(('http://', 'https://')):
            src = urllib.parse.urljoin(self.base_url, src)

        image = Image(source=src, alt_text=alt, width=width, height=height)
        block_handler.add_block_to_document_or_parent(image, document)

    def handle_line_break(self, block_handler: BlockElementHandler):
        """
        Handle a line break element.

        Pending text is flushed first so that it precedes the break, then a
        LineBreak is added to the current paragraph if there is one.
        """
        self.text_processor.flush_text()
        paragraph = block_handler.current_paragraph
        if paragraph and hasattr(paragraph, 'add_block'):
            paragraph.add_block(LineBreak())

    def handle_horizontal_rule(self, block_handler: BlockElementHandler, document: Document):
        """Handle a horizontal rule element."""
        self.text_processor.flush_text()
        hr = HorizontalRule()
        block_handler.add_block_to_document_or_parent(hr, document)
--- a/pyWebLayout/io/readers/html_extraction.py
+++ b/pyWebLayout/io/readers/html_extraction.py
@ -12,7 +12,8 @@ from bs4 import BeautifulSoup, Tag, NavigableString
 from pyWebLayout.abstract.inline import Word, FormattedSpan
 from pyWebLayout.abstract.block import (
    Block, Paragraph, Heading, HeadingLevel, Quote, CodeBlock, 
-    HList, ListItem, ListStyle, Table, TableRow, TableCell
+    HList, ListItem, ListStyle, Table, TableRow, TableCell,
    HorizontalRule, Image
 )
 from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
@ -576,11 +577,9 @@ def table_header_cell_handler(element: Tag, context: StyleContext) -> TableCell:
    return cell
-def horizontal_rule_handler(element: Tag, context: StyleContext) -> Block:
+def horizontal_rule_handler(element: Tag, context: StyleContext) -> HorizontalRule:
    """Handle <hr> elements."""
-    # TODO: Create a specific HorizontalRule block type
+    return HorizontalRule()
    # For now, return an empty paragraph
    return Paragraph(context.font)
 def line_break_handler(element: Tag, context: StyleContext) -> None:
@ -589,18 +588,22 @@ def line_break_handler(element: Tag, context: StyleContext) -> None:
    return None
-def image_handler(element: Tag, context: StyleContext) -> Block:
+def image_handler(element: Tag, context: StyleContext) -> Image:
    """Handle <img> elements."""
-    # TODO: Create Image block type
+    src = context.element_attributes.get('src', '')
    # For now, return empty paragraph with alt text if available
    paragraph = Paragraph(context.font)
    alt_text = context.element_attributes.get('alt', '')
-    if alt_text:
+    
-        words = alt_text.split()
+    # Parse dimensions if provided
-        for word_text in words:
+    width = height = None
-            if word_text:
+    try:
-                paragraph.add_word(Word(word_text, context.font))
+        if 'width' in context.element_attributes:
-    return paragraph
+            width = int(context.element_attributes['width'])
        if 'height' in context.element_attributes:
            height = int(context.element_attributes['height'])
    except ValueError:
        pass
    return Image(source=src, alt_text=alt_text, width=width, height=height)
 def ignore_handler(element: Tag, context: StyleContext) -> None:
--- a/pyWebLayout/io/readers/html_style.py
+++ b/pyWebLayout/io/readers/html_style.py
@ -1,281 +0,0 @@
 """
 HTML style management for pyWebLayout.
 This module provides specialized functionality for handling CSS styles,
 style stacks, and style parsing in HTML documents.
 """
 from typing import Dict, List, Any, Optional, Tuple
 import re
 from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
 class HTMLStyleManager:
    """
    Manages CSS styles and style stacks during HTML parsing.

    This class handles style parsing, style inheritance, and maintains
    the style stack for proper style nesting.
    """

    # Named CSS colors understood by parse_color, as RGB tuples.
    _NAMED_COLORS = {
        'black': (0, 0, 0),
        'white': (255, 255, 255),
        'red': (255, 0, 0),
        'green': (0, 128, 0),
        'blue': (0, 0, 255),
        'yellow': (255, 255, 0),
        'cyan': (0, 255, 255),
        'magenta': (255, 0, 255),
        'gray': (128, 128, 128),
        'grey': (128, 128, 128),
        'silver': (192, 192, 192),
        'maroon': (128, 0, 0),
        'olive': (128, 128, 0),
        'navy': (0, 0, 128),
        'purple': (128, 0, 128),
        'teal': (0, 128, 128),
        'lime': (0, 255, 0),
        'aqua': (0, 255, 255),
        'fuchsia': (255, 0, 255),
    }
    # Patterns are applied to lower-cased, stripped input.
    _HEX_COLOR = re.compile(r'#(?:[0-9a-f]{3}|[0-9a-f]{6})')
    _RGB_COLOR = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)')
    _RGBA_COLOR = re.compile(r'rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)')
    _FONT_SIZE = re.compile(r'([0-9]+(?:\.[0-9]+)?)\s*(?:px|pt)')
    _NUMERIC_WEIGHT = re.compile(r'[0-9]+')

    def __init__(self):
        """Initialize the style manager."""
        self._style_stack: List[Dict[str, Any]] = []
        self._current_style = self._get_default_style()

    def _get_default_style(self) -> Dict[str, Any]:
        """Get the default style settings."""
        return {
            'font_size': 12,
            'font_weight': FontWeight.NORMAL,
            'font_style': FontStyle.NORMAL,
            'decoration': TextDecoration.NONE,
            'color': (0, 0, 0),
            'background': None,
            'language': 'en_US'
        }

    def reset(self):
        """Reset the style manager to initial state."""
        self._style_stack = []
        self._current_style = self._get_default_style()

    def push_style(self, style: Dict[str, Any]):
        """
        Push a new style onto the style stack.

        A copy of the current style is saved, then the given properties are
        applied on top of the current style.

        Args:
            style: The style to push
        """
        self._style_stack.append(self._current_style.copy())
        self._current_style.update(style)

    def pop_style(self):
        """Pop a style from the style stack; does nothing when the stack is empty."""
        if self._style_stack:
            self._current_style = self._style_stack.pop()

    def get_current_style(self) -> Dict[str, Any]:
        """Get a copy of the current style."""
        return self._current_style.copy()

    def get_tag_style(self, tag: str) -> Dict[str, Any]:
        """
        Get the default style for a tag.

        Args:
            tag: The tag name

        Returns:
            A new dictionary of style properties (empty for unknown tags)
        """
        tag_styles = {
            'h1': {'font_size': 24, 'font_weight': FontWeight.BOLD},
            'h2': {'font_size': 20, 'font_weight': FontWeight.BOLD},
            'h3': {'font_size': 18, 'font_weight': FontWeight.BOLD},
            'h4': {'font_size': 16, 'font_weight': FontWeight.BOLD},
            'h5': {'font_size': 14, 'font_weight': FontWeight.BOLD},
            'h6': {'font_size': 12, 'font_weight': FontWeight.BOLD},
            'b': {'font_weight': FontWeight.BOLD},
            'strong': {'font_weight': FontWeight.BOLD},
            'i': {'font_style': FontStyle.ITALIC},
            'em': {'font_style': FontStyle.ITALIC},
            'u': {'decoration': TextDecoration.UNDERLINE},
            'a': {'decoration': TextDecoration.UNDERLINE, 'color': (0, 0, 255)},
            'code': {'font_family': 'monospace', 'background': (240, 240, 240, 255)},
            'pre': {'font_family': 'monospace'},
        }
        return tag_styles.get(tag, {})

    def create_font(self) -> Font:
        """
        Create a Font object from the current style.

        Returns:
            Font: A font object with the current style settings
        """
        # NOTE(review): the keyword below is spelled ``langauge``; the Font
        # constructor parameter is being renamed to ``language`` in
        # pyWebLayout/style/fonts.py - update this call to match whichever
        # Font signature this module is used with.
        return Font(
            font_size=self._current_style['font_size'],
            colour=self._current_style['color'],
            weight=self._current_style['font_weight'],
            style=self._current_style['font_style'],
            decoration=self._current_style['decoration'],
            background=self._current_style['background'],
            langauge=self._current_style['language']
        )

    def parse_inline_style(self, style_str: str) -> Dict[str, Any]:
        """
        Parse inline CSS style string.

        Recognized properties are font-size (px/pt, fractional sizes are
        truncated), font-weight (bold/normal or a number, 600 and above
        being bold), font-style, text-decoration, color and
        background-color. Keyword values are matched case-insensitively;
        anything unrecognized is skipped.

        Args:
            style_str: CSS style string

        Returns:
            Dictionary of style properties
        """
        if not style_str:
            return {}

        style_dict: Dict[str, Any] = {}
        for declaration in style_str.split(';'):
            prop, separator, value = declaration.partition(':')
            if not separator:
                continue
            prop = prop.strip().lower()
            value = value.strip()
            keyword = value.lower()

            if prop == 'font-size':
                size = self._FONT_SIZE.fullmatch(keyword)
                if size:
                    try:
                        style_dict['font_size'] = int(float(size.group(1)))
                    except (ValueError, OverflowError):
                        pass  # absurdly large number; ignore the declaration
            elif prop == 'font-weight':
                if keyword == 'bold':
                    style_dict['font_weight'] = FontWeight.BOLD
                elif keyword == 'normal':
                    style_dict['font_weight'] = FontWeight.NORMAL
                elif self._NUMERIC_WEIGHT.fullmatch(keyword):
                    heavy = int(keyword) >= 600
                    style_dict['font_weight'] = FontWeight.BOLD if heavy else FontWeight.NORMAL
            elif prop == 'font-style':
                if keyword == 'italic':
                    style_dict['font_style'] = FontStyle.ITALIC
                elif keyword == 'normal':
                    style_dict['font_style'] = FontStyle.NORMAL
            elif prop == 'text-decoration':
                if keyword == 'underline':
                    style_dict['decoration'] = TextDecoration.UNDERLINE
                elif keyword == 'line-through':
                    style_dict['decoration'] = TextDecoration.STRIKETHROUGH
                elif keyword == 'none':
                    style_dict['decoration'] = TextDecoration.NONE
            elif prop == 'color':
                color = self.parse_color(value)
                if color:
                    style_dict['color'] = color
            elif prop == 'background-color':
                color = self.parse_color(value)
                if color:
                    # Backgrounds are stored as RGBA, fully opaque.
                    style_dict['background'] = color + (255,)
        return style_dict

    def parse_color(self, color_str: str) -> Optional[Tuple[int, int, int]]:
        """
        Parse a CSS color string.

        Supports the named colors above, ``#RGB`` / ``#RRGGBB`` hex notation,
        ``rgb(r, g, b)`` (rejected when a channel exceeds 255) and
        ``rgba(r, g, b, a)`` (channels clamped to 255, alpha ignored).

        Args:
            color_str: CSS color string

        Returns:
            RGB tuple or None if parsing fails
        """
        color_str = color_str.lower().strip()

        named = self._NAMED_COLORS.get(color_str)
        if named is not None:
            return named

        # Hex colors: only real hex digits are accepted.
        if self._HEX_COLOR.fullmatch(color_str):
            digits = color_str[1:]
            if len(digits) == 3:  # #RGB -> #RRGGBB
                digits = ''.join(ch * 2 for ch in digits)
            return (int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16))

        rgb_match = self._RGB_COLOR.match(color_str)
        if rgb_match:
            r_val, g_val, b_val = (int(part) for part in rgb_match.groups())
            if r_val > 255 or g_val > 255 or b_val > 255:
                return None  # Invalid color values
            return (r_val, g_val, b_val)

        rgba_match = self._RGBA_COLOR.match(color_str)
        if rgba_match:
            r_val, g_val, b_val = (min(255, int(part)) for part in rgba_match.groups())
            return (r_val, g_val, b_val)

        # Failed to parse color
        return None

    def apply_style_to_element(self, tag: str, attrs: Dict[str, str]) -> Dict[str, Any]:
        """
        Apply combined styles (tag defaults + inline styles) for an element.

        Args:
            tag: The HTML tag name
            attrs: Dictionary of tag attributes

        Returns:
            Combined style dictionary
        """
        # Start with tag-specific styles
        style = self.get_tag_style(tag)
        # Override with inline styles if present
        if 'style' in attrs:
            style.update(self.parse_inline_style(attrs['style']))
        return style
--- a/pyWebLayout/io/readers/html_text.py
+++ b/pyWebLayout/io/readers/html_text.py
@ -1,163 +0,0 @@
 """
 HTML text processing for pyWebLayout.
 This module provides specialized functionality for handling text content,
 entity references, and word creation in HTML documents.
 """
 from typing import Optional
 from pyWebLayout.abstract.inline import Word
 from pyWebLayout.abstract.block import Paragraph
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 class HTMLTextProcessor:
    """
    Processes text content during HTML parsing.

    This class handles text buffering, entity resolution, and word creation
    with proper styling applied.
    """

    # Entity names resolved directly; written as escapes so the table cannot
    # be damaged by re-encoding. Names not listed here fall back to the
    # standard library's HTML entity table.
    _ENTITIES = {
        'lt': '<',
        'gt': '>',
        'amp': '&',
        'quot': '"',
        'apos': "'",
        'nbsp': ' ',
        'copy': '\u00a9',
        'reg': '\u00ae',
        'trade': '\u2122',
        'mdash': '\u2014',
        'ndash': '\u2013',
        'hellip': '\u2026',
        'laquo': '\u00ab',
        'raquo': '\u00bb',
        'ldquo': '\u201c',
        'rdquo': '\u201d',
        'lsquo': '\u2018',
        'rsquo': '\u2019',
        'deg': '\u00b0',
        'plusmn': '\u00b1',
        'times': '\u00d7',
        'divide': '\u00f7',
        'euro': '\u20ac',
        'pound': '\u00a3',
        'yen': '\u00a5',
    }

    def __init__(self, style_manager: HTMLStyleManager):
        """
        Initialize the text processor.

        Args:
            style_manager: The style manager for creating styled words
        """
        self._style_manager = style_manager
        self._text_buffer = ""
        self._current_paragraph: Optional[Paragraph] = None

    def reset(self):
        """Reset the text processor state."""
        self._text_buffer = ""
        self._current_paragraph = None

    def set_current_paragraph(self, paragraph: Optional[Paragraph]):
        """
        Set the current paragraph for text output.

        Args:
            paragraph: The paragraph to receive text, or None
        """
        self._current_paragraph = paragraph

    def add_text(self, text: str):
        """
        Add text to the buffer.

        Args:
            text: The text to add
        """
        self._text_buffer += text

    def add_entity_reference(self, name: str):
        """
        Add an HTML entity reference to the buffer.

        Unknown names are kept verbatim as ``&name;``.

        Args:
            name: The entity name (e.g., 'lt', 'gt', 'amp')
        """
        from html.entities import name2codepoint

        char = self._ENTITIES.get(name)
        if char is None:
            codepoint = name2codepoint.get(name)
            char = chr(codepoint) if codepoint is not None else f'&{name};'
        self._text_buffer += char

    def add_character_reference(self, name: str):
        """
        Add a character reference to the buffer.

        Invalid references are kept verbatim as ``&#name;``.

        Args:
            name: The character reference (decimal, or hex with an
                'x'/'X' prefix)
        """
        try:
            if name[:1] in ('x', 'X'):
                # Hexadecimal reference
                char = chr(int(name[1:], 16))
            else:
                # Decimal reference
                char = chr(int(name))
        except (ValueError, OverflowError):
            # Invalid character reference
            char = f'&#{name};'
        self._text_buffer += char

    def flush_text(self) -> bool:
        """
        Flush the text buffer, creating words as needed.

        The buffer is always emptied. Words are only created when a current
        paragraph is set and the buffer holds non-whitespace text; each word
        gets a font created from the current style.

        Returns:
            True if text was flushed, False if nothing was added
        """
        pending, self._text_buffer = self._text_buffer, ""
        # Compare against None so a paragraph is never skipped merely for
        # being empty.
        if self._current_paragraph is None:
            return False

        words = pending.split()
        if not words:
            return False

        for word_text in words:
            font = self._style_manager.create_font()
            self._current_paragraph.add_word(Word(word_text, font))
        return True

    def has_pending_text(self) -> bool:
        """
        Check if there is pending text in the buffer.

        Returns:
            True if there is non-whitespace text waiting to be flushed
        """
        return bool(self._text_buffer.strip())

    def get_buffer_content(self) -> str:
        """
        Get the current buffer content without flushing.

        Returns:
            The current text buffer content
        """
        return self._text_buffer

    def clear_buffer(self):
        """Clear the text buffer without creating words."""
        self._text_buffer = ""
--- a/pyWebLayout/style/fonts.py
+++ b/pyWebLayout/style/fonts.py
@ -34,7 +34,7 @@ class Font:
                 style: FontStyle = FontStyle.NORMAL,
                 decoration: TextDecoration = TextDecoration.NONE,
                 background: Optional[Tuple[int, int, int, int]] = None,
-                 langauge = "en_EN"):
+                 language = "en_EN"):
        """
        Initialize a Font object with the specified properties.
@ -46,6 +46,7 @@ class Font:
            style: Font style (normal or italic).
            decoration: Text decoration (none, underline, or strikethrough).
            background: RGBA background color for the text. If None, transparent background.
            language: Language code for hyphenation and text processing.
        """
        self._font_path = font_path
        self._font_size = font_size
@ -54,7 +55,7 @@ class Font:
        self._style = style
        self._decoration = decoration
        self._background = background if background else (255, 255, 255, 0)
-        self.language = langauge
+        self.language = language
        # Load the font file or use default
        self._load_font()
--- a/tests/test_html_content.py
+++ b/tests/test_html_content.py
@ -1,354 +0,0 @@
 """
 Unit tests for HTML content reading.
 Tests the HTMLContentReader class for parsing complete HTML documents.
 This is more of an integration test covering the entire parsing pipeline.
 """
 import unittest
 from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.abstract.document import Document
 from pyWebLayout.abstract.block import (
    Paragraph, Heading, HeadingLevel, HList, ListStyle, 
    Table, Quote, CodeBlock, HorizontalRule
 )
 from pyWebLayout.abstract.inline import LineBreak
 class TestHTMLContentReader(unittest.TestCase):
    """Test cases for HTMLContentReader."""
    def setUp(self):
        """Give every test its own reader and an empty target document."""
        self.reader, self.document = HTMLContentReader(), Document()
    def test_simple_paragraph(self):
        """A single <p> becomes one Paragraph block with one Word per token."""
        html = '<p>Hello world!</p>'
        # The return value is not needed; the document is populated in place.
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], Paragraph)
        paragraph = self.document.blocks[0]
        # words() yields pairs; the Word object is the second element.
        words = list(paragraph.words())
        self.assertEqual(len(words), 2)
        self.assertEqual(words[0][1].text, "Hello")
        self.assertEqual(words[1][1].text, "world!")
    def test_headings(self):
        """Each <hN> tag yields a Heading block carrying the matching level."""
        html = '''
        <h1>Heading 1</h1>
        <h2>Heading 2</h2>
        <h3>Heading 3</h3>
        <h6>Heading 6</h6>
        '''
        self.reader.extract_content(html, self.document)
        # Keep only the Heading blocks, in document order.
        found = [blk for blk in self.document.blocks if isinstance(blk, Heading)]
        self.assertEqual(len(found), 4)
        # Levels must follow the order of the tags in the markup.
        expected_levels = (
            HeadingLevel.H1,
            HeadingLevel.H2,
            HeadingLevel.H3,
            HeadingLevel.H6,
        )
        for heading, level in zip(found, expected_levels):
            self.assertEqual(heading.level, level)
        # The first heading's text is split into two words.
        first_words = list(found[0].words())
        self.assertEqual(len(first_words), 2)
        self.assertEqual(first_words[0][1].text, "Heading")
        self.assertEqual(first_words[1][1].text, "1")
    def test_styled_text(self):
        """Inline <b>/<i> markup must not disturb how text is split into words."""
        self.reader.extract_content(
            '<p>This is <b>bold</b> and <i>italic</i> text.</p>', self.document
        )
        self.assertEqual(len(self.document.blocks), 1)
        entries = list(self.document.blocks[0].words())
        # Six words are expected regardless of the inline tags around them.
        self.assertEqual(len(entries), 6)
        # Only the word texts are compared; the per-word font styling is not
        # inspected by this test.
        self.assertEqual(
            [entry[1].text for entry in entries],
            ["This", "is", "bold", "and", "italic", "text."],
        )
    def test_unordered_list(self):
        """A <ul> becomes one unordered HList with one item per <li>."""
        html = '''
        <ul>
            <li>First item</li>
            <li>Second item</li>
            <li>Third item</li>
        </ul>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], HList)
        ul_block = self.document.blocks[0]
        self.assertEqual(ul_block.style, ListStyle.UNORDERED)
        entries = list(ul_block.items())
        self.assertEqual(len(entries), 3)
        # The first <li> is expected to wrap its text in a single Paragraph.
        inner = list(entries[0].blocks())
        self.assertEqual(len(inner), 1)
        self.assertIsInstance(inner[0], Paragraph)
    def test_ordered_list(self):
        """An <ol> becomes one ordered HList with one item per <li>."""
        html = '''
        <ol>
            <li>First step</li>
            <li>Second step</li>
        </ol>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        ol_block = self.document.blocks[0]
        self.assertEqual(ol_block.style, ListStyle.ORDERED)
        step_count = len(list(ol_block.items()))
        self.assertEqual(step_count, 2)
    def test_definition_list(self):
        """A <dl> becomes one definition-style HList."""
        html = '''
        <dl>
            <dt>Term 1</dt>
            <dd>Definition 1</dd>
            <dt>Term 2</dt>
            <dd>Definition 2</dd>
        </dl>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        dl_block = self.document.blocks[0]
        self.assertEqual(dl_block.style, ListStyle.DEFINITION)
        # The two dt/dd pairs are expected to surface as two items.
        pair_count = len(list(dl_block.items()))
        self.assertEqual(pair_count, 2)
    def test_table(self):
        """A <table> without explicit sections reports every <tr> as a body row."""
        html = '''
        <table>
            <tr>
                <th>Header 1</th>
                <th>Header 2</th>
            </tr>
            <tr>
                <td>Cell 1</td>
                <td>Cell 2</td>
            </tr>
        </table>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], Table)
        rows = list(self.document.blocks[0].body_rows())
        # Both the <th> row and the <td> row are counted as body rows.
        self.assertEqual(len(rows), 2)
        # First row: two cells, each flagged as a header cell.
        th_cells = list(rows[0].cells())
        self.assertEqual(len(th_cells), 2)
        for cell in th_cells:
            self.assertTrue(cell.is_header)
        # Second row: two cells, neither flagged as a header cell.
        td_cells = list(rows[1].cells())
        self.assertEqual(len(td_cells), 2)
        for cell in td_cells:
            self.assertFalse(cell.is_header)
    def test_blockquote(self):
        """A <blockquote> becomes one Quote block containing its paragraphs."""
        html = '''
        <blockquote>
            <p>This is a quoted paragraph.</p>
            <p>Another quoted paragraph.</p>
        </blockquote>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], Quote)
        # Both <p> children must be nested inside the quote, in order.
        nested = list(self.document.blocks[0].blocks())
        self.assertEqual(len(nested), 2)
        for child in nested:
            self.assertIsInstance(child, Paragraph)
    def test_code_block(self):
        """Test parsing code blocks."""
        html = '''
        <pre><code class="language-python">
 def hello():
    print("Hello, world!")
        </code></pre>
        '''
        self.reader.extract_content(html, self.document)
        self.assertEqual(len(self.document.blocks), 1)
        self.assertIsInstance(self.document.blocks[0], CodeBlock)
        code_block = self.document.blocks[0]
        self.assertEqual(code_block.language, "python")
    def test_horizontal_rule(self):
        """An <hr> between two paragraphs yields its own HorizontalRule block."""
        self.reader.extract_content('<p>Before</p><hr><p>After</p>', self.document)
        self.assertEqual(len(self.document.blocks), 3)
        # Block order must mirror the markup: paragraph, rule, paragraph.
        expected_kinds = (Paragraph, HorizontalRule, Paragraph)
        for position, kind in enumerate(expected_kinds):
            self.assertIsInstance(self.document.blocks[position], kind)
    def test_html_entities(self):
        """Named entities (&lt; &gt; &amp;) are decoded into literal characters."""
        self.reader.extract_content(
            '<p>Less than: &lt; Greater than: &gt; Ampersand: &amp;</p>',
            self.document,
        )
        first_block = self.document.blocks[0]
        decoded = [entry[1].text for entry in list(first_block.words())]
        # Each entity is surrounded by spaces in the markup, so the decoded
        # character is expected to appear as a word of its own.
        for symbol in ('<', '>', '&'):
            self.assertIn(symbol, decoded)
    def test_nested_elements(self):
        """Block elements nested in a <div> all show up in the document."""
        html = '''
        <div>
            <h2>Section Title</h2>
            <p>Section content with <strong>important</strong> text.</p>
            <ul>
                <li>List item 1</li>
                <li>List item 2</li>
            </ul>
        </div>
        '''
        self.reader.extract_content(html, self.document)
        # More than one block must have been produced.
        self.assertGreater(len(self.document.blocks), 1)
        # Compare by class name so the check does not depend on block order.
        kinds = [type(blk).__name__ for blk in self.document.blocks]
        for wanted in ('Paragraph', 'Heading', 'HList'):
            self.assertIn(wanted, kinds)
    def test_empty_elements(self):
        """Elements without content still produce one block each."""
        self.reader.extract_content('<p></p><div></div><ul></ul>', self.document)
        # Three empty elements in, three blocks out.
        block_count = len(self.document.blocks)
        self.assertEqual(block_count, 3)
    def test_whitespace_handling(self):
        """Runs of spaces and newlines collapse into plain word boundaries."""
        html = '''
        <p>  Word1    Word2  
        Word3   </p>
        '''
        self.reader.extract_content(html, self.document)
        first_block = self.document.blocks[0]
        # Leading, trailing and repeated whitespace must not create words.
        seen = [entry[1].text for entry in list(first_block.words())]
        self.assertEqual(seen, ["Word1", "Word2", "Word3"])
    def test_base_url_setting(self):
        """set_base_url must hand the URL on to the reader's inline handler."""
        expected = "https://example.com/path/"
        self.reader.set_base_url(expected)
        # The inline handler exposes the value it received as base_url.
        handler = self.reader.inline_handler
        self.assertEqual(handler.base_url, expected)
    def test_complex_document(self):
        """A full HTML page yields headings, paragraphs, a list, a quote and a table."""
        html = '''
        <!DOCTYPE html>
        <html>
        <head>
            <title>Test Document</title>
            <style>body { font-family: Arial; }</style>
        </head>
        <body>
            <h1>Main Title</h1>
            <p>Introduction paragraph with <em>emphasis</em>.</p>
            <h2>Section 1</h2>
            <p>Content with <a href="link.html">a link</a>.</p>
            <ul>
                <li>Item 1</li>
                <li>Item 2 with <strong>bold text</strong></li>
            </ul>
            <h2>Section 2</h2>
            <blockquote>
                <p>A quoted paragraph.</p>
            </blockquote>
            <table>
                <tr><th>Col1</th><th>Col2</th></tr>
                <tr><td>A</td><td>B</td></tr>
            </table>
        </body>
        </html>
        '''
        self.reader.extract_content(html, self.document)
        # The page must produce more than five blocks.
        self.assertGreater(len(self.document.blocks), 5)
        # Every expected block kind must occur at least once.
        seen_kinds = {type(blk).__name__ for blk in self.document.blocks}
        required_kinds = {'Heading', 'Paragraph', 'HList', 'Quote', 'Table'}
        self.assertTrue(required_kinds <= seen_kinds)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/test_html_style.py
+++ b/tests/test_html_style.py
@ -1,181 +1,181 @@
 """
-Unit tests for HTML style management.
+Unit tests for pyWebLayout style objects.
-Tests the HTMLStyleManager class for CSS parsing, style stacks, and font creation.
+Tests the Font class and style enums for proper functionality and immutability.
 """
 import unittest
-from pyWebLayout.io.readers.html_style import HTMLStyleManager
+from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration, Alignment
 from pyWebLayout.style import FontStyle, FontWeight, TextDecoration
-class TestHTMLStyleManager(unittest.TestCase):
+class TestStyleObjects(unittest.TestCase):
-    """Test cases for HTMLStyleManager."""
+    """Test cases for pyWebLayout style objects."""
-    def setUp(self):
+    def test_font_weight_enum(self):
-        """Set up test fixtures."""
+        """Test FontWeight enum values."""
-        self.style_manager = HTMLStyleManager()
+        self.assertEqual(FontWeight.NORMAL.value, "normal")
        self.assertEqual(FontWeight.BOLD.value, "bold")
-    def test_initialization(self):
+        # Test that all expected values exist
-        """Test proper initialization of style manager."""
+        weights = [FontWeight.NORMAL, FontWeight.BOLD]
-        style = self.style_manager.get_current_style()
+        self.assertEqual(len(weights), 2)
-        self.assertEqual(style['font_size'], 12)
+    def test_font_style_enum(self):
-        self.assertEqual(style['font_weight'], FontWeight.NORMAL)
+        """Test FontStyle enum values."""
-        self.assertEqual(style['font_style'], FontStyle.NORMAL)
+        self.assertEqual(FontStyle.NORMAL.value, "normal")
-        self.assertEqual(style['decoration'], TextDecoration.NONE)
+        self.assertEqual(FontStyle.ITALIC.value, "italic")
        self.assertEqual(style['color'], (0, 0, 0))
        self.assertIsNone(style['background'])
        self.assertEqual(style['language'], 'en_US')
-    def test_style_stack_operations(self):
+        # Test that all expected values exist
-        """Test push and pop operations on style stack."""
+        styles = [FontStyle.NORMAL, FontStyle.ITALIC]
-        # Initial state
+        self.assertEqual(len(styles), 2)
        initial_style = self.style_manager.get_current_style()
-        # Push a new style
+    def test_text_decoration_enum(self):
-        new_style = {'font_size': 16, 'font_weight': FontWeight.BOLD}
+        """Test TextDecoration enum values."""
-        self.style_manager.push_style(new_style)
+        self.assertEqual(TextDecoration.NONE.value, "none")
        self.assertEqual(TextDecoration.UNDERLINE.value, "underline")
        self.assertEqual(TextDecoration.STRIKETHROUGH.value, "strikethrough")
-        current_style = self.style_manager.get_current_style()
+        # Test that all expected values exist
-        self.assertEqual(current_style['font_size'], 16)
+        decorations = [TextDecoration.NONE, TextDecoration.UNDERLINE, TextDecoration.STRIKETHROUGH]
-        self.assertEqual(current_style['font_weight'], FontWeight.BOLD)
+        self.assertEqual(len(decorations), 3)
        self.assertEqual(current_style['color'], (0, 0, 0))  # Unchanged
-        # Pop the style
+    def test_alignment_enum(self):
-        self.style_manager.pop_style()
+        """Test Alignment enum values."""
-        restored_style = self.style_manager.get_current_style()
+        self.assertEqual(Alignment.LEFT.value, 1)
-        self.assertEqual(restored_style, initial_style)
+        self.assertEqual(Alignment.CENTER.value, 2)
        self.assertEqual(Alignment.RIGHT.value, 3)
        self.assertEqual(Alignment.TOP.value, 4)
        self.assertEqual(Alignment.BOTTOM.value, 5)
        self.assertEqual(Alignment.JUSTIFY.value, 6)
-    def test_tag_styles(self):
+    def test_font_initialization_defaults(self):
-        """Test default styles for HTML tags."""
+        """Test Font initialization with default values."""
-        h1_style = self.style_manager.get_tag_style('h1')
+        font = Font()
        self.assertEqual(h1_style['font_size'], 24)
        self.assertEqual(h1_style['font_weight'], FontWeight.BOLD)
-        h6_style = self.style_manager.get_tag_style('h6')
+        self.assertIsNone(font._font_path)
-        self.assertEqual(h6_style['font_size'], 12)
+        self.assertEqual(font.font_size, 12)
-        self.assertEqual(h6_style['font_weight'], FontWeight.BOLD)
+        self.assertEqual(font.colour, (0, 0, 0))
        self.assertEqual(font.color, (0, 0, 0))  # Alias
        self.assertEqual(font.weight, FontWeight.NORMAL)
        self.assertEqual(font.style, FontStyle.NORMAL)
        self.assertEqual(font.decoration, TextDecoration.NONE)
        self.assertEqual(font.background, (255, 255, 255, 0))  # Transparent
        self.assertEqual(font.language, "en_EN")
-        em_style = self.style_manager.get_tag_style('em')
+    def test_font_initialization_custom(self):
-        self.assertEqual(em_style['font_style'], FontStyle.ITALIC)
+        """Test Font initialization with custom values."""
-        
+        font = Font(
-        unknown_style = self.style_manager.get_tag_style('unknown')
+            font_path="/path/to/font.ttf",
-        self.assertEqual(unknown_style, {})
+            font_size=16,
-    
+            colour=(255, 0, 0),
-    def test_inline_style_parsing(self):
+            weight=FontWeight.BOLD,
-        """Test parsing of inline CSS styles."""
+            style=FontStyle.ITALIC,
-        # Test font-size
+            decoration=TextDecoration.UNDERLINE,
-        style = self.style_manager.parse_inline_style('font-size: 18px')
+            background=(255, 255, 0, 255),
-        self.assertEqual(style['font_size'], 18)
+            language="fr_FR"
        style = self.style_manager.parse_inline_style('font-size: 14pt')
        self.assertEqual(style['font_size'], 14)
        # Test font-weight
        style = self.style_manager.parse_inline_style('font-weight: bold')
        self.assertEqual(style['font_weight'], FontWeight.BOLD)
        # Test font-style
        style = self.style_manager.parse_inline_style('font-style: italic')
        self.assertEqual(style['font_style'], FontStyle.ITALIC)
        # Test text-decoration
        style = self.style_manager.parse_inline_style('text-decoration: underline')
        self.assertEqual(style['decoration'], TextDecoration.UNDERLINE)
        # Test multiple properties
        style = self.style_manager.parse_inline_style(
            'font-size: 20px; font-weight: bold; color: red'
        )
        self.assertEqual(style['font_size'], 20)
        self.assertEqual(style['font_weight'], FontWeight.BOLD)
        self.assertEqual(style['color'], (255, 0, 0))
    def test_color_parsing(self):
        """parse_color handles names, hex and rgb()/rgba(); bad input gives None."""
        parse = self.style_manager.parse_color
        accepted = [
            # Named colors
            ('red', (255, 0, 0)),
            ('blue', (0, 0, 255)),
            ('white', (255, 255, 255)),
            ('gray', (128, 128, 128)),
            ('grey', (128, 128, 128)),
            # Hex colors, long and short form
            ('#ff0000', (255, 0, 0)),
            ('#00ff00', (0, 255, 0)),
            ('#f00', (255, 0, 0)),
            ('#0f0', (0, 255, 0)),
            # RGB colors, including extra whitespace
            ('rgb(255, 0, 0)', (255, 0, 0)),
            ('rgb(128, 128, 128)', (128, 128, 128)),
            ('rgb( 255 , 255 , 255 )', (255, 255, 255)),
            # RGBA colors: the alpha component is ignored
            ('rgba(255, 0, 0, 0.5)', (255, 0, 0)),
        ]
        for css_value, rgb in accepted:
            self.assertEqual(parse(css_value), rgb)
        # Unknown names, bad hex digits and out-of-range components give None.
        for css_value in ('invalid', '#gg0000', 'rgb(300, 0, 0)'):
            self.assertIsNone(parse(css_value))
    def test_color_clamping(self):
        """Out-of-range rgb() components are rejected (None) rather than clamped."""
        parsed = self.style_manager.parse_color('rgb(300, -10, 128)')
        # 300 and -10 fall outside 0-255, so no colour is expected back.
        self.assertIsNone(parsed)
    def test_apply_style_to_element(self):
        """Inline CSS overrides tag defaults; untouched defaults survive."""
        # h1 with an inline style: size and colour come from the attribute,
        # the bold weight from the h1 defaults.
        inline_attrs = {'style': 'color: blue; font-size: 30px'}
        merged = self.style_manager.apply_style_to_element('h1', inline_attrs)
        self.assertEqual(merged['font_size'], 30)
        self.assertEqual(merged['font_weight'], FontWeight.BOLD)
        self.assertEqual(merged['color'], (0, 0, 255))
        # No inline style at all: only the tag default applies.
        merged = self.style_manager.apply_style_to_element('strong', {})
        self.assertEqual(merged['font_weight'], FontWeight.BOLD)
    def test_reset(self):
        """reset() drops all pushed styles and restores the default style."""
        # Stack two overrides so there is state to throw away.
        for override in ({'font_size': 20}, {'color': (255, 0, 0)}):
            self.style_manager.push_style(override)
        self.style_manager.reset()
        # Defaults are back and the stack is empty again.
        current = self.style_manager.get_current_style()
        self.assertEqual(current['font_size'], 12)
        self.assertEqual(current['color'], (0, 0, 0))
        self.assertEqual(len(self.style_manager._style_stack), 0)
    def test_font_creation(self):
        """Test Font object creation from current style."""
        # Set some specific styles
        self.style_manager.push_style({
            'font_size': 16,
            'font_weight': FontWeight.BOLD,
            'font_style': FontStyle.ITALIC,
            'decoration': TextDecoration.UNDERLINE,
            'color': (255, 0, 0),
            'background': (255, 255, 0, 255)
        })
        font = self.style_manager.create_font()
        self.assertEqual(font._font_path, "/path/to/font.ttf")
        self.assertEqual(font.font_size, 16)
        self.assertEqual(font.colour, (255, 0, 0))
        self.assertEqual(font.weight, FontWeight.BOLD)
        self.assertEqual(font.style, FontStyle.ITALIC)
        self.assertEqual(font.decoration, TextDecoration.UNDERLINE)
        self.assertEqual(font.colour, (255, 0, 0))
        self.assertEqual(font.background, (255, 255, 0, 255))
        self.assertEqual(font.language, "fr_FR")
    def test_font_with_methods(self):
        """Each with_* method returns a modified copy and leaves the source font alone."""
        base = Font(
            font_size=12,
            colour=(0, 0, 0),
            weight=FontWeight.NORMAL,
            style=FontStyle.NORMAL,
            decoration=TextDecoration.NONE,
        )
        # with_size: new size on the copy, old size and colour preserved.
        resized = base.with_size(16)
        self.assertEqual(resized.font_size, 16)
        self.assertEqual(base.font_size, 12)
        self.assertEqual(resized.colour, (0, 0, 0))
        # with_colour: new colour on the copy, old colour and size preserved.
        recoloured = base.with_colour((255, 0, 0))
        self.assertEqual(recoloured.colour, (255, 0, 0))
        self.assertEqual(base.colour, (0, 0, 0))
        self.assertEqual(recoloured.font_size, 12)
        # with_weight
        bolded = base.with_weight(FontWeight.BOLD)
        self.assertEqual(bolded.weight, FontWeight.BOLD)
        self.assertEqual(base.weight, FontWeight.NORMAL)
        # with_style
        slanted = base.with_style(FontStyle.ITALIC)
        self.assertEqual(slanted.style, FontStyle.ITALIC)
        self.assertEqual(base.style, FontStyle.NORMAL)
        # with_decoration
        underlined = base.with_decoration(TextDecoration.UNDERLINE)
        self.assertEqual(underlined.decoration, TextDecoration.UNDERLINE)
        self.assertEqual(base.decoration, TextDecoration.NONE)
    def test_font_property_access(self):
        """Constructor arguments are readable back through the Font properties."""
        grey = (128, 128, 128)
        sample = Font(
            font_size=20,
            colour=grey,
            weight=FontWeight.BOLD,
            style=FontStyle.ITALIC,
            decoration=TextDecoration.STRIKETHROUGH,
        )
        # Every getter must echo the value passed to the constructor.
        self.assertEqual(sample.font_size, 20)
        self.assertEqual(sample.colour, grey)
        # "color" is an alias of "colour".
        self.assertEqual(sample.color, grey)
        self.assertEqual(sample.weight, FontWeight.BOLD)
        self.assertEqual(sample.style, FontStyle.ITALIC)
        self.assertEqual(sample.decoration, TextDecoration.STRIKETHROUGH)
        # The underlying font object must be available.
        self.assertIsNotNone(sample.font)
    def test_font_immutability(self):
        """Chained with_* calls create new fonts and never alter earlier ones."""
        original = Font(font_size=12, colour=(0, 0, 0))
        enlarged = original.with_size(16)
        reddened = enlarged.with_colour((255, 0, 0))
        # All three must be distinct objects.
        for left, right in ((original, enlarged), (enlarged, reddened), (original, reddened)):
            self.assertIsNot(left, right)
        # Each font keeps exactly the properties it was created with.
        self.assertEqual(original.font_size, 12)
        self.assertEqual(original.colour, (0, 0, 0))
        self.assertEqual(enlarged.font_size, 16)
        self.assertEqual(enlarged.colour, (0, 0, 0))
        self.assertEqual(reddened.font_size, 16)
        self.assertEqual(reddened.colour, (255, 0, 0))
    def test_background_handling(self):
        """A missing or None background reads back as transparent white."""
        transparent = (255, 255, 255, 0)
        # No background argument at all.
        self.assertEqual(Font().background, transparent)
        # An explicit RGBA background is kept as given.
        explicit = (255, 0, 0, 128)
        self.assertEqual(Font(background=explicit).background, explicit)
        # Passing None behaves like omitting the argument.
        self.assertEqual(Font(background=None).background, transparent)
 if __name__ == '__main__':
--- a/tests/test_html_text.py
+++ b/tests/test_html_text.py
@ -1,247 +0,0 @@
 """
 Unit tests for HTML text processing.
 Tests the HTMLTextProcessor class for text buffering, entity handling, and word creation.
 """
 import unittest
 from unittest.mock import Mock, MagicMock
 from pyWebLayout.io.readers.html_text import HTMLTextProcessor
 from pyWebLayout.io.readers.html_style import HTMLStyleManager
 from pyWebLayout.abstract.block import Paragraph
 from pyWebLayout.abstract.inline import Word
 class TestHTMLTextProcessor(unittest.TestCase):
    """Test cases for HTMLTextProcessor."""
    def setUp(self):
        """Build a fresh style manager, text processor, and mock paragraph for each test."""
        manager = HTMLStyleManager()
        self.style_manager = manager
        self.text_processor = HTMLTextProcessor(manager)

        # Stand-in paragraph: spec'd on Paragraph, with add_word replaced by a
        # plain Mock so tests can inspect the words handed to it.
        paragraph = Mock(spec=Paragraph)
        self.mock_paragraph = paragraph
        paragraph.add_word = Mock()
    def test_initialization(self):
        """A newly constructed processor has an empty buffer, no paragraph, and the given style manager."""
        processor = self.text_processor
        # Private attributes are read directly to pin the initial state.
        self.assertEqual(processor._text_buffer, "")
        self.assertIsNone(processor._current_paragraph)
        self.assertEqual(processor._style_manager, self.style_manager)
    def test_add_text(self):
        """Successive ``add_text`` calls append to the buffer in call order."""
        processor = self.text_processor
        # (fragment to add, buffer content expected right after adding it)
        steps = (
            ("Hello", "Hello"),
            (" World", "Hello World"),
        )
        for fragment, expected_buffer in steps:
            processor.add_text(fragment)
            self.assertEqual(processor.get_buffer_content(), expected_buffer)
    def test_entity_references(self):
        """Each named entity is appended to the buffer as its expected replacement text."""
        # Entity name -> text expected in the buffer after adding it.
        # NOTE(review): confirm whether the 'nbsp' expectation is meant to be
        # U+0020 or U+00A0; it is reproduced here exactly as found.
        expected_by_entity = {
            'lt': '<',
            'gt': '>',
            'amp': '&',
            'quot': '"',
            'apos': "'",
            'nbsp': ' ',
            'copy': '©',
            'reg': '®',
            'trade': '™',
            'mdash': '—',
            'ndash': '–',
            'hellip': '…',
            'euro': '€',
            # An unrecognised name is expected to come back in its literal
            # "&name;" form.
            'unknown': '&unknown;',
        }
        processor = self.text_processor
        for entity, expected in expected_by_entity.items():
            with self.subTest(entity=entity):
                # Start from an empty buffer so each case is independent.
                processor.clear_buffer()
                processor.add_entity_reference(entity)
                self.assertEqual(processor.get_buffer_content(), expected)
    def test_character_references(self):
        """Numeric character references are decoded; bad ones are kept in ``&#...;`` form."""
        processor = self.text_processor

        # (reference text, exact buffer content expected afterwards)
        exact_cases = (
            ('65', 'A'),                 # decimal
            ('x41', 'A'),                # hexadecimal
            ('8364', '€'),               # decimal code point of the euro sign
            ('invalid', '&#invalid;'),   # not a number: echoed back literally
        )
        for reference, expected in exact_cases:
            processor.clear_buffer()
            processor.add_character_reference(reference)
            self.assertEqual(processor.get_buffer_content(), expected)

        # A numeric value far too large to be a character: only the "&#"
        # prefix of the result is pinned.
        processor.clear_buffer()
        processor.add_character_reference('99999999999')
        self.assertTrue(processor.get_buffer_content().startswith('&#'))
    def test_buffer_operations(self):
        """Exercise ``has_pending_text`` and ``clear_buffer`` across buffer states."""
        processor = self.text_processor

        # Nothing buffered yet, then real text arrives.
        self.assertFalse(processor.has_pending_text())
        processor.add_text("Some text")
        self.assertTrue(processor.has_pending_text())

        # Clearing empties the buffer and removes the pending state.
        processor.clear_buffer()
        self.assertFalse(processor.has_pending_text())
        self.assertEqual(processor.get_buffer_content(), "")

        # Whitespace-only content must not be reported as pending text.
        processor.add_text("   \n\t  ")
        self.assertFalse(processor.has_pending_text())
    def test_paragraph_management(self):
        """``set_current_paragraph`` stores the given paragraph and accepts ``None`` to clear it."""
        processor = self.text_processor
        paragraph = self.mock_paragraph

        # Starts out with no paragraph attached.
        self.assertIsNone(processor._current_paragraph)

        # Attaching a paragraph is reflected in the private attribute.
        processor.set_current_paragraph(paragraph)
        self.assertEqual(processor._current_paragraph, paragraph)

        # Passing None detaches it again.
        processor.set_current_paragraph(None)
        self.assertIsNone(processor._current_paragraph)
    def test_flush_text_with_paragraph(self):
        """Flushing buffered text into a paragraph adds one word per whitespace-separated token."""
        processor = self.text_processor
        paragraph = self.mock_paragraph
        processor.set_current_paragraph(paragraph)
        processor.add_text("Hello world test")

        # Replace the style manager's font factory with a Mock that returns a
        # Mock font.
        self.style_manager.create_font = Mock(return_value=Mock())

        flushed = processor.flush_text()

        # flush_text reports success and adds exactly three words.
        self.assertTrue(flushed)
        self.assertEqual(paragraph.add_word.call_count, 3)  # "Hello", "world", "test"

        # The first positional argument of every add_word call is the word
        # object; its text must match the token order in the input.
        added_texts = [
            invocation[0][0].text
            for invocation in paragraph.add_word.call_args_list
        ]
        self.assertEqual(added_texts, ["Hello", "world", "test"])

        # Nothing may remain buffered once the flush is done.
        self.assertEqual(processor.get_buffer_content(), "")
    def test_flush_text_without_paragraph(self):
        """With no current paragraph, ``flush_text`` returns a falsy value but still empties the buffer."""
        processor = self.text_processor
        processor.add_text("Hello world")

        # No paragraph was set, so there is nowhere to put the words.
        flushed = processor.flush_text()
        self.assertFalse(flushed)

        # The buffered text is discarded regardless.
        self.assertEqual(processor.get_buffer_content(), "")
    def test_flush_empty_buffer(self):
        """Flushing an empty buffer returns a falsy value and adds no words."""
        paragraph = self.mock_paragraph
        self.text_processor.set_current_paragraph(paragraph)

        # Nothing was buffered, so the flush has nothing to do.
        flushed = self.text_processor.flush_text()
        self.assertFalse(flushed)
        paragraph.add_word.assert_not_called()
    def test_flush_whitespace_only(self):
        """Flushing a whitespace-only buffer returns a falsy value and adds no words."""
        processor = self.text_processor
        paragraph = self.mock_paragraph
        processor.set_current_paragraph(paragraph)
        processor.add_text("   \n\t  ")

        # Spaces, newline and tab alone carry no content to turn into words.
        flushed = processor.flush_text()
        self.assertFalse(flushed)
        paragraph.add_word.assert_not_called()
    def test_word_creation_with_styling(self):
        """Words produced by ``flush_text`` carry the font returned by the style manager."""
        self.text_processor.set_current_paragraph(self.mock_paragraph)
        self.text_processor.add_text("styled text")

        # Make the style manager hand out a known font object so the style of
        # each created word can be compared against it.
        mock_font = Mock()
        mock_font.font_size = 16
        mock_font.weight = "bold"
        self.style_manager.create_font = Mock(return_value=mock_font)

        self.text_processor.flush_text()

        # The font factory must have been consulted.
        self.style_manager.create_font.assert_called()

        # Guard against a vacuous pass: without this, the loop below would
        # assert nothing if flush_text never added a word.
        self.mock_paragraph.add_word.assert_called()

        # Every word handed to the paragraph (first positional argument of
        # add_word) must use the font from the style manager.
        for recorded_call in self.mock_paragraph.add_word.call_args_list:
            word = recorded_call[0][0]
            self.assertEqual(word.style, mock_font)
    def test_reset(self):
        """``reset`` clears both the text buffer and the current paragraph."""
        processor = self.text_processor

        # Put the processor into a non-initial state first.
        processor.set_current_paragraph(self.mock_paragraph)
        processor.add_text("Some text")

        processor.reset()

        # Both pieces of state must match a freshly constructed processor.
        self.assertEqual(processor._text_buffer, "")
        self.assertIsNone(processor._current_paragraph)
    def test_complex_text_processing(self):
        """Plain text, an entity, and a character reference combine into one buffer and flush as words."""
        processor = self.text_processor
        paragraph = self.mock_paragraph
        processor.set_current_paragraph(paragraph)

        # Replace the style manager's font factory with a Mock that returns a
        # Mock font.
        self.style_manager.create_font = Mock(return_value=Mock())

        # Feed the pieces in document order: text, &amp;, text, &#33;
        processor.add_text("Hello ")
        processor.add_entity_reference('amp')
        processor.add_text(" world")
        processor.add_character_reference('33')  # '!'

        # The buffer holds the decoded, concatenated text.
        self.assertEqual(processor.get_buffer_content(), "Hello & world!")

        # After flushing, the paragraph received one word per
        # whitespace-separated token; "!" stays attached to "world".
        processor.flush_text()
        added_texts = [
            invocation[0][0].text
            for invocation in paragraph.add_word.call_args_list
        ]
        self.assertEqual(added_texts, ["Hello", "&", "world!"])
 if __name__ == '__main__':
    # Run this module's test cases when the file is executed directly.
    unittest.main()