all tests passing

2025-06-07 15:20:42 +02:00 · 2025-06-07 15:20:42 +02:00 · ad0ac238f3
commit ad0ac238f3
parent ab84691278
15 changed files with 499 additions and 2004 deletions
--- a/pyWebLayout/abstract/block.py
+++ b/pyWebLayout/abstract/block.py
@@ -1011,14 +1011,246 @@ class Table(Block):
        elif section.lower() == "footer":
            self._footer_rows.append(row)
        else:  # Default to body
-            self._rows
+            self._rows.append(row)
+    
+    def create_row(self, section: str = "body", style=None) -> TableRow:
+        """
+        Create a new table row and add it to this table.
+        
+        Args:
+            section: The section to add the row to ("header", "body", or "footer")
+            style: Optional style override. If None, inherits from table
+            
+        Returns:
+            The newly created TableRow object
+        """
+        return TableRow.create_and_add_to(self, section, style)
+    
+    def header_rows(self) -> Iterator[TableRow]:
+        """
+        Iterate over the header rows in this table.
+        
+        Yields:
+            Each TableRow in the header section
+        """
+        for row in self._header_rows:
+            yield row
+    
+    def body_rows(self) -> Iterator[TableRow]:
+        """
+        Iterate over the body rows in this table.
+        
+        Yields:
+            Each TableRow in the body section
+        """
+        for row in self._rows:
+            yield row
+    
+    def footer_rows(self) -> Iterator[TableRow]:
+        """
+        Iterate over the footer rows in this table.
+        
+        Yields:
+            Each TableRow in the footer section
+        """
+        for row in self._footer_rows:
+            yield row
+    
+    def all_rows(self) -> Iterator[Tuple[str, TableRow]]:
+        """
+        Iterate over all rows in this table with their section labels.
+        
+        Yields:
+            Tuples of (section, row) for each row in the table
+        """
+        for row in self._header_rows:
+            yield ("header", row)
+        for row in self._rows:
+            yield ("body", row)
+        for row in self._footer_rows:
+            yield ("footer", row)
+    
+    @property
+    def row_count(self) -> Dict[str, int]:
+        """Get the row counts by section"""
+        return {
+            "header": len(self._header_rows),
+            "body": len(self._rows),
+            "footer": len(self._footer_rows),
+            "total": len(self._header_rows) + len(self._rows) + len(self._footer_rows)
+        }


+class Image(Block):
+    """
+    An image element with source, dimensions, and alternative text.
+    """
+    
+    def __init__(self, source: str = "", alt_text: str = "", width: Optional[int] = None, height: Optional[int] = None):
+        """
+        Initialize an image element.
+        
+        Args:
+            source: The image source URL or path
+            alt_text: Alternative text for accessibility
+            width: Optional image width in pixels
+            height: Optional image height in pixels
+        """
+        super().__init__(BlockType.IMAGE)
+        self._source = source
+        self._alt_text = alt_text
+        self._width = width
+        self._height = height
+    
+    @classmethod
+    def create_and_add_to(cls, container, source: str = "", alt_text: str = "", 
+                         width: Optional[int] = None, height: Optional[int] = None) -> 'Image':
+        """
+        Create a new Image and add it to a container.
+        
+        Args:
+            container: The container to add the image to (must have add_block method)
+            source: The image source URL or path
+            alt_text: Alternative text for accessibility
+            width: Optional image width in pixels
+            height: Optional image height in pixels
+            
+        Returns:
+            The newly created Image object
+            
+        Raises:
+            AttributeError: If the container doesn't have the required add_block method
+        """
+        # Create the new image
+        image = cls(source, alt_text, width, height)
+        
+        # Add the image to the container
+        if hasattr(container, 'add_block'):
+            container.add_block(image)
+        else:
+            raise AttributeError(f"Container {type(container).__name__} must have an 'add_block' method")
+        
+        return image
+    
+    @property
+    def source(self) -> str:
+        """Get the image source"""
+        return self._source
+    
+    @source.setter
+    def source(self, source: str):
+        """Set the image source"""
+        self._source = source
+    
+    @property
+    def alt_text(self) -> str:
+        """Get the alternative text"""
+        return self._alt_text
+    
+    @alt_text.setter
+    def alt_text(self, alt_text: str):
+        """Set the alternative text"""
+        self._alt_text = alt_text
+    
+    @property
+    def width(self) -> Optional[int]:
+        """Get the image width"""
+        return self._width
+    
+    @width.setter
+    def width(self, width: Optional[int]):
+        """Set the image width"""
+        self._width = width
+    
+    @property
+    def height(self) -> Optional[int]:
+        """Get the image height"""
+        return self._height
+    
+    @height.setter
+    def height(self, height: Optional[int]):
+        """Set the image height"""
+        self._height = height
+    
+    def get_dimensions(self) -> Tuple[Optional[int], Optional[int]]:
+        """
+        Get the image dimensions as a tuple.
+        
+        Returns:
+            Tuple of (width, height)
+        """
+        return (self._width, self._height)
+    
+    def get_aspect_ratio(self) -> Optional[float]:
+        """
+        Calculate the aspect ratio of the image.
+        
+        Returns:
+            The aspect ratio (width/height) or None if either dimension is missing
+        """
+        if self._width is not None and self._height is not None and self._height > 0:
+            return self._width / self._height
+        return None
+    
+    def calculate_scaled_dimensions(self, max_width: Optional[int] = None, 
+                                  max_height: Optional[int] = None) -> Tuple[Optional[int], Optional[int]]:
+        """
+        Calculate scaled dimensions that fit within the given constraints.
+        
+        Args:
+            max_width: Maximum allowed width
+            max_height: Maximum allowed height
+            
+        Returns:
+            Tuple of (scaled_width, scaled_height)
+        """
+        if self._width is None or self._height is None:
+            return (self._width, self._height)
+        
+        width, height = self._width, self._height
+        
+        # Scale down if needed
+        if max_width is not None and width > max_width:
+            height = int(height * max_width / width)
+            width = max_width
+        
+        if max_height is not None and height > max_height:
+            width = int(width * max_height / height)
+            height = max_height
+        
+        return (width, height)

-class Image:

-    pass
-
-class HorizontalRule:
-
-    pass
+class HorizontalRule(Block):
+    """
+    A horizontal rule element (hr tag).
+    """
+    
+    def __init__(self):
+        """Initialize a horizontal rule element."""
+        super().__init__(BlockType.HORIZONTAL_RULE)
+    
+    @classmethod
+    def create_and_add_to(cls, container) -> 'HorizontalRule':
+        """
+        Create a new HorizontalRule and add it to a container.
+        
+        Args:
+            container: The container to add the horizontal rule to (must have add_block method)
+            
+        Returns:
+            The newly created HorizontalRule object
+            
+        Raises:
+            AttributeError: If the container doesn't have the required add_block method
+        """
+        # Create the new horizontal rule
+        hr = cls()
+        
+        # Add the horizontal rule to the container
+        if hasattr(container, 'add_block'):
+            container.add_block(hr)
+        else:
+            raise AttributeError(f"Container {type(container).__name__} must have an 'add_block' method")
+        
+        return hr
--- a/pyWebLayout/abstract/functional.py
+++ b/pyWebLayout/abstract/functional.py
@@ -124,6 +124,11 @@ class Button(Interactable):
        """Enable or disable the button"""
        self._enabled = enabled
    
+    @property
+    def params(self) -> Dict[str, Any]:
+        """Get the button parameters"""
+        return self._params
+    
    def execute(self) -> Any:
        """
        Execute the button's callback function if the button is enabled.
--- a/pyWebLayout/abstract/inline.py
+++ b/pyWebLayout/abstract/inline.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 from pyWebLayout.base import Queriable
 from pyWebLayout.style import Font
 from typing import Tuple, Union, List, Optional, Dict
+import pyphen


 class Word:
@@ -157,9 +158,6 @@ class Word:
        Returns:
            bool: True if the word can be hyphenated, False otherwise.
        """
-        # Only import pyphen when needed
-        import pyphen
-        
        # Use the provided language or fall back to style language
        lang = language if language else self._style.language
        dic = pyphen.Pyphen(lang=lang)
@@ -178,9 +176,6 @@ class Word:
        Returns:
            bool: True if the word was hyphenated, False otherwise.
        """
-        # Only import pyphen when needed
-        import pyphen
-        
        # Use the provided language or fall back to style language
        lang = language if language else self._style.language
        dic = pyphen.Pyphen(lang=lang)
@@ -333,5 +328,58 @@ class FormattedSpan:


 class LineBreak:
-
-    pass
+    """
+    A line break element that forces a new line within text content.
+    While this is an inline element that can occur within paragraphs,
+    it has block-like properties for consistency with the abstract model.
+    """
+    
+    def __init__(self):
+        """Initialize a line break element."""
+        # Import here to avoid circular imports
+        from .block import BlockType
+        self._block_type = BlockType.LINE_BREAK
+        self._parent = None
+    
+    @property
+    def block_type(self):
+        """Get the block type for this line break"""
+        return self._block_type
+    
+    @property
+    def parent(self):
+        """Get the parent element containing this line break, if any"""
+        return self._parent
+    
+    @parent.setter
+    def parent(self, parent):
+        """Set the parent element"""
+        self._parent = parent
+    
+    @classmethod
+    def create_and_add_to(cls, container) -> 'LineBreak':
+        """
+        Create a new LineBreak and add it to a container.
+        
+        Args:
+            container: The container to add the line break to
+            
+        Returns:
+            The newly created LineBreak object
+        """
+        # Create the new line break
+        line_break = cls()
+        
+        # Add the line break to the container if it has an appropriate method
+        if hasattr(container, 'add_line_break'):
+            container.add_line_break(line_break)
+        elif hasattr(container, 'add_element'):
+            container.add_element(line_break)
+        elif hasattr(container, 'add_word'):
+            # Some containers might treat line breaks like words
+            container.add_word(line_break)
+        else:
+            # Set parent relationship manually
+            line_break.parent = container
+        
+        return line_break
--- a/pyWebLayout/io/__init__.py
+++ b/pyWebLayout/io/__init__.py
@@ -21,9 +21,11 @@ from pyWebLayout.io.readers.base import BaseReader, MetadataReader, ContentReade

 # Specialized HTML readers
 from pyWebLayout.io.readers.html_metadata import HTMLMetadataReader
-from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.io.readers.html_resources import HTMLResourceReader

+# HTML extraction parser (the best approach)
+from pyWebLayout.io.readers.html_extraction import parse_html_string as parse_html_extraction
+
 # Specialized EPUB readers
 from pyWebLayout.io.readers.epub_metadata import EPUBMetadataReader

--- a/pyWebLayout/io/readers/__init__.py
+++ b/pyWebLayout/io/readers/__init__.py
@@ -11,13 +11,8 @@ from .base import BaseReader, MetadataReader, ContentReader, ResourceReader, Com
 # HTML readers (decomposed)
 from .html import HTMLReader, read_html, read_html_file, parse_html_string
 from .html_metadata import HTMLMetadataReader
-from .html_content import HTMLContentReader
 from .html_resources import HTMLResourceReader

-# HTML processing components (supporting modules)
-from .html_style import HTMLStyleManager
-from .html_text import HTMLTextProcessor
-from .html_elements import BlockElementHandler, ListElementHandler, TableElementHandler, InlineElementHandler

 # EPUB readers
 from .epub_reader import read_epub  # Legacy
@@ -29,7 +24,7 @@ __all__ = [
    
    # HTML readers
    'HTMLReader', 'read_html', 'read_html_file', 'parse_html_string',
-    'HTMLMetadataReader', 'HTMLContentReader', 'HTMLResourceReader',
+    'HTMLMetadataReader', 'HTMLResourceReader',
    
    # EPUB readers
    'read_epub', 'EPUBMetadataReader',
--- a/pyWebLayout/io/readers/html.py
+++ b/pyWebLayout/io/readers/html.py
@@ -1,36 +1,33 @@
 """
 Modern HTML reader for pyWebLayout.

-This module provides a decomposed HTML reader that uses specialized
-readers for metadata, content, and resources, following the pattern
-established in the abstract module.
+This module provides an HTML reader that uses the html_extraction module
+for clean, handler-based parsing using BeautifulSoup.
 """

 import os
 from typing import Union, Optional
 from pyWebLayout.abstract.document import Document
-from pyWebLayout.io.readers.base import CompositeReader
+from pyWebLayout.io.readers.base import BaseReader
 from pyWebLayout.io.readers.html_metadata import HTMLMetadataReader
-from pyWebLayout.io.readers.html_content import HTMLContentReader
 from pyWebLayout.io.readers.html_resources import HTMLResourceReader
+from pyWebLayout.io.readers.html_extraction import parse_html_string
+from pyWebLayout.style import Font


-class HTMLReader(CompositeReader):
+class HTMLReader(BaseReader):
    """
-    Modern HTML reader using decomposed architecture.
+    Modern HTML reader using the html_extraction parser.
    
-    This reader combines specialized readers for metadata, content,
-    and resources to provide a complete HTML parsing solution.
+    This reader uses the clean, handler-based architecture from html_extraction.py
+    for parsing HTML content into pyWebLayout's abstract document structure.
    """
    
    def __init__(self):
-        """Initialize the HTML reader with all specialized readers."""
+        """Initialize the HTML reader."""
        super().__init__()
-        
-        # Set up specialized readers
-        self.set_metadata_reader(HTMLMetadataReader())
-        self.set_content_reader(HTMLContentReader())
-        self.set_resource_reader(HTMLResourceReader())
+        self._metadata_reader = HTMLMetadataReader()
+        self._resource_reader = HTMLResourceReader()
    
    def can_read(self, source: Union[str, bytes]) -> bool:
        """
@@ -76,6 +73,7 @@ class HTMLReader(CompositeReader):
                - encoding: Character encoding (default: 'utf-8')
                - extract_metadata: Whether to extract metadata (default: True)
                - extract_resources: Whether to extract resources (default: True)
+                - base_font: Base font for styling (default: None)
            
        Returns:
            The parsed Document
@@ -85,6 +83,7 @@ class HTMLReader(CompositeReader):
        encoding = options.get('encoding', 'utf-8')
        extract_metadata = options.get('extract_metadata', True)
        extract_resources = options.get('extract_resources', True)
+        base_font = options.get('base_font')
        
        # Read the HTML content
        html_content = self._read_html_content(source, encoding)
@@ -93,10 +92,6 @@ class HTMLReader(CompositeReader):
        if not base_url and isinstance(source, str) and os.path.isfile(source):
            base_url = f"file://{os.path.dirname(os.path.abspath(source))}/"
        
-        # Set base URL in content reader
-        if self._content_reader and hasattr(self._content_reader, 'set_base_url'):
-            self._content_reader.set_base_url(base_url)
-        
        # Create a new document
        document = Document()
        
@@ -104,9 +99,10 @@ class HTMLReader(CompositeReader):
        if extract_metadata and self._metadata_reader:
            self._metadata_reader.extract_metadata(html_content, document)
        
-        # Extract content
-        if self._content_reader:
-            self._content_reader.extract_content(html_content, document)
+        # Parse content using html_extraction
+        blocks = parse_html_string(html_content, base_font)
+        for block in blocks:
+            document.add_block(block)
        
        # Extract resources if enabled
        if extract_resources and self._resource_reader:
--- a/pyWebLayout/io/readers/html_content.py
+++ b/pyWebLayout/io/readers/html_content.py
@@ -1,269 +0,0 @@
-"""
-Modern HTML content reader for pyWebLayout.
-
-This module provides a decomposed HTML content reader that uses specialized
-handlers and managers for different aspects of HTML parsing.
-"""
-
-from html.parser import HTMLParser as BaseHTMLParser
-from typing import Dict, List, Optional, Tuple, Union, Any
-from pyWebLayout.abstract.document import Document
-from pyWebLayout.io.readers.base import ContentReader
-from pyWebLayout.io.readers.html_style import HTMLStyleManager
-from pyWebLayout.io.readers.html_text import HTMLTextProcessor
-from pyWebLayout.io.readers.html_elements import (
-    BlockElementHandler, ListElementHandler, TableElementHandler, InlineElementHandler
-)
-
-
-class HTMLContentReader(ContentReader, BaseHTMLParser):
-    """
-    Modern HTML content reader using decomposed architecture.
-    
-    This class orchestrates specialized handlers to parse HTML content
-    and convert it to pyWebLayout's abstract document model.
-    """
-    
-    def __init__(self):
-        """Initialize the HTML content reader."""
-        BaseHTMLParser.__init__(self)
-        
-        # Initialize managers and processors
-        self.style_manager = HTMLStyleManager()
-        self.text_processor = HTMLTextProcessor(self.style_manager)
-        
-        # Initialize element handlers
-        self.block_handler = BlockElementHandler(self.style_manager, self.text_processor)
-        self.list_handler = ListElementHandler(self.text_processor)
-        self.table_handler = TableElementHandler(self.text_processor)
-        self.inline_handler = InlineElementHandler(self.text_processor)
-        
-        # Document and parsing state
-        self._document: Optional[Document] = None
-        self._in_head = False
-        self._in_script = False
-        self._in_style = False
-    
-    def extract_content(self, html_content: str, document: Document) -> Any:
-        """
-        Extract content from HTML.
-        
-        Args:
-            html_content: The HTML content to parse
-            document: The document to populate with content
-            
-        Returns:
-            The document with populated content
-        """
-        self._document = document
-        self._reset_state()
-        
-        # Parse the HTML content
-        self.feed(html_content)
-        
-        # Flush any remaining text
-        self.text_processor.flush_text()
-        
-        return document
-    
-    def set_base_url(self, base_url: str):
-        """Set the base URL for resolving relative links."""
-        self.inline_handler.set_base_url(base_url)
-    
-    def _reset_state(self):
-        """Reset all parser state for new content."""
-        # Reset managers and processors
-        self.style_manager.reset()
-        self.text_processor.reset()
-        
-        # Reset element handlers
-        self.block_handler.reset()
-        self.list_handler.reset()
-        self.table_handler.reset()
-        self.inline_handler.reset()
-        
-        # Reset parser flags
-        self._in_head = False
-        self._in_script = False
-        self._in_style = False
-    
-    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
-        """Handle the start of an HTML tag."""
-        tag = tag.lower()
-        attrs_dict = dict(attrs)
-        
-        # Skip content in head, script, style (except body)
-        if self._should_skip_content(tag):
-            return
-        
-        # Handle special section markers
-        if self._handle_special_sections_start(tag):
-            return
-        
-        # Apply styles for this element
-        style = self.style_manager.apply_style_to_element(tag, attrs_dict)
-        self.style_manager.push_style(style)
-        
-        # Delegate to appropriate handler
-        self._delegate_start_tag(tag, attrs_dict)
-    
-    def handle_endtag(self, tag: str):
-        """Handle the end of an HTML tag."""
-        tag = tag.lower()
-        
-        # Handle special section markers
-        if self._handle_special_sections_end(tag):
-            return
-        
-        # Skip content in head, script, style
-        if self._in_head or self._in_script or self._in_style:
-            return
-        
-        # Flush any accumulated text
-        self.text_processor.flush_text()
-        
-        # Delegate to appropriate handler
-        self._delegate_end_tag(tag)
-        
-        # Pop style regardless of tag
-        self.style_manager.pop_style()
-    
-    def handle_data(self, data: str):
-        """Handle text data."""
-        if self._in_head or self._in_script or self._in_style:
-            return
-        
-        self.text_processor.add_text(data)
-    
-    def handle_entityref(self, name: str):
-        """Handle an HTML entity reference."""
-        if self._in_head or self._in_script or self._in_style:
-            return
-        
-        self.text_processor.add_entity_reference(name)
-    
-    def handle_charref(self, name: str):
-        """Handle a character reference."""
-        if self._in_head or self._in_script or self._in_style:
-            return
-        
-        self.text_processor.add_character_reference(name)
-    
-    def _should_skip_content(self, tag: str) -> bool:
-        """Check if we should skip content based on current state."""
-        if self._in_head or self._in_script or self._in_style:
-            if tag in ('head', 'script', 'style'):
-                return False  # Let special section handlers deal with these
-            if tag != 'body':
-                return True
-        return False
-    
-    def _handle_special_sections_start(self, tag: str) -> bool:
-        """Handle special section start tags. Returns True if handled."""
-        if tag == 'head':
-            self._in_head = True
-            return True
-        elif tag == 'body':
-            self._in_head = False
-            return True
-        elif tag == 'script':
-            self._in_script = True
-            return True
-        elif tag == 'style':
-            self._in_style = True
-            return True
-        return False
-    
-    def _handle_special_sections_end(self, tag: str) -> bool:
-        """Handle special section end tags. Returns True if handled."""
-        if tag == 'head':
-            self._in_head = False
-            self.style_manager.pop_style()
-            return True
-        elif tag == 'script':
-            self._in_script = False
-            self.style_manager.pop_style()
-            return True
-        elif tag == 'style':
-            self._in_style = False
-            self.style_manager.pop_style()
-            return True
-        return False
-    
-    def _delegate_start_tag(self, tag: str, attrs: Dict[str, str]):
-        """Delegate start tag handling to appropriate handler."""
-        # Block elements
-        if tag == 'p':
-            self.block_handler.handle_paragraph_start(self._document)
-        elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
-            self.block_handler.handle_heading_start(tag, self._document)
-        elif tag == 'div':
-            self.block_handler.handle_div_start(self._document)
-        elif tag == 'blockquote':
-            self.block_handler.handle_blockquote_start(self._document)
-        elif tag == 'pre':
-            self.block_handler.handle_pre_start(self._document)
-        elif tag == 'code':
-            self.block_handler.handle_code_start(attrs, self._document)
-        
-        # List elements
-        elif tag in ('ul', 'ol', 'dl'):
-            self.list_handler.handle_list_start(tag, self.block_handler, self._document)
-        elif tag == 'li':
-            self.list_handler.handle_list_item_start(self.block_handler)
-        elif tag in ('dt', 'dd'):
-            self.list_handler.handle_definition_start(tag, self.block_handler)
-        
-        # Table elements
-        elif tag == 'table':
-            self.table_handler.handle_table_start(attrs, self.block_handler, self._document)
-        elif tag in ('thead', 'tbody', 'tfoot'):
-            self.table_handler.handle_table_section_start(tag)
-        elif tag == 'tr':
-            self.table_handler.handle_table_row_start()
-        elif tag in ('td', 'th'):
-            self.table_handler.handle_table_cell_start(tag, attrs, self.block_handler)
-        
-        # Inline elements
-        elif tag == 'a':
-            self.inline_handler.handle_link_start(attrs)
-        elif tag == 'img':
-            self.inline_handler.handle_image(attrs, self.block_handler, self._document)
-        elif tag == 'br':
-            self.inline_handler.handle_line_break(self.block_handler)
-        elif tag == 'hr':
-            self.inline_handler.handle_horizontal_rule(self.block_handler, self._document)
-        
-        # Style-only elements (no special handling needed, just styling)
-        elif tag in ('b', 'strong', 'i', 'em', 'u', 'span'):
-            pass  # Styles are already applied by style manager
-    
-    def _delegate_end_tag(self, tag: str):
-        """Delegate end tag handling to appropriate handler."""
-        # Block elements
-        if tag in ('p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'pre', 'code'):
-            self.block_handler.handle_block_end()
-        
-        # List elements
-        elif tag in ('ul', 'ol', 'dl'):
-            self.list_handler.handle_list_end(self.block_handler)
-        elif tag in ('li', 'dt', 'dd'):
-            self.list_handler.handle_list_item_end(self.block_handler)
-        
-        # Table elements
-        elif tag == 'table':
-            self.table_handler.handle_table_end(self.block_handler)
-        elif tag in ('thead', 'tbody', 'tfoot'):
-            self.table_handler.handle_table_section_end()
-        elif tag == 'tr':
-            self.table_handler.handle_table_row_end()
-        elif tag in ('td', 'th'):
-            self.table_handler.handle_table_cell_end(self.block_handler)
-        
-        # Inline elements
-        elif tag == 'a':
-            self.inline_handler.handle_link_end()
-        
-        # Style-only elements (no special handling needed)
-        elif tag in ('b', 'strong', 'i', 'em', 'u', 'span'):
-            pass  # Styles are handled by style manager
--- a/pyWebLayout/io/readers/html_elements.py
+++ b/pyWebLayout/io/readers/html_elements.py
@ -1,473 +0,0 @@
-"""
-HTML element handlers for pyWebLayout.
-
-This module provides specialized handlers for different types of HTML elements,
-using composition and delegation to handle specific element types.
-"""
-
-from typing import Dict, List, Optional, Any
-import urllib.parse
-from pyWebLayout.abstract.document import Document
-from pyWebLayout.abstract.block import (
-    Block, Paragraph, Heading, HeadingLevel, Quote, CodeBlock,
-    HList, ListStyle, ListItem, Table, TableRow, TableCell, 
-    HorizontalRule, Image
-)
-from pyWebLayout.abstract.inline import LineBreak
-from pyWebLayout.abstract.functional import Link, LinkType
-from pyWebLayout.io.readers.html_style import HTMLStyleManager
-from pyWebLayout.io.readers.html_text import HTMLTextProcessor
-
-
-class BlockElementHandler:
-    """Handles block-level HTML elements like paragraphs, headings, divs."""
-    
-    def __init__(self, style_manager: HTMLStyleManager, text_processor: HTMLTextProcessor):
-        self.style_manager = style_manager
-        self.text_processor = text_processor
-        self.block_stack: List[Block] = []
-        self.current_block: Optional[Block] = None
-        self.current_paragraph: Optional[Paragraph] = None
-    
-    def reset(self):
-        """Reset the handler state."""
-        self.block_stack = []
-        self.current_block = None
-        self.current_paragraph = None
-    
-    def add_block_to_document_or_parent(self, block: Block, document: Document):
-        """Add a block to the document or current parent block."""
-        if self.current_block and hasattr(self.current_block, 'add_block'):
-            self.current_block.add_block(block)
-        else:
-            document.add_block(block)
-    
-    def handle_paragraph_start(self, document: Document):
-        """Handle the start of a paragraph element."""
-        self.text_processor.flush_text()
-        paragraph = Paragraph()
-        
-        self.add_block_to_document_or_parent(paragraph, document)
-        self.block_stack.append(paragraph)
-        self.current_block = paragraph
-        self.current_paragraph = paragraph
-        self.text_processor.set_current_paragraph(paragraph)
-    
-    def handle_heading_start(self, tag: str, document: Document):
-        """Handle the start of a heading element."""
-        self.text_processor.flush_text()
-        
-        level_map = {
-            'h1': HeadingLevel.H1, 'h2': HeadingLevel.H2, 'h3': HeadingLevel.H3,
-            'h4': HeadingLevel.H4, 'h5': HeadingLevel.H5, 'h6': HeadingLevel.H6
-        }
-        
-        heading = Heading(level=level_map[tag])
-        self.add_block_to_document_or_parent(heading, document)
-        self.block_stack.append(heading)
-        self.current_block = heading
-        self.current_paragraph = heading  # Heading inherits from Paragraph
-        self.text_processor.set_current_paragraph(heading)
-    
-    def handle_div_start(self, document: Document):
-        """Handle the start of a div element."""
-        self.text_processor.flush_text()
-        div_para = Paragraph()
-        
-        self.add_block_to_document_or_parent(div_para, document)
-        self.block_stack.append(div_para)
-        self.current_block = div_para
-        self.current_paragraph = div_para
-        self.text_processor.set_current_paragraph(div_para)
-    
-    def handle_blockquote_start(self, document: Document):
-        """Handle the start of a blockquote element."""
-        self.text_processor.flush_text()
-        quote = Quote()
-        
-        self.add_block_to_document_or_parent(quote, document)
-        self.block_stack.append(quote)
-        self.current_block = quote
-        self.current_paragraph = None
-        self.text_processor.set_current_paragraph(None)
-    
-    def handle_pre_start(self, document: Document):
-        """Handle the start of a pre element."""
-        self.text_processor.flush_text()
-        pre_para = Paragraph()
-        
-        self.add_block_to_document_or_parent(pre_para, document)
-        self.block_stack.append(pre_para)
-        self.current_block = pre_para
-        self.current_paragraph = pre_para
-        self.text_processor.set_current_paragraph(pre_para)
-    
-    def handle_code_start(self, attrs: Dict[str, str], document: Document):
-        """Handle the start of a code element."""
-        # If we're inside a pre, replace the paragraph with a code block
-        if self.block_stack and isinstance(self.block_stack[-1], Paragraph):
-            pre_para = self.block_stack.pop()
-            
-            # Get the language from class if specified
-            language = ""
-            if 'class' in attrs:
-                class_attr = attrs['class']
-                if class_attr.startswith('language-'):
-                    language = class_attr[9:]
-            
-            code_block = CodeBlock(language=language)
-            
-            # Replace the paragraph with the code block in its parent
-            if pre_para.parent:
-                parent = pre_para.parent
-                if hasattr(parent, '_blocks'):
-                    for i, block in enumerate(parent._blocks):
-                        if block == pre_para:
-                            parent._blocks[i] = code_block
-                            code_block.parent = parent
-                            break
-            else:
-                # Replace in document blocks
-                for i, block in enumerate(document.blocks):
-                    if block == pre_para:
-                        document.blocks[i] = code_block
-                        break
-            
-            self.block_stack.append(code_block)
-            self.current_block = code_block
-            self.current_paragraph = None
-            self.text_processor.set_current_paragraph(None)
-    
-    def handle_block_end(self):
-        """Handle the end of a block element."""
-        if self.block_stack:
-            self.block_stack.pop()
-        
-        if self.block_stack:
-            self.current_block = self.block_stack[-1]
-            # Update current paragraph based on block type
-            if isinstance(self.current_block, Paragraph):
-                self.current_paragraph = self.current_block
-            else:
-                self.current_paragraph = None
-        else:
-            self.current_block = None
-            self.current_paragraph = None
-        
-        self.text_processor.set_current_paragraph(self.current_paragraph)
-
-
-class ListElementHandler:
-    """Handles list-related HTML elements (ul, ol, dl, li, dt, dd)."""
-    
-    def __init__(self, text_processor: HTMLTextProcessor):
-        self.text_processor = text_processor
-        self.list_stack: List[HList] = []
-    
-    def reset(self):
-        """Reset the handler state."""
-        self.list_stack = []
-    
-    def handle_list_start(self, tag: str, block_handler: BlockElementHandler, document: Document):
-        """Handle the start of a list element."""
-        self.text_processor.flush_text()
-        
-        style_map = {
-            'ul': ListStyle.UNORDERED,
-            'ol': ListStyle.ORDERED,
-            'dl': ListStyle.DEFINITION
-        }
-        
-        list_block = HList(style=style_map[tag])
-        block_handler.add_block_to_document_or_parent(list_block, document)
-        
-        block_handler.block_stack.append(list_block)
-        self.list_stack.append(list_block)
-        block_handler.current_block = list_block
-        block_handler.current_paragraph = None
-        self.text_processor.set_current_paragraph(None)
-    
-    def handle_list_item_start(self, block_handler: BlockElementHandler):
-        """Handle the start of a list item."""
-        if not self.list_stack:
-            return
-        
-        self.text_processor.flush_text()
-        list_item = ListItem()
-        
-        current_list = self.list_stack[-1]
-        current_list.add_item(list_item)
-        
-        block_handler.block_stack.append(list_item)
-        block_handler.current_block = list_item
-        
-        # Create a paragraph for the list item content
-        item_para = Paragraph()
-        list_item.add_block(item_para)
-        block_handler.current_paragraph = item_para
-        self.text_processor.set_current_paragraph(item_para)
-    
-    def handle_definition_start(self, tag: str, block_handler: BlockElementHandler):
-        """Handle the start of definition terms or descriptions."""
-        if not self.list_stack or self.list_stack[-1].style != ListStyle.DEFINITION:
-            return
-        
-        self.text_processor.flush_text()
-        current_list = self.list_stack[-1]
-        
-        if tag == 'dt':
-            list_item = ListItem(term="")
-            current_list.add_item(list_item)
-            block_handler.block_stack.append(list_item)
-            block_handler.current_block = list_item
-            
-            term_para = Paragraph()
-            list_item.add_block(term_para)
-            block_handler.current_paragraph = term_para
-            self.text_processor.set_current_paragraph(term_para)
-        
-        elif tag == 'dd':
-            if current_list._items:
-                list_item = current_list._items[-1]
-                desc_para = Paragraph()
-                list_item.add_block(desc_para)
-                block_handler.current_paragraph = desc_para
-                self.text_processor.set_current_paragraph(desc_para)
-    
-    def handle_list_end(self, block_handler: BlockElementHandler):
-        """Handle the end of a list."""
-        if block_handler.block_stack:
-            block_handler.block_stack.pop()
-        if self.list_stack:
-            self.list_stack.pop()
-        
-        if block_handler.block_stack:
-            block_handler.current_block = block_handler.block_stack[-1]
-        else:
-            block_handler.current_block = None
-        
-        block_handler.current_paragraph = None
-        self.text_processor.set_current_paragraph(None)
-    
-    def handle_list_item_end(self, block_handler: BlockElementHandler):
-        """Handle the end of a list item."""
-        if block_handler.block_stack:
-            block_handler.block_stack.pop()
-        
-        if block_handler.block_stack:
-            block_handler.current_block = block_handler.block_stack[-1]
-        else:
-            block_handler.current_block = None
-        
-        block_handler.current_paragraph = None
-        self.text_processor.set_current_paragraph(None)
-
-
-class TableElementHandler:
-    """Handles table-related HTML elements (table, tr, td, th, thead, tbody, tfoot)."""
-    
-    def __init__(self, text_processor: HTMLTextProcessor):
-        self.text_processor = text_processor
-        self.table_stack: List[Table] = []
-        self.current_table_row: Optional[TableRow] = None
-        self.current_table_section = "body"
-    
-    def reset(self):
-        """Reset the handler state."""
-        self.table_stack = []
-        self.current_table_row = None
-        self.current_table_section = "body"
-    
-    def handle_table_start(self, attrs: Dict[str, str], block_handler: BlockElementHandler, document: Document):
-        """Handle the start of a table element."""
-        self.text_processor.flush_text()
-        
-        caption = attrs.get('summary')
-        table = Table(caption=caption)
-        
-        block_handler.add_block_to_document_or_parent(table, document)
-        block_handler.block_stack.append(table)
-        self.table_stack.append(table)
-        block_handler.current_block = table
-        block_handler.current_paragraph = None
-        self.text_processor.set_current_paragraph(None)
-    
-    def handle_table_section_start(self, tag: str):
-        """Handle the start of a table section."""
-        self.current_table_section = tag
-    
-    def handle_table_row_start(self):
-        """Handle the start of a table row."""
-        if not self.table_stack:
-            return
-        
-        self.text_processor.flush_text()
-        row = TableRow()
-        
-        current_table = self.table_stack[-1]
-        section = self.current_table_section
-        
-        if section == 'thead':
-            section = "header"
-        elif section == 'tfoot':
-            section = "footer"
-        else:
-            section = "body"
-        
-        current_table.add_row(row, section=section)
-        self.current_table_row = row
-    
-    def handle_table_cell_start(self, tag: str, attrs: Dict[str, str], block_handler: BlockElementHandler):
-        """Handle the start of a table cell."""
-        if not self.current_table_row:
-            return
-        
-        self.text_processor.flush_text()
-        
-        # Parse attributes
-        try:
-            colspan = int(attrs.get('colspan', 1))
-            rowspan = int(attrs.get('rowspan', 1))
-        except ValueError:
-            colspan, rowspan = 1, 1
-        
-        is_header = (tag == 'th')
-        
-        cell = TableCell(is_header=is_header, colspan=colspan, rowspan=rowspan)
-        self.current_table_row.add_cell(cell)
-        
-        block_handler.block_stack.append(cell)
-        block_handler.current_block = cell
-        
-        # Create a paragraph for the cell content
-        cell_para = Paragraph()
-        cell.add_block(cell_para)
-        block_handler.current_paragraph = cell_para
-        self.text_processor.set_current_paragraph(cell_para)
-    
-    def handle_table_end(self, block_handler: BlockElementHandler):
-        """Handle the end of a table."""
-        if block_handler.block_stack:
-            block_handler.block_stack.pop()
-        if self.table_stack:
-            self.table_stack.pop()
-        
-        if block_handler.block_stack:
-            block_handler.current_block = block_handler.block_stack[-1]
-        else:
-            block_handler.current_block = None
-        
-        block_handler.current_paragraph = None
-        self.text_processor.set_current_paragraph(None)
-        self.current_table_row = None
-        self.current_table_section = "body"
-    
-    def handle_table_section_end(self):
-        """Handle the end of a table section."""
-        self.current_table_section = "body"
-    
-    def handle_table_row_end(self):
-        """Handle the end of a table row."""
-        self.current_table_row = None
-    
-    def handle_table_cell_end(self, block_handler: BlockElementHandler):
-        """Handle the end of a table cell."""
-        if block_handler.block_stack:
-            block_handler.block_stack.pop()
-        
-        if block_handler.block_stack:
-            block_handler.current_block = block_handler.block_stack[-1]
-        else:
-            block_handler.current_block = None
-        
-        block_handler.current_paragraph = None
-        self.text_processor.set_current_paragraph(None)
-
-
-class InlineElementHandler:
-    """Handles inline and special HTML elements (a, img, br, hr)."""
-    
-    def __init__(self, text_processor: HTMLTextProcessor, base_url: Optional[str] = None):
-        self.text_processor = text_processor
-        self.base_url = base_url
-        self.in_link = False
-        self.current_link: Optional[Link] = None
-    
-    def reset(self):
-        """Reset the handler state."""
-        self.in_link = False
-        self.current_link = None
-    
-    def set_base_url(self, base_url: Optional[str]):
-        """Set the base URL for resolving relative links."""
-        self.base_url = base_url
-    
-    def handle_link_start(self, attrs: Dict[str, str]):
-        """Handle the start of a link element."""
-        self.text_processor.flush_text()
-        
-        href = attrs.get('href', '')
-        title = attrs.get('title', '')
-        
-        # Determine link type
-        link_type = LinkType.INTERNAL
-        if href.startswith('http://') or href.startswith('https://'):
-            link_type = LinkType.EXTERNAL
-        elif href.startswith('javascript:'):
-            link_type = LinkType.FUNCTION
-        elif href.startswith('api:'):
-            link_type = LinkType.API
-            href = href[4:]
-        
-        # Resolve relative URLs
-        if self.base_url and not href.startswith(('http://', 'https://', 'javascript:', 'api:', '#')):
-            href = urllib.parse.urljoin(self.base_url, href)
-        
-        self.current_link = Link(
-            location=href,
-            link_type=link_type,
-            title=title if title else None
-        )
-        
-        self.in_link = True
-    
-    def handle_link_end(self):
-        """Handle the end of a link element."""
-        self.in_link = False
-        self.current_link = None
-    
-    def handle_image(self, attrs: Dict[str, str], block_handler: BlockElementHandler, document: Document):
-        """Handle an image element."""
-        src = attrs.get('src', '')
-        alt = attrs.get('alt', '')
-        
-        # Parse dimensions
-        width = height = None
-        try:
-            if 'width' in attrs:
-                width = int(attrs['width'])
-            if 'height' in attrs:
-                height = int(attrs['height'])
-        except ValueError:
-            pass
-        
-        # Resolve relative URLs
-        if self.base_url and not src.startswith(('http://', 'https://')):
-            src = urllib.parse.urljoin(self.base_url, src)
-        
-        image = Image(source=src, alt_text=alt, width=width, height=height)
-        block_handler.add_block_to_document_or_parent(image, document)
-    
-    def handle_line_break(self, block_handler: BlockElementHandler):
-        """Handle a line break element."""
-        if block_handler.current_paragraph:
-            line_break = LineBreak()
-            if hasattr(block_handler.current_paragraph, 'add_block'):
-                block_handler.current_paragraph.add_block(line_break)
-        self.text_processor.flush_text()
-    
-    def handle_horizontal_rule(self, block_handler: BlockElementHandler, document: Document):
-        """Handle a horizontal rule element."""
-        self.text_processor.flush_text()
-        hr = HorizontalRule()
-        block_handler.add_block_to_document_or_parent(hr, document)
--- a/pyWebLayout/io/readers/html_extraction.py
+++ b/pyWebLayout/io/readers/html_extraction.py
@ -12,7 +12,8 @@ from bs4 import BeautifulSoup, Tag, NavigableString
 from pyWebLayout.abstract.inline import Word, FormattedSpan
 from pyWebLayout.abstract.block import (
    Block, Paragraph, Heading, HeadingLevel, Quote, CodeBlock, 
-    HList, ListItem, ListStyle, Table, TableRow, TableCell
+    HList, ListItem, ListStyle, Table, TableRow, TableCell,
+    HorizontalRule, Image
 )
 from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration

@ -576,11 +577,9 @@ def table_header_cell_handler(element: Tag, context: StyleContext) -> TableCell:
    return cell


-def horizontal_rule_handler(element: Tag, context: StyleContext) -> Block:
+def horizontal_rule_handler(element: Tag, context: StyleContext) -> HorizontalRule:
    """Handle <hr> elements."""
-    # TODO: Create a specific HorizontalRule block type
-    # For now, return an empty paragraph
-    return Paragraph(context.font)
+    return HorizontalRule()


 def line_break_handler(element: Tag, context: StyleContext) -> None:
@ -589,18 +588,22 @@ def line_break_handler(element: Tag, context: StyleContext) -> None:
    return None


-def image_handler(element: Tag, context: StyleContext) -> Block:
+def image_handler(element: Tag, context: StyleContext) -> Image:
    """Handle <img> elements."""
-    # TODO: Create Image block type
-    # For now, return empty paragraph with alt text if available
-    paragraph = Paragraph(context.font)
+    src = context.element_attributes.get('src', '')
    alt_text = context.element_attributes.get('alt', '')
-    if alt_text:
-        words = alt_text.split()
-        for word_text in words:
-            if word_text:
-                paragraph.add_word(Word(word_text, context.font))
-    return paragraph
+    
+    # Parse dimensions if provided
+    width = height = None
+    try:
+        if 'width' in context.element_attributes:
+            width = int(context.element_attributes['width'])
+        if 'height' in context.element_attributes:
+            height = int(context.element_attributes['height'])
+    except ValueError:
+        pass
+    
+    return Image(source=src, alt_text=alt_text, width=width, height=height)


 def ignore_handler(element: Tag, context: StyleContext) -> None:
--- a/pyWebLayout/io/readers/html_style.py
+++ b/pyWebLayout/io/readers/html_style.py
@ -1,281 +0,0 @@
-"""
-HTML style management for pyWebLayout.
-
-This module provides specialized functionality for handling CSS styles,
-style stacks, and style parsing in HTML documents.
-"""
-
-from typing import Dict, List, Any, Optional, Tuple
-import re
-from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
-
-
-class HTMLStyleManager:
-    """
-    Manages CSS styles and style stacks during HTML parsing.
-    
-    This class handles style parsing, style inheritance, and maintains
-    the style stack for proper style nesting.
-    """
-    
-    def __init__(self):
-        """Initialize the style manager."""
-        self._style_stack: List[Dict[str, Any]] = []
-        self._current_style = self._get_default_style()
-    
-    def _get_default_style(self) -> Dict[str, Any]:
-        """Get the default style settings."""
-        return {
-            'font_size': 12,
-            'font_weight': FontWeight.NORMAL,
-            'font_style': FontStyle.NORMAL,
-            'decoration': TextDecoration.NONE,
-            'color': (0, 0, 0),
-            'background': None,
-            'language': 'en_US'
-        }
-    
-    def reset(self):
-        """Reset the style manager to initial state."""
-        self._style_stack = []
-        self._current_style = self._get_default_style()
-    
-    def push_style(self, style: Dict[str, Any]):
-        """
-        Push a new style onto the style stack.
-        
-        Args:
-            style: The style to push
-        """
-        # Save the current style
-        self._style_stack.append(self._current_style.copy())
-        
-        # Apply the new style
-        for key, value in style.items():
-            self._current_style[key] = value
-    
-    def pop_style(self):
-        """Pop a style from the style stack."""
-        if self._style_stack:
-            self._current_style = self._style_stack.pop()
-    
-    def get_current_style(self) -> Dict[str, Any]:
-        """Get the current style."""
-        return self._current_style.copy()
-    
-    def get_tag_style(self, tag: str) -> Dict[str, Any]:
-        """
-        Get the default style for a tag.
-        
-        Args:
-            tag: The tag name
-            
-        Returns:
-            A dictionary of style properties
-        """
-        tag_styles = {
-            'h1': {'font_size': 24, 'font_weight': FontWeight.BOLD},
-            'h2': {'font_size': 20, 'font_weight': FontWeight.BOLD},
-            'h3': {'font_size': 18, 'font_weight': FontWeight.BOLD},
-            'h4': {'font_size': 16, 'font_weight': FontWeight.BOLD},
-            'h5': {'font_size': 14, 'font_weight': FontWeight.BOLD},
-            'h6': {'font_size': 12, 'font_weight': FontWeight.BOLD},
-            'b': {'font_weight': FontWeight.BOLD},
-            'strong': {'font_weight': FontWeight.BOLD},
-            'i': {'font_style': FontStyle.ITALIC},
-            'em': {'font_style': FontStyle.ITALIC},
-            'u': {'decoration': TextDecoration.UNDERLINE},
-            'a': {'decoration': TextDecoration.UNDERLINE, 'color': (0, 0, 255)},
-            'code': {'font_family': 'monospace', 'background': (240, 240, 240, 255)},
-            'pre': {'font_family': 'monospace'},
-        }
-        
-        return tag_styles.get(tag, {})
-    
-    def create_font(self) -> Font:
-        """
-        Create a Font object from the current style.
-        
-        Returns:
-            Font: A font object with the current style settings
-        """
-        return Font(
-            font_size=self._current_style['font_size'],
-            colour=self._current_style['color'],
-            weight=self._current_style['font_weight'],
-            style=self._current_style['font_style'],
-            decoration=self._current_style['decoration'],
-            background=self._current_style['background'],
-            langauge=self._current_style['language']
-        )
-    
-    def parse_inline_style(self, style_str: str) -> Dict[str, Any]:
-        """
-        Parse inline CSS style string.
-        
-        Args:
-            style_str: CSS style string
-            
-        Returns:
-            Dictionary of style properties
-        """
-        if not style_str:
-            return {}
-        
-        style_dict = {}
-        declarations = [d.strip() for d in style_str.split(';') if d.strip()]
-        
-        for declaration in declarations:
-            parts = declaration.split(':', 1)
-            if len(parts) != 2:
-                continue
-            
-            prop = parts[0].strip().lower()
-            value = parts[1].strip()
-            
-            # Handle specific properties
-            if prop == 'font-size':
-                if value.endswith('px'):
-                    try:
-                        size = int(value[:-2])
-                        style_dict['font_size'] = size
-                    except ValueError:
-                        pass
-                elif value.endswith('pt'):
-                    try:
-                        size = int(value[:-2])
-                        style_dict['font_size'] = size
-                    except ValueError:
-                        pass
-            elif prop == 'font-weight':
-                if value == 'bold':
-                    style_dict['font_weight'] = FontWeight.BOLD
-                elif value == 'normal':
-                    style_dict['font_weight'] = FontWeight.NORMAL
-            elif prop == 'font-style':
-                if value == 'italic':
-                    style_dict['font_style'] = FontStyle.ITALIC
-                elif value == 'normal':
-                    style_dict['font_style'] = FontStyle.NORMAL
-            elif prop == 'text-decoration':
-                if value == 'underline':
-                    style_dict['decoration'] = TextDecoration.UNDERLINE
-                elif value == 'line-through':
-                    style_dict['decoration'] = TextDecoration.STRIKETHROUGH
-                elif value == 'none':
-                    style_dict['decoration'] = TextDecoration.NONE
-            elif prop == 'color':
-                color = self.parse_color(value)
-                if color:
-                    style_dict['color'] = color
-            elif prop == 'background-color':
-                color = self.parse_color(value)
-                if color:
-                    style_dict['background'] = color + (255,)
-        
-        return style_dict
-    
-    def parse_color(self, color_str: str) -> Optional[Tuple[int, int, int]]:
-        """
-        Parse a CSS color string.
-        
-        Args:
-            color_str: CSS color string
-            
-        Returns:
-            RGB tuple or None if parsing fails
-        """
-        # Named colors
-        color_map = {
-            'black': (0, 0, 0),
-            'white': (255, 255, 255),
-            'red': (255, 0, 0),
-            'green': (0, 128, 0),
-            'blue': (0, 0, 255),
-            'yellow': (255, 255, 0),
-            'cyan': (0, 255, 255),
-            'magenta': (255, 0, 255),
-            'gray': (128, 128, 128),
-            'grey': (128, 128, 128),
-            'silver': (192, 192, 192),
-            'maroon': (128, 0, 0),
-            'olive': (128, 128, 0),
-            'navy': (0, 0, 128),
-            'purple': (128, 0, 128),
-            'teal': (0, 128, 128),
-            'lime': (0, 255, 0),
-            'aqua': (0, 255, 255),
-            'fuchsia': (255, 0, 255),
-        }
-        
-        # Check for named color
-        color_str = color_str.lower().strip()
-        if color_str in color_map:
-            return color_map[color_str]
-        
-        # Check for hex color
-        if color_str.startswith('#'):
-            try:
-                if len(color_str) == 4:  # #RGB
-                    r = int(color_str[1] + color_str[1], 16)
-                    g = int(color_str[2] + color_str[2], 16)
-                    b = int(color_str[3] + color_str[3], 16)
-                    return (r, g, b)
-                elif len(color_str) == 7:  # #RRGGBB
-                    r = int(color_str[1:3], 16)
-                    g = int(color_str[3:5], 16)
-                    b = int(color_str[5:7], 16)
-                    return (r, g, b)
-            except ValueError:
-                pass
-        
-        # Check for rgb() color
-        rgb_match = re.match(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', color_str)
-        if rgb_match:
-            try:
-                r_val = int(rgb_match.group(1))
-                g_val = int(rgb_match.group(2))
-                b_val = int(rgb_match.group(3))
-                
-                # Check if values are in valid range (0-255)
-                if r_val > 255 or g_val > 255 or b_val > 255 or r_val < 0 or g_val < 0 or b_val < 0:
-                    return None  # Invalid color values
-                
-                return (r_val, g_val, b_val)
-            except ValueError:
-                pass
-        
-        # Check for rgba() color (ignore alpha)
-        rgba_match = re.match(r'rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)', color_str)
-        if rgba_match:
-            try:
-                r = min(255, max(0, int(rgba_match.group(1))))
-                g = min(255, max(0, int(rgba_match.group(2))))
-                b = min(255, max(0, int(rgba_match.group(3))))
-                return (r, g, b)
-            except ValueError:
-                pass
-        
-        # Failed to parse color
-        return None
-    
-    def apply_style_to_element(self, tag: str, attrs: Dict[str, str]) -> Dict[str, Any]:
-        """
-        Apply combined styles (tag defaults + inline styles) for an element.
-        
-        Args:
-            tag: The HTML tag name
-            attrs: Dictionary of tag attributes
-            
-        Returns:
-            Combined style dictionary
-        """
-        # Start with tag-specific styles
-        style = self.get_tag_style(tag)
-        
-        # Override with inline styles if present
-        if 'style' in attrs:
-            inline_style = self.parse_inline_style(attrs['style'])
-            style.update(inline_style)
-        
-        return style
--- a/pyWebLayout/io/readers/html_text.py
+++ b/pyWebLayout/io/readers/html_text.py
@ -1,163 +0,0 @@
-"""
-HTML text processing for pyWebLayout.
-
-This module provides specialized functionality for handling text content,
-entity references, and word creation in HTML documents.
-"""
-
-from typing import Optional
-from pyWebLayout.abstract.inline import Word
-from pyWebLayout.abstract.block import Paragraph
-from pyWebLayout.io.readers.html_style import HTMLStyleManager
-
-
-class HTMLTextProcessor:
-    """
-    Processes text content during HTML parsing.
-    
-    This class handles text buffering, entity resolution, and word creation
-    with proper styling applied.
-    """
-    
-    def __init__(self, style_manager: HTMLStyleManager):
-        """
-        Initialize the text processor.
-        
-        Args:
-            style_manager: The style manager for creating styled words
-        """
-        self._style_manager = style_manager
-        self._text_buffer = ""
-        self._current_paragraph: Optional[Paragraph] = None
-    
-    def reset(self):
-        """Reset the text processor state."""
-        self._text_buffer = ""
-        self._current_paragraph = None
-    
-    def set_current_paragraph(self, paragraph: Optional[Paragraph]):
-        """
-        Set the current paragraph for text output.
-        
-        Args:
-            paragraph: The paragraph to receive text, or None
-        """
-        self._current_paragraph = paragraph
-    
-    def add_text(self, text: str):
-        """
-        Add text to the buffer.
-        
-        Args:
-            text: The text to add
-        """
-        self._text_buffer += text
-    
-    def add_entity_reference(self, name: str):
-        """
-        Add an HTML entity reference to the buffer.
-        
-        Args:
-            name: The entity name (e.g., 'lt', 'gt', 'amp')
-        """
-        # Map common entity references to characters
-        entities = {
-            'lt': '<',
-            'gt': '>',
-            'amp': '&',
-            'quot': '"',
-            'apos': "'",
-            'nbsp': ' ',
-            'copy': '©',
-            'reg': '®',
-            'trade': '™',
-            'mdash': '—',
-            'ndash': '–',
-            'hellip': '…',
-            'laquo': '«',
-            'raquo': '»',
-            'ldquo': '“',
-            'rdquo': '”',
-            'lsquo': '‘',
-            'rsquo': '’',
-            'deg': '°',
-            'plusmn': '±',
-            'times': '×',
-            'divide': '÷',
-            'euro': '€',
-            'pound': '£',
-            'yen': '¥',
-        }
-        
-        char = entities.get(name, f'&{name};')
-        self._text_buffer += char
-    
-    def add_character_reference(self, name: str):
-        """
-        Add a character reference to the buffer.
-        
-        Args:
-            name: The character reference (decimal or hex)
-        """
-        try:
-            if name.startswith('x'):
-                # Hexadecimal reference
-                char = chr(int(name[1:], 16))
-            else:
-                # Decimal reference
-                char = chr(int(name))
-            self._text_buffer += char
-        except (ValueError, OverflowError):
-            # Invalid character reference
-            self._text_buffer += f'&#{name};'
-    
-    def flush_text(self) -> bool:
-        """
-        Flush the text buffer, creating words as needed.
-        
-        Returns:
-            True if text was flushed, False if buffer was empty
-        """
-        if not self._text_buffer or not self._current_paragraph:
-            self._text_buffer = ""
-            return False
-        
-        # Clean up the text
-        text = self._text_buffer.strip()
-        if not text:
-            self._text_buffer = ""
-            return False
-        
-        # Create words from the text
-        words = text.split()
-        for word_text in words:
-            if word_text:
-                font = self._style_manager.create_font()
-                word = Word(word_text, font)
-                self._current_paragraph.add_word(word)
-        
-        # Reset text buffer
-        self._text_buffer = ""
-        return True
-    
-    def has_pending_text(self) -> bool:
-        """
-        Check if there is pending text in the buffer.
-        
-        Returns:
-            True if there is text waiting to be flushed
-        """
-        return bool(self._text_buffer.strip())
-    
-    def get_buffer_content(self) -> str:
-        """
-        Get the current buffer content without flushing.
-        
-        Returns:
-            The current text buffer content
-        """
-        return self._text_buffer
-    
-    def clear_buffer(self):
-        """Clear the text buffer without creating words."""
-        self._text_buffer = ""
--- a/pyWebLayout/style/fonts.py
+++ b/pyWebLayout/style/fonts.py
@ -34,7 +34,7 @@ class Font:
                 style: FontStyle = FontStyle.NORMAL,
                 decoration: TextDecoration = TextDecoration.NONE,
                 background: Optional[Tuple[int, int, int, int]] = None,
-                 langauge = "en_EN"):
+                 language = "en_EN"):
        """
        Initialize a Font object with the specified properties.
        
@ -46,6 +46,7 @@ class Font:
            style: Font style (normal or italic).
            decoration: Text decoration (none, underline, or strikethrough).
            background: RGBA background color for the text. If None, transparent background.
+            language: Language code for hyphenation and text processing.
        """
        self._font_path = font_path
        self._font_size = font_size
@ -54,7 +55,7 @@ class Font:
        self._style = style
        self._decoration = decoration
        self._background = background if background else (255, 255, 255, 0)
-        self.language = langauge
+        self.language = language
        # Load the font file or use default
        self._load_font()
    
--- a/tests/test_html_content.py
+++ b/tests/test_html_content.py
@ -1,354 +0,0 @@
-"""
-Unit tests for HTML content reading.
-
-Tests the HTMLContentReader class for parsing complete HTML documents.
-This is more of an integration test covering the entire parsing pipeline.
-"""
-
-import unittest
-from pyWebLayout.io.readers.html_content import HTMLContentReader
-from pyWebLayout.abstract.document import Document
-from pyWebLayout.abstract.block import (
-    Paragraph, Heading, HeadingLevel, HList, ListStyle, 
-    Table, Quote, CodeBlock, HorizontalRule
-)
-from pyWebLayout.abstract.inline import LineBreak
-
-class TestHTMLContentReader(unittest.TestCase):
-    """Test cases for HTMLContentReader."""
-    
-    def setUp(self):
-        """Set up test fixtures."""
-        self.reader = HTMLContentReader()
-        self.document = Document()
-    
-    def test_simple_paragraph(self):
-        """Test parsing a simple paragraph."""
-        html = '<p>Hello world!</p>'
-        
-        result = self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        self.assertIsInstance(self.document.blocks[0], Paragraph)
-        
-        paragraph = self.document.blocks[0]
-        words = list(paragraph.words())
-        self.assertEqual(len(words), 2)
-        self.assertEqual(words[0][1].text, "Hello")
-        self.assertEqual(words[1][1].text, "world!")
-    
-    def test_headings(self):
-        """Test parsing different heading levels."""
-        html = '''
-        <h1>Heading 1</h1>
-        <h2>Heading 2</h2>
-        <h3>Heading 3</h3>
-        <h6>Heading 6</h6>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        # Should have 4 heading blocks
-        headings = [block for block in self.document.blocks if isinstance(block, Heading)]
-        self.assertEqual(len(headings), 4)
-        
-        # Check heading levels
-        self.assertEqual(headings[0].level, HeadingLevel.H1)
-        self.assertEqual(headings[1].level, HeadingLevel.H2)
-        self.assertEqual(headings[2].level, HeadingLevel.H3)
-        self.assertEqual(headings[3].level, HeadingLevel.H6)
-        
-        # Check text content
-        h1_words = list(headings[0].words())
-        self.assertEqual(len(h1_words), 2)
-        self.assertEqual(h1_words[0][1].text, "Heading")
-        self.assertEqual(h1_words[1][1].text, "1")
-    
-    def test_styled_text(self):
-        """Test parsing text with inline styling."""
-        html = '<p>This is <b>bold</b> and <i>italic</i> text.</p>'
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        paragraph = self.document.blocks[0]
-        words = list(paragraph.words())
-        
-        # Should have words: "This", "is", "bold", "and", "italic", "text."
-        self.assertEqual(len(words), 6)
-        
-        # The styling information is embedded in the Font objects
-        # We can't easily test the exact styling without more complex setup
-        # but we can verify the words are created correctly
-        word_texts = [word[1].text for word in words]
-        self.assertEqual(word_texts, ["This", "is", "bold", "and", "italic", "text."])
-    
-    def test_unordered_list(self):
-        """Test parsing unordered lists."""
-        html = '''
-        <ul>
-            <li>First item</li>
-            <li>Second item</li>
-            <li>Third item</li>
-        </ul>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        self.assertIsInstance(self.document.blocks[0], HList)
-        
-        list_block = self.document.blocks[0]
-        self.assertEqual(list_block.style, ListStyle.UNORDERED)
-        
-        items = list(list_block.items())
-        self.assertEqual(len(items), 3)
-        
-        # Check first item content
-        first_item_blocks = list(items[0].blocks())
-        self.assertEqual(len(first_item_blocks), 1)
-        self.assertIsInstance(first_item_blocks[0], Paragraph)
-    
-    def test_ordered_list(self):
-        """Test parsing ordered lists."""
-        html = '''
-        <ol>
-            <li>First step</li>
-            <li>Second step</li>
-        </ol>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        list_block = self.document.blocks[0]
-        self.assertEqual(list_block.style, ListStyle.ORDERED)
-        
-        items = list(list_block.items())
-        self.assertEqual(len(items), 2)
-    
-    def test_definition_list(self):
-        """Test parsing definition lists."""
-        html = '''
-        <dl>
-            <dt>Term 1</dt>
-            <dd>Definition 1</dd>
-            <dt>Term 2</dt>
-            <dd>Definition 2</dd>
-        </dl>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        list_block = self.document.blocks[0]
-        self.assertEqual(list_block.style, ListStyle.DEFINITION)
-        
-        items = list(list_block.items())
-        self.assertEqual(len(items), 2)  # Two dt/dd pairs
-    
-    def test_table(self):
-        """Test parsing simple tables."""
-        html = '''
-        <table>
-            <tr>
-                <th>Header 1</th>
-                <th>Header 2</th>
-            </tr>
-            <tr>
-                <td>Cell 1</td>
-                <td>Cell 2</td>
-            </tr>
-        </table>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        self.assertIsInstance(self.document.blocks[0], Table)
-        
-        table = self.document.blocks[0]
-        
-        # Check body rows
-        body_rows = list(table.body_rows())
-        self.assertEqual(len(body_rows), 2)  # Header row + data row
-        
-        # Check first row (header)
-        first_row_cells = list(body_rows[0].cells())
-        self.assertEqual(len(first_row_cells), 2)
-        self.assertTrue(first_row_cells[0].is_header)
-        self.assertTrue(first_row_cells[1].is_header)
-        
-        # Check second row (data)
-        second_row_cells = list(body_rows[1].cells())
-        self.assertEqual(len(second_row_cells), 2)
-        self.assertFalse(second_row_cells[0].is_header)
-        self.assertFalse(second_row_cells[1].is_header)
-    
-    def test_blockquote(self):
-        """Test parsing blockquotes."""
-        html = '''
-        <blockquote>
-            <p>This is a quoted paragraph.</p>
-            <p>Another quoted paragraph.</p>
-        </blockquote>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        self.assertIsInstance(self.document.blocks[0], Quote)
-        
-        quote = self.document.blocks[0]
-        quote_blocks = list(quote.blocks())
-        self.assertEqual(len(quote_blocks), 2)
-        self.assertIsInstance(quote_blocks[0], Paragraph)
-        self.assertIsInstance(quote_blocks[1], Paragraph)
-    
-    def test_code_block(self):
-        """Test parsing code blocks."""
-        html = '''
-        <pre><code class="language-python">
-def hello():
-    print("Hello, world!")
-        </code></pre>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 1)
-        self.assertIsInstance(self.document.blocks[0], CodeBlock)
-        
-        code_block = self.document.blocks[0]
-        self.assertEqual(code_block.language, "python")
-    
-    def test_horizontal_rule(self):
-        """Test parsing horizontal rules."""
-        html = '<p>Before</p><hr><p>After</p>'
-        
-        self.reader.extract_content(html, self.document)
-        
-        self.assertEqual(len(self.document.blocks), 3)
-        self.assertIsInstance(self.document.blocks[0], Paragraph)
-        self.assertIsInstance(self.document.blocks[1], HorizontalRule)
-        self.assertIsInstance(self.document.blocks[2], Paragraph)
-    
-    def test_html_entities(self):
-        """Test handling HTML entities."""
-        html = '<p>Less than: &lt; Greater than: &gt; Ampersand: &amp;</p>'
-        
-        self.reader.extract_content(html, self.document)
-        
-        paragraph = self.document.blocks[0]
-        words = list(paragraph.words())
-        
-        # Find the entity words
-        word_texts = [word[1].text for word in words]
-        self.assertIn('<', word_texts)
-        self.assertIn('>', word_texts)
-        self.assertIn('&', word_texts)
-    
-    def test_nested_elements(self):
-        """Test parsing nested HTML elements."""
-        html = '''
-        <div>
-            <h2>Section Title</h2>
-            <p>Section content with <strong>important</strong> text.</p>
-            <ul>
-                <li>List item 1</li>
-                <li>List item 2</li>
-            </ul>
-        </div>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        # Should have multiple blocks
-        self.assertGreater(len(self.document.blocks), 1)
-        
-        # Check that we have different types of blocks
-        block_types = [type(block).__name__ for block in self.document.blocks]
-        self.assertIn('Paragraph', block_types)  # From div
-        self.assertIn('Heading', block_types)
-        self.assertIn('HList', block_types)
-    
-    def test_empty_elements(self):
-        """Test handling empty HTML elements."""
-        html = '<p></p><div></div><ul></ul>'
-        
-        self.reader.extract_content(html, self.document)
-        
-        # Empty elements should still create blocks
-        self.assertEqual(len(self.document.blocks), 3)
-    
-    def test_whitespace_handling(self):
-        """Test proper whitespace handling."""
-        html = '''
-        <p>  Word1    Word2  
-        Word3   </p>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        paragraph = self.document.blocks[0]
-        words = list(paragraph.words())
-        
-        # Should normalize whitespace and create separate words
-        word_texts = [word[1].text for word in words]
-        self.assertEqual(word_texts, ["Word1", "Word2", "Word3"])
-    
-    def test_base_url_setting(self):
-        """Test setting base URL for link resolution."""
-        base_url = "https://example.com/path/"
-        self.reader.set_base_url(base_url)
-        
-        # The base URL should be passed to the inline handler
-        self.assertEqual(self.reader.inline_handler.base_url, base_url)
-    
-    def test_complex_document(self):
-        """Test parsing a complex HTML document."""
-        html = '''
-        <!DOCTYPE html>
-        <html>
-        <head>
-            <title>Test Document</title>
-            <style>body { font-family: Arial; }</style>
-        </head>
-        <body>
-            <h1>Main Title</h1>
-            <p>Introduction paragraph with <em>emphasis</em>.</p>
-            
-            <h2>Section 1</h2>
-            <p>Content with <a href="link.html">a link</a>.</p>
-            
-            <ul>
-                <li>Item 1</li>
-                <li>Item 2 with <strong>bold text</strong></li>
-            </ul>
-            
-            <h2>Section 2</h2>
-            <blockquote>
-                <p>A quoted paragraph.</p>
-            </blockquote>
-            
-            <table>
-                <tr><th>Col1</th><th>Col2</th></tr>
-                <tr><td>A</td><td>B</td></tr>
-            </table>
-        </body>
-        </html>
-        '''
-        
-        self.reader.extract_content(html, self.document)
-        
-        # Should have parsed multiple blocks
-        self.assertGreater(len(self.document.blocks), 5)
-        
-        # Should have different types of content
-        block_types = set(type(block).__name__ for block in self.document.blocks)
-        expected_types = {'Heading', 'Paragraph', 'HList', 'Quote', 'Table'}
-        self.assertTrue(expected_types.issubset(block_types))
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/tests/test_html_style.py
+++ b/tests/test_html_style.py
@ -1,181 +1,181 @@
 """
-Unit tests for HTML style management.
+Unit tests for pyWebLayout style objects.

-Tests the HTMLStyleManager class for CSS parsing, style stacks, and font creation.
+Tests the Font class and style enums for proper functionality and immutability.
 """

 import unittest
-from pyWebLayout.io.readers.html_style import HTMLStyleManager
-from pyWebLayout.style import FontStyle, FontWeight, TextDecoration
+from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration, Alignment


-class TestHTMLStyleManager(unittest.TestCase):
-    """Test cases for HTMLStyleManager."""
+class TestStyleObjects(unittest.TestCase):
+    """Test cases for pyWebLayout style objects."""
    
-    def setUp(self):
-        """Set up test fixtures."""
-        self.style_manager = HTMLStyleManager()
+    def test_font_weight_enum(self):
+        """Test FontWeight enum values."""
+        self.assertEqual(FontWeight.NORMAL.value, "normal")
+        self.assertEqual(FontWeight.BOLD.value, "bold")
+        
+        # Test that all expected values exist
+        weights = [FontWeight.NORMAL, FontWeight.BOLD]
+        self.assertEqual(len(weights), 2)
    
-    def test_initialization(self):
-        """Test proper initialization of style manager."""
-        style = self.style_manager.get_current_style()
+    def test_font_style_enum(self):
+        """Test FontStyle enum values."""
+        self.assertEqual(FontStyle.NORMAL.value, "normal")
+        self.assertEqual(FontStyle.ITALIC.value, "italic")
        
-        self.assertEqual(style['font_size'], 12)
-        self.assertEqual(style['font_weight'], FontWeight.NORMAL)
-        self.assertEqual(style['font_style'], FontStyle.NORMAL)
-        self.assertEqual(style['decoration'], TextDecoration.NONE)
-        self.assertEqual(style['color'], (0, 0, 0))
-        self.assertIsNone(style['background'])
-        self.assertEqual(style['language'], 'en_US')
+        # Test that all expected values exist
+        styles = [FontStyle.NORMAL, FontStyle.ITALIC]
+        self.assertEqual(len(styles), 2)
    
-    def test_style_stack_operations(self):
-        """Test push and pop operations on style stack."""
-        # Initial state
-        initial_style = self.style_manager.get_current_style()
+    def test_text_decoration_enum(self):
+        """Test TextDecoration enum values."""
+        self.assertEqual(TextDecoration.NONE.value, "none")
+        self.assertEqual(TextDecoration.UNDERLINE.value, "underline")
+        self.assertEqual(TextDecoration.STRIKETHROUGH.value, "strikethrough")
        
-        # Push a new style
-        new_style = {'font_size': 16, 'font_weight': FontWeight.BOLD}
-        self.style_manager.push_style(new_style)
-        
-        current_style = self.style_manager.get_current_style()
-        self.assertEqual(current_style['font_size'], 16)
-        self.assertEqual(current_style['font_weight'], FontWeight.BOLD)
-        self.assertEqual(current_style['color'], (0, 0, 0))  # Unchanged
-        
-        # Pop the style
-        self.style_manager.pop_style()
-        restored_style = self.style_manager.get_current_style()
-        self.assertEqual(restored_style, initial_style)
+        # Test that all expected values exist
+        decorations = [TextDecoration.NONE, TextDecoration.UNDERLINE, TextDecoration.STRIKETHROUGH]
+        self.assertEqual(len(decorations), 3)
    
-    def test_tag_styles(self):
-        """Test default styles for HTML tags."""
-        h1_style = self.style_manager.get_tag_style('h1')
-        self.assertEqual(h1_style['font_size'], 24)
-        self.assertEqual(h1_style['font_weight'], FontWeight.BOLD)
-        
-        h6_style = self.style_manager.get_tag_style('h6')
-        self.assertEqual(h6_style['font_size'], 12)
-        self.assertEqual(h6_style['font_weight'], FontWeight.BOLD)
-        
-        em_style = self.style_manager.get_tag_style('em')
-        self.assertEqual(em_style['font_style'], FontStyle.ITALIC)
-        
-        unknown_style = self.style_manager.get_tag_style('unknown')
-        self.assertEqual(unknown_style, {})
+    def test_alignment_enum(self):
+        """Test Alignment enum values."""
+        self.assertEqual(Alignment.LEFT.value, 1)
+        self.assertEqual(Alignment.CENTER.value, 2)
+        self.assertEqual(Alignment.RIGHT.value, 3)
+        self.assertEqual(Alignment.TOP.value, 4)
+        self.assertEqual(Alignment.BOTTOM.value, 5)
+        self.assertEqual(Alignment.JUSTIFY.value, 6)
    
-    def test_inline_style_parsing(self):
-        """Test parsing of inline CSS styles."""
-        # Test font-size
-        style = self.style_manager.parse_inline_style('font-size: 18px')
-        self.assertEqual(style['font_size'], 18)
+    def test_font_initialization_defaults(self):
+        """Test Font initialization with default values."""
+        font = Font()
        
-        style = self.style_manager.parse_inline_style('font-size: 14pt')
-        self.assertEqual(style['font_size'], 14)
-        
-        # Test font-weight
-        style = self.style_manager.parse_inline_style('font-weight: bold')
-        self.assertEqual(style['font_weight'], FontWeight.BOLD)
-        
-        # Test font-style
-        style = self.style_manager.parse_inline_style('font-style: italic')
-        self.assertEqual(style['font_style'], FontStyle.ITALIC)
-        
-        # Test text-decoration
-        style = self.style_manager.parse_inline_style('text-decoration: underline')
-        self.assertEqual(style['decoration'], TextDecoration.UNDERLINE)
-        
-        # Test multiple properties
-        style = self.style_manager.parse_inline_style(
-            'font-size: 20px; font-weight: bold; color: red'
+        self.assertIsNone(font._font_path)
+        self.assertEqual(font.font_size, 12)
+        self.assertEqual(font.colour, (0, 0, 0))
+        self.assertEqual(font.color, (0, 0, 0))  # Alias
+        self.assertEqual(font.weight, FontWeight.NORMAL)
+        self.assertEqual(font.style, FontStyle.NORMAL)
+        self.assertEqual(font.decoration, TextDecoration.NONE)
+        self.assertEqual(font.background, (255, 255, 255, 0))  # Transparent
+        self.assertEqual(font.language, "en_EN")
+    
+    def test_font_initialization_custom(self):
+        """Test Font initialization with custom values."""
+        font = Font(
+            font_path="/path/to/font.ttf",
+            font_size=16,
+            colour=(255, 0, 0),
+            weight=FontWeight.BOLD,
+            style=FontStyle.ITALIC,
+            decoration=TextDecoration.UNDERLINE,
+            background=(255, 255, 0, 255),
+            language="fr_FR"
        )
-        self.assertEqual(style['font_size'], 20)
-        self.assertEqual(style['font_weight'], FontWeight.BOLD)
-        self.assertEqual(style['color'], (255, 0, 0))
-    
-    def test_color_parsing(self):
-        """Test CSS color parsing."""
-        # Named colors
-        self.assertEqual(self.style_manager.parse_color('red'), (255, 0, 0))
-        self.assertEqual(self.style_manager.parse_color('blue'), (0, 0, 255))
-        self.assertEqual(self.style_manager.parse_color('white'), (255, 255, 255))
-        self.assertEqual(self.style_manager.parse_color('gray'), (128, 128, 128))
-        self.assertEqual(self.style_manager.parse_color('grey'), (128, 128, 128))
-        
-        # Hex colors
-        self.assertEqual(self.style_manager.parse_color('#ff0000'), (255, 0, 0))
-        self.assertEqual(self.style_manager.parse_color('#00ff00'), (0, 255, 0))
-        self.assertEqual(self.style_manager.parse_color('#f00'), (255, 0, 0))
-        self.assertEqual(self.style_manager.parse_color('#0f0'), (0, 255, 0))
-        
-        # RGB colors
-        self.assertEqual(self.style_manager.parse_color('rgb(255, 0, 0)'), (255, 0, 0))
-        self.assertEqual(self.style_manager.parse_color('rgb(128, 128, 128)'), (128, 128, 128))
-        self.assertEqual(self.style_manager.parse_color('rgb( 255 , 255 , 255 )'), (255, 255, 255))
-        
-        # RGBA colors (alpha ignored)
-        self.assertEqual(self.style_manager.parse_color('rgba(255, 0, 0, 0.5)'), (255, 0, 0))
-        
-        # Invalid colors
-        self.assertIsNone(self.style_manager.parse_color('invalid'))
-        self.assertIsNone(self.style_manager.parse_color('#gg0000'))
-        self.assertIsNone(self.style_manager.parse_color('rgb(300, 0, 0)'))  # Invalid values return None
-    
-    def test_color_clamping(self):
-        """Test that RGB values outside valid range return None."""
-        # Values outside 0-255 range should return None
-        color = self.style_manager.parse_color('rgb(300, -10, 128)')
-        self.assertIsNone(color)  # Invalid values return None
-    
-    def test_apply_style_to_element(self):
-        """Test combining tag styles with inline styles."""
-        # Test h1 with inline style
-        attrs = {'style': 'color: blue; font-size: 30px'}
-        combined = self.style_manager.apply_style_to_element('h1', attrs)
-        
-        # Should have h1 defaults plus inline overrides
-        self.assertEqual(combined['font_size'], 30)  # Overridden
-        self.assertEqual(combined['font_weight'], FontWeight.BOLD)  # From h1
-        self.assertEqual(combined['color'], (0, 0, 255))  # Inline
-        
-        # Test without inline styles
-        combined = self.style_manager.apply_style_to_element('strong', {})
-        self.assertEqual(combined['font_weight'], FontWeight.BOLD)
-    
-    def test_reset(self):
-        """Test resetting the style manager."""
-        # Change the state
-        self.style_manager.push_style({'font_size': 20})
-        self.style_manager.push_style({'color': (255, 0, 0)})
-        
-        # Reset
-        self.style_manager.reset()
-        
-        # Should be back to initial state
-        style = self.style_manager.get_current_style()
-        self.assertEqual(style['font_size'], 12)
-        self.assertEqual(style['color'], (0, 0, 0))
-        self.assertEqual(len(self.style_manager._style_stack), 0)
-    
-    def test_font_creation(self):
-        """Test Font object creation from current style."""
-        # Set some specific styles
-        self.style_manager.push_style({
-            'font_size': 16,
-            'font_weight': FontWeight.BOLD,
-            'font_style': FontStyle.ITALIC,
-            'decoration': TextDecoration.UNDERLINE,
-            'color': (255, 0, 0),
-            'background': (255, 255, 0, 255)
-        })
-        
-        font = self.style_manager.create_font()
        
+        self.assertEqual(font._font_path, "/path/to/font.ttf")
        self.assertEqual(font.font_size, 16)
+        self.assertEqual(font.colour, (255, 0, 0))
        self.assertEqual(font.weight, FontWeight.BOLD)
        self.assertEqual(font.style, FontStyle.ITALIC)
        self.assertEqual(font.decoration, TextDecoration.UNDERLINE)
-        self.assertEqual(font.colour, (255, 0, 0))
        self.assertEqual(font.background, (255, 255, 0, 255))
+        self.assertEqual(font.language, "fr_FR")
+    
+    def test_font_with_methods(self):
+        """Test Font immutable modification methods."""
+        original_font = Font(
+            font_size=12,
+            colour=(0, 0, 0),
+            weight=FontWeight.NORMAL,
+            style=FontStyle.NORMAL,
+            decoration=TextDecoration.NONE
+        )
+        
+        # Test with_size
+        size_font = original_font.with_size(16)
+        self.assertEqual(size_font.font_size, 16)
+        self.assertEqual(original_font.font_size, 12)  # Original unchanged
+        self.assertEqual(size_font.colour, (0, 0, 0))  # Other properties preserved
+        
+        # Test with_colour
+        color_font = original_font.with_colour((255, 0, 0))
+        self.assertEqual(color_font.colour, (255, 0, 0))
+        self.assertEqual(original_font.colour, (0, 0, 0))  # Original unchanged
+        self.assertEqual(color_font.font_size, 12)  # Other properties preserved
+        
+        # Test with_weight
+        weight_font = original_font.with_weight(FontWeight.BOLD)
+        self.assertEqual(weight_font.weight, FontWeight.BOLD)
+        self.assertEqual(original_font.weight, FontWeight.NORMAL)  # Original unchanged
+        
+        # Test with_style
+        style_font = original_font.with_style(FontStyle.ITALIC)
+        self.assertEqual(style_font.style, FontStyle.ITALIC)
+        self.assertEqual(original_font.style, FontStyle.NORMAL)  # Original unchanged
+        
+        # Test with_decoration
+        decoration_font = original_font.with_decoration(TextDecoration.UNDERLINE)
+        self.assertEqual(decoration_font.decoration, TextDecoration.UNDERLINE)
+        self.assertEqual(original_font.decoration, TextDecoration.NONE)  # Original unchanged
+    
+    def test_font_property_access(self):
+        """Test Font property access methods."""
+        font = Font(
+            font_size=20,
+            colour=(128, 128, 128),
+            weight=FontWeight.BOLD,
+            style=FontStyle.ITALIC,
+            decoration=TextDecoration.STRIKETHROUGH
+        )
+        
+        # Test all property getters
+        self.assertEqual(font.font_size, 20)
+        self.assertEqual(font.colour, (128, 128, 128))
+        self.assertEqual(font.color, (128, 128, 128))  # Alias
+        self.assertEqual(font.weight, FontWeight.BOLD)
+        self.assertEqual(font.style, FontStyle.ITALIC)
+        self.assertEqual(font.decoration, TextDecoration.STRIKETHROUGH)
+        
+        # Test that font object is accessible
+        self.assertIsNotNone(font.font)
+    
+    def test_font_immutability(self):
+        """Test that Font objects behave immutably."""
+        font1 = Font(font_size=12, colour=(0, 0, 0))
+        font2 = font1.with_size(16)
+        font3 = font2.with_colour((255, 0, 0))
+        
+        # Each should be different objects
+        self.assertIsNot(font1, font2)
+        self.assertIsNot(font2, font3)
+        self.assertIsNot(font1, font3)
+        
+        # Original properties should be unchanged
+        self.assertEqual(font1.font_size, 12)
+        self.assertEqual(font1.colour, (0, 0, 0))
+        
+        self.assertEqual(font2.font_size, 16)
+        self.assertEqual(font2.colour, (0, 0, 0))
+        
+        self.assertEqual(font3.font_size, 16)
+        self.assertEqual(font3.colour, (255, 0, 0))
+    
+    def test_background_handling(self):
+        """Test background color handling."""
+        # Test default transparent background
+        font1 = Font()
+        self.assertEqual(font1.background, (255, 255, 255, 0))
+        
+        # Test explicit background
+        font2 = Font(background=(255, 0, 0, 128))
+        self.assertEqual(font2.background, (255, 0, 0, 128))
+        
+        # Test None background becomes transparent
+        font3 = Font(background=None)
+        self.assertEqual(font3.background, (255, 255, 255, 0))


 if __name__ == '__main__':
--- a/tests/test_html_text.py
+++ b/tests/test_html_text.py
@ -1,247 +0,0 @@
-"""
-Unit tests for HTML text processing.
-
-Tests the HTMLTextProcessor class for text buffering, entity handling, and word creation.
-"""
-
-import unittest
-from unittest.mock import Mock, MagicMock
-from pyWebLayout.io.readers.html_text import HTMLTextProcessor
-from pyWebLayout.io.readers.html_style import HTMLStyleManager
-from pyWebLayout.abstract.block import Paragraph
-from pyWebLayout.abstract.inline import Word
-
-
-class TestHTMLTextProcessor(unittest.TestCase):
-    """Test cases for HTMLTextProcessor."""
-    
-    def setUp(self):
-        """Set up test fixtures."""
-        self.style_manager = HTMLStyleManager()
-        self.text_processor = HTMLTextProcessor(self.style_manager)
-        
-        # Create a mock paragraph
-        self.mock_paragraph = Mock(spec=Paragraph)
-        self.mock_paragraph.add_word = Mock()
-    
-    def test_initialization(self):
-        """Test proper initialization of text processor."""
-        self.assertEqual(self.text_processor._text_buffer, "")
-        self.assertIsNone(self.text_processor._current_paragraph)
-        self.assertEqual(self.text_processor._style_manager, self.style_manager)
-    
-    def test_add_text(self):
-        """Test adding text to buffer."""
-        self.text_processor.add_text("Hello")
-        self.assertEqual(self.text_processor.get_buffer_content(), "Hello")
-        
-        self.text_processor.add_text(" World")
-        self.assertEqual(self.text_processor.get_buffer_content(), "Hello World")
-    
-    def test_entity_references(self):
-        """Test HTML entity reference handling."""
-        test_cases = [
-            ('lt', '<'),
-            ('gt', '>'),
-            ('amp', '&'),
-            ('quot', '"'),
-            ('apos', "'"),
-            ('nbsp', ' '),
-            ('copy', '©'),
-            ('reg', '®'),
-            ('trade', '™'),
-            ('mdash', '—'),
-            ('ndash', '–'),
-            ('hellip', '…'),
-            ('euro', '€'),
-            ('unknown', '&unknown;')  # Unknown entities should be preserved
-        ]
-        
-        for entity, expected in test_cases:
-            with self.subTest(entity=entity):
-                self.text_processor.clear_buffer()
-                self.text_processor.add_entity_reference(entity)
-                self.assertEqual(self.text_processor.get_buffer_content(), expected)
-    
-    def test_character_references(self):
-        """Test character reference handling."""
-        # Decimal character references
-        self.text_processor.clear_buffer()
-        self.text_processor.add_character_reference('65')  # 'A'
-        self.assertEqual(self.text_processor.get_buffer_content(), 'A')
-        
-        # Hexadecimal character references
-        self.text_processor.clear_buffer()
-        self.text_processor.add_character_reference('x41')  # 'A'
-        self.assertEqual(self.text_processor.get_buffer_content(), 'A')
-        
-        # Unicode character
-        self.text_processor.clear_buffer()
-        self.text_processor.add_character_reference('8364')  # Euro symbol
-        self.assertEqual(self.text_processor.get_buffer_content(), '€')
-        
-        # Invalid character reference
-        self.text_processor.clear_buffer()
-        self.text_processor.add_character_reference('invalid')
-        self.assertEqual(self.text_processor.get_buffer_content(), '&#invalid;')
-        
-        # Out of range character
-        self.text_processor.clear_buffer()
-        self.text_processor.add_character_reference('99999999999')
-        self.assertTrue(self.text_processor.get_buffer_content().startswith('&#'))
-    
-    def test_buffer_operations(self):
-        """Test buffer state operations."""
-        # Test has_pending_text
-        self.assertFalse(self.text_processor.has_pending_text())
-        
-        self.text_processor.add_text("Some text")
-        self.assertTrue(self.text_processor.has_pending_text())
-        
-        # Test clear_buffer
-        self.text_processor.clear_buffer()
-        self.assertFalse(self.text_processor.has_pending_text())
-        self.assertEqual(self.text_processor.get_buffer_content(), "")
-        
-        # Test with whitespace only
-        self.text_processor.add_text("   \n\t  ")
-        self.assertFalse(self.text_processor.has_pending_text())  # Should ignore whitespace
-    
-    def test_paragraph_management(self):
-        """Test current paragraph setting."""
-        # Initially no paragraph
-        self.assertIsNone(self.text_processor._current_paragraph)
-        
-        # Set paragraph
-        self.text_processor.set_current_paragraph(self.mock_paragraph)
-        self.assertEqual(self.text_processor._current_paragraph, self.mock_paragraph)
-        
-        # Clear paragraph
-        self.text_processor.set_current_paragraph(None)
-        self.assertIsNone(self.text_processor._current_paragraph)
-    
-    def test_flush_text_with_paragraph(self):
-        """Test flushing text when paragraph is set."""
-        self.text_processor.set_current_paragraph(self.mock_paragraph)
-        self.text_processor.add_text("Hello world test")
-        
-        # Mock the style manager to return a specific font
-        mock_font = Mock()
-        self.style_manager.create_font = Mock(return_value=mock_font)
-        
-        result = self.text_processor.flush_text()
-        
-        # Should return True (text was flushed)
-        self.assertTrue(result)
-        
-        # Should have created words
-        self.assertEqual(self.mock_paragraph.add_word.call_count, 3)  # "Hello", "world", "test"
-        
-        # Verify the words were created with correct text
-        calls = self.mock_paragraph.add_word.call_args_list
-        word_texts = [call[0][0].text for call in calls]
-        self.assertEqual(word_texts, ["Hello", "world", "test"])
-        
-        # Buffer should be empty after flush
-        self.assertEqual(self.text_processor.get_buffer_content(), "")
-    
-    def test_flush_text_without_paragraph(self):
-        """Test flushing text when no paragraph is set."""
-        self.text_processor.add_text("Hello world")
-        
-        result = self.text_processor.flush_text()
-        
-        # Should return False (no paragraph to flush to)
-        self.assertFalse(result)
-        
-        # Buffer should be cleared anyway
-        self.assertEqual(self.text_processor.get_buffer_content(), "")
-    
-    def test_flush_empty_buffer(self):
-        """Test flushing when buffer is empty."""
-        self.text_processor.set_current_paragraph(self.mock_paragraph)
-        
-        result = self.text_processor.flush_text()
-        
-        # Should return False (nothing to flush)
-        self.assertFalse(result)
-        
-        # No words should be added
-        self.mock_paragraph.add_word.assert_not_called()
-    
-    def test_flush_whitespace_only(self):
-        """Test flushing when buffer contains only whitespace."""
-        self.text_processor.set_current_paragraph(self.mock_paragraph)
-        self.text_processor.add_text("   \n\t  ")
-        
-        result = self.text_processor.flush_text()
-        
-        # Should return False (no meaningful content)
-        self.assertFalse(result)
-        
-        # No words should be added
-        self.mock_paragraph.add_word.assert_not_called()
-    
-    def test_word_creation_with_styling(self):
-        """Test that words are created with proper styling."""
-        self.text_processor.set_current_paragraph(self.mock_paragraph)
-        self.text_processor.add_text("styled text")
-        
-        # Set up style manager to return specific font
-        mock_font = Mock()
-        mock_font.font_size = 16
-        mock_font.weight = "bold"
-        self.style_manager.create_font = Mock(return_value=mock_font)
-        
-        self.text_processor.flush_text()
-        
-        # Verify font was created
-        self.style_manager.create_font.assert_called()
-        
-        # Verify words were created with the font
-        calls = self.mock_paragraph.add_word.call_args_list
-        for call in calls:
-            word = call[0][0]
-            self.assertEqual(word.style, mock_font)
-    
-    def test_reset(self):
-        """Test resetting the text processor."""
-        # Set up some state
-        self.text_processor.set_current_paragraph(self.mock_paragraph)
-        self.text_processor.add_text("Some text")
-        
-        # Reset
-        self.text_processor.reset()
-        
-        # Should be back to initial state
-        self.assertEqual(self.text_processor._text_buffer, "")
-        self.assertIsNone(self.text_processor._current_paragraph)
-    
-    def test_complex_text_processing(self):
-        """Test processing text with mixed content."""
-        self.text_processor.set_current_paragraph(self.mock_paragraph)
-        
-        # Mock font creation
-        mock_font = Mock()
-        self.style_manager.create_font = Mock(return_value=mock_font)
-        
-        # Add mixed content
-        self.text_processor.add_text("Hello ")
-        self.text_processor.add_entity_reference('amp')
-        self.text_processor.add_text(" world")
-        self.text_processor.add_character_reference('33')  # '!'
-        
-        # Should have "Hello & world!"
-        expected_content = "Hello & world!"
-        self.assertEqual(self.text_processor.get_buffer_content(), expected_content)
-        
-        # Flush and verify words
-        self.text_processor.flush_text()
-        
-        calls = self.mock_paragraph.add_word.call_args_list
-        word_texts = [call[0][0].text for call in calls]
-        self.assertEqual(word_texts, ["Hello", "&", "world!"])
-
-
-if __name__ == '__main__':
-    unittest.main()