fix flake8

2025-11-04 22:41:05 +01:00 · 2025-11-04 22:41:05 +01:00 · 55fdcbcb6d
commit 55fdcbcb6d
parent 37505d3dcc
4 changed files with 8 additions and 336 deletions
--- a/examples/html_multipage_demo.py
+++ b/examples/html_multipage_demo.py
@ -1,326 +0,0 @@
 #!/usr/bin/env python3
 """
 HTML Multi-Page Rendering Demo
 This example demonstrates how to:
 1. Parse HTML content using pyWebLayout's HTML extraction system
 2. Layout the parsed content across multiple pages using the ereader layout system
 3. Render each page as an image file
 The demo shows the complete pipeline from HTML to multi-page layout.
 """
 import os
 import sys
 from pathlib import Path
 from typing import List, Tuple
 from PIL import Image, ImageDraw
 # Add pyWebLayout to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 from pyWebLayout.io.readers.html_extraction import parse_html_string
 from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
 from pyWebLayout.concrete.page import Page
 from pyWebLayout.style.page_style import PageStyle
 from pyWebLayout.style import Font
 from pyWebLayout.abstract.block import Block
 def create_sample_html() -> str:
    """Return the fixed HTML document used as input for the demo.

    The markup holds two chapters made of headings, paragraphs with inline
    bold/italic text, an unordered list and a blockquote.
    """
    sample_markup = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Sample Document</title>
    </head>
    <body>
        <h1>Chapter 1: Introduction to Multi-Page Layout</h1>
        <p>This is the first paragraph of our sample document. It demonstrates how HTML content 
        can be parsed and then laid out across multiple pages using the pyWebLayout system. 
        The system handles various HTML elements including headings, paragraphs, lists, and more.</p>
        <p>Here's another paragraph with <strong>bold text</strong> and <em>italic text</em> 
        to show how inline formatting is preserved during the conversion process. The layout 
        engine will automatically handle word wrapping and page breaks as needed.</p>
        <h2>Section 1.1: Features</h2>
        <p>The multi-page layout system includes several key features:</p>
        <ul>
            <li>Automatic page breaking when content exceeds page boundaries</li>
            <li>Font scaling support for different reading preferences</li>
            <li>Position tracking for bookmarks and navigation</li>
            <li>Support for various HTML elements and styling</li>
        </ul>
        <p>Each of these features works together to provide a seamless reading experience 
        that adapts to different page sizes and user preferences.</p>
        <h2>Section 1.2: Technical Implementation</h2>
        <p>The implementation uses a sophisticated layout engine that processes abstract 
        document elements and renders them onto concrete pages. This separation allows 
        for flexible styling and layout while maintaining the semantic structure of 
        the original content.</p>
        <blockquote>
            "The best way to understand a complex system is to see it in action with 
            real examples and practical demonstrations."
        </blockquote>
        <p>This quote illustrates the philosophy behind this demo - showing how the 
        various components work together in practice.</p>
        <h1>Chapter 2: Advanced Layout Concepts</h1>
        <p>Moving into more advanced territory, we can explore how the layout system 
        handles complex scenarios such as page breaks within paragraphs, font scaling 
        effects on layout, and position tracking across multiple pages.</p>
        <p>The system maintains precise position information that allows for features 
        like bookmarking, search result highlighting, and seamless navigation between 
        different views of the same content.</p>
        <h2>Section 2.1: Position Tracking</h2>
        <p>Position tracking is implemented using a hierarchical system that can 
        reference any point in the document structure. This includes not just 
        paragraph and word positions, but also positions within tables, lists, 
        and other complex structures.</p>
        <p>The position system is designed to be stable across different rendering 
        parameters, so a bookmark created with one font size will still be valid 
        when the user changes to a different font size.</p>
        <h2>Section 2.2: Multi-Page Rendering</h2>
        <p>The multi-page rendering system can generate pages both forward and 
        backward from any given position. This bidirectional capability is 
        essential for smooth navigation in ereader applications.</p>
        <p>Each page is rendered independently, which allows for efficient 
        caching and parallel processing of multiple pages when needed.</p>
        <p>This concludes our sample document. The layout system will automatically 
        determine how many pages are needed to display all this content based on 
        the page size and font settings used during rendering.</p>
    </body>
    </html>
    """
    return sample_markup
 class HTMLMultiPageRenderer:
    """
    Renderer that converts HTML to multiple page images.

    Typical use: parse_html_to_blocks() -> render_pages() -> save_pages().
    """

    def __init__(self, page_size: Tuple[int, int] = (600, 800), font_scale: float = 1.0):
        """
        Initialize the renderer.

        Args:
            page_size: Size of each page in pixels (width, height)
            font_scale: Font scaling factor
        """
        self.page_size = page_size
        self.font_scale = font_scale
        self.page_style = PageStyle()

    def parse_html_to_blocks(self, html_content: str) -> List[Block]:
        """
        Parse HTML content into abstract blocks.

        Args:
            html_content: HTML string to parse
        Returns:
            List of abstract Block objects
        """
        base_font = Font(font_size=14)  # Base font for the document
        return parse_html_string(html_content, base_font=base_font)

    def render_pages(self, blocks: List[Block], max_pages: int = 20) -> List[Image.Image]:
        """
        Render blocks into multiple page images.

        Rendering stops at the end of the document, after max_pages pages, or
        at the first error (which is printed); pages rendered so far are kept.

        Args:
            blocks: List of abstract blocks to render
            max_pages: Maximum number of pages to render (safety limit)
        Returns:
            List of PIL Image objects, one per page
        """
        if not blocks:
            return []

        # Create the bidirectional layouter
        layouter = BidirectionalLayouter(blocks, self.page_style, self.page_size)
        pages = []
        current_position = RenderingPosition()  # Start at beginning

        for page_number in range(1, max_pages + 1):
            try:
                # Render the next page and convert it to an image
                page, next_position = layouter.render_page_forward(current_position, self.font_scale)
                pages.append(self._page_to_image(page, page_number))
                # Check if we've reached the end
                if self._is_end_position(next_position, current_position, blocks):
                    break
                current_position = next_position
            except Exception as e:
                # Best-effort demo: report the failure and return what we have.
                print(f"Error rendering page {page_number}: {e}")
                break

        return pages

    def _page_to_image(self, page: Page, page_number: int = 0) -> Image.Image:
        """
        Build the image for one page.

        NOTE(review): the content of `page` is not drawn here; the image only
        carries a border and a footer label. Confirm whether the Page object
        should be rendered into the image instead.

        Args:
            page: Page object to convert (currently unused, see note above)
            page_number: 1-based page number for the footer; with the default
                of 0 the footer reads just "Page"
        Returns:
            PIL Image object
        """
        width, height = self.page_size

        # Create a white background image with a light grey border
        image = Image.new('RGB', self.page_size, 'white')
        draw = ImageDraw.Draw(image)
        border_color = (200, 200, 200)
        draw.rectangle([0, 0, width - 1, height - 1], outline=border_color)

        # Font loading is best-effort; font=None lets PIL pick its default.
        try:
            from PIL import ImageFont
            font = ImageFont.load_default()
        except Exception:
            font = None

        # Add page number centered at the bottom
        page_num_text = f"Page {page_number}" if page_number > 0 else "Page"
        text_bbox = draw.textbbox((0, 0), page_num_text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_x = (width - text_width) // 2
        text_y = height - 30
        draw.text((text_x, text_y), page_num_text, fill='black', font=font)
        return image

    def _is_end_position(self, current_pos: RenderingPosition,
                         previous_pos: RenderingPosition,
                         blocks: List[Block]) -> bool:
        """
        Check if we've reached the end of the document.

        Args:
            current_pos: Current rendering position
            previous_pos: Previous rendering position
            blocks: List of all blocks in document
        Returns:
            True if the position did not advance or all blocks were processed
        """
        # If position hasn't advanced, we're likely at the end
        position_stalled = (
            current_pos.block_index == previous_pos.block_index
            and current_pos.word_index == previous_pos.word_index
        )
        return position_stalled or current_pos.block_index >= len(blocks)

    def save_pages(self, pages: List[Image.Image], output_dir: str = "output/html_multipage"):
        """
        Save rendered pages as image files.

        Files are named page_001.png, page_002.png, ... inside output_dir,
        which is created if it does not exist.

        Args:
            pages: List of page images
            output_dir: Directory to save images
        """
        os.makedirs(output_dir, exist_ok=True)
        for i, page_image in enumerate(pages, 1):
            filepath = os.path.join(output_dir, f"page_{i:03d}.png")
            page_image.save(filepath)
            print(f"Saved {filepath}")
        print(f"\nRendered {len(pages)} pages to {output_dir}/")
 def main():
    """Run the demo: build sample HTML, parse it, render pages and save them as PNGs."""
    from collections import Counter

    print("HTML Multi-Page Rendering Demo")
    print("=" * 40)

    # Create sample HTML content
    print("1. Creating sample HTML content...")
    html_content = create_sample_html()
    print(f"   Created HTML document ({len(html_content)} characters)")

    # Initialize renderer
    print("\n2. Initializing renderer...")
    renderer = HTMLMultiPageRenderer(page_size=(600, 800), font_scale=1.0)
    print("   Renderer initialized")

    # Parse HTML to blocks
    print("\n3. Parsing HTML to abstract blocks...")
    blocks = renderer.parse_html_to_blocks(html_content)
    print(f"   Parsed {len(blocks)} blocks")

    # Print block summary; Counter keeps first-seen order like the plain dict did
    block_types = Counter(type(block).__name__ for block in blocks)
    print("   Block types found:")
    for block_type, count in block_types.items():
        print(f"     - {block_type}: {count}")

    # Render pages
    print("\n4. Rendering pages...")
    pages = renderer.render_pages(blocks, max_pages=10)
    print(f"   Rendered {len(pages)} pages")

    # Save pages
    print("\n5. Saving pages...")
    renderer.save_pages(pages)
    print("\n✓ Demo completed successfully!")
    print("\nTo view the results:")
    print("  - Check the output/html_multipage/ directory")
    print("  - Open the PNG files to see each rendered page")

    # Show some statistics
    print("\nStatistics:")
    print(f"  - Original HTML: {len(html_content)} characters")
    print(f"  - Abstract blocks: {len(blocks)}")
    print(f"  - Rendered pages: {len(pages)}")
    print(f"  - Page size: {renderer.page_size[0]}x{renderer.page_size[1]} pixels")
    print(f"  - Font scale: {renderer.font_scale}x")
 # Run the demo only when this file is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/pyWebLayout/abstract/inline.py
+++ b/pyWebLayout/abstract/inline.py
@ -2,9 +2,12 @@ from __future__ import annotations
 from pyWebLayout.core.base import Queriable
 from pyWebLayout.style import Font
 from pyWebLayout.style.abstract_style import AbstractStyle
-from typing import Tuple, Union, List, Optional, Dict, Any
+from typing import Tuple, Union, List, Optional, Dict, Any, Callable
 import pyphen
 # Import LinkType for type hints (imported at module level to avoid F821 linting error)
 from pyWebLayout.abstract.functional import LinkType
 class Word:
@ -279,7 +282,7 @@ class LinkedWord(Word):
    """
    def __init__(self, text: str, style: Union[Font, 'AbstractStyle'],
-                 location: str, link_type: 'LinkType' = None,
+                 location: str, link_type: Optional['LinkType'] = None,
                 callback: Optional[Callable] = None,
                 background=None, previous: Optional[Word] = None,
                 params: Optional[Dict[str, Any]] = None,
@ -302,7 +305,6 @@ class LinkedWord(Word):
        super().__init__(text, style, background, previous)
        # Store link properties
        from pyWebLayout.abstract.functional import LinkType
        self._location = location
        self._link_type = link_type or LinkType.EXTERNAL
        self._callback = callback
@ -344,8 +346,6 @@ class LinkedWord(Word):
        Returns:
            The result of the link execution
        """
        from pyWebLayout.abstract.functional import LinkType
        # Add word text to context
        full_context = {**self._params, 'text': self._text}
        if context:
--- a/pyWebLayout/concrete/viewport.py
+++ b/pyWebLayout/concrete/viewport.py
@ -387,10 +387,10 @@ class Viewport(Box, Layoutable):
        }
-class ScrollablePageContent(Container):
+class ScrollablePageContent(Box):
    """
    A specialized container for page content that's designed to work with viewports.
-    This extends the regular Page functionality but allows for much larger content areas.
+    This extends the regular Box functionality but allows for much larger content areas.
    """
    def __init__(self, content_width: int = 800, initial_height: int = 1000, 
--- a/pyWebLayout/layout/document_layouter.py
+++ b/pyWebLayout/layout/document_layouter.py
@ -9,6 +9,7 @@ from pyWebLayout.abstract import Paragraph, Word, Link
 from pyWebLayout.abstract.block import Image as AbstractImage
 from pyWebLayout.abstract.inline import LinkedWord
 from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
 from pyWebLayout.style import Font, Alignment
 def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
    """
@ -40,7 +41,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
    # paragraph.style is already a Font object (concrete), not AbstractStyle
    # We need to get word spacing constraints from the Font's abstract style if available
    # For now, use reasonable defaults based on font size
    from pyWebLayout.style import Font, Alignment
    if isinstance(paragraph.style, Font):
        # paragraph.style is already a Font (concrete style)
@ -228,8 +228,6 @@ def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] =
    Returns:
        bool: True if image was successfully laid out, False if page ran out of space
    """
    from pyWebLayout.style import Alignment
    # Use page available width if max_width not specified
    if max_width is None:
        max_width = page.available_width