fix flake8

2025-11-04 22:41:05 +01:00 · 2025-11-04 22:41:05 +01:00 · 55fdcbcb6d
commit 55fdcbcb6d
parent 37505d3dcc
4 changed files with 8 additions and 336 deletions
--- a/examples/html_multipage_demo.py
+++ b/examples/html_multipage_demo.py
@@ -1,326 +0,0 @@
-#!/usr/bin/env python3
-"""
-HTML Multi-Page Rendering Demo
-
-This example demonstrates how to:
-1. Parse HTML content using pyWebLayout's HTML extraction system
-2. Layout the parsed content across multiple pages using the ereader layout system
-3. Render each page as an image file
-
-The demo shows the complete pipeline from HTML to multi-page layout.
-"""
-
-import os
-import sys
-from pathlib import Path
-from typing import List, Tuple
-from PIL import Image, ImageDraw
-
-# Add pyWebLayout to path
-sys.path.insert(0, str(Path(__file__).parent.parent))
-
-from pyWebLayout.io.readers.html_extraction import parse_html_string
-from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
-from pyWebLayout.concrete.page import Page
-from pyWebLayout.style.page_style import PageStyle
-from pyWebLayout.style import Font
-from pyWebLayout.abstract.block import Block
-
-
-def create_sample_html() -> str:
-    """Create a sample HTML document with various elements for testing."""
-    return """
-    <!DOCTYPE html>
-    <html>
-    <head>
-        <title>Sample Document</title>
-    </head>
-    <body>
-        <h1>Chapter 1: Introduction to Multi-Page Layout</h1>
-        
-        <p>This is the first paragraph of our sample document. It demonstrates how HTML content 
-        can be parsed and then laid out across multiple pages using the pyWebLayout system. 
-        The system handles various HTML elements including headings, paragraphs, lists, and more.</p>
-        
-        <p>Here's another paragraph with <strong>bold text</strong> and <em>italic text</em> 
-        to show how inline formatting is preserved during the conversion process. The layout 
-        engine will automatically handle word wrapping and page breaks as needed.</p>
-        
-        <h2>Section 1.1: Features</h2>
-        
-        <p>The multi-page layout system includes several key features:</p>
-        
-        <ul>
-            <li>Automatic page breaking when content exceeds page boundaries</li>
-            <li>Font scaling support for different reading preferences</li>
-            <li>Position tracking for bookmarks and navigation</li>
-            <li>Support for various HTML elements and styling</li>
-        </ul>
-        
-        <p>Each of these features works together to provide a seamless reading experience 
-        that adapts to different page sizes and user preferences.</p>
-        
-        <h2>Section 1.2: Technical Implementation</h2>
-        
-        <p>The implementation uses a sophisticated layout engine that processes abstract 
-        document elements and renders them onto concrete pages. This separation allows 
-        for flexible styling and layout while maintaining the semantic structure of 
-        the original content.</p>
-        
-        <blockquote>
-            "The best way to understand a complex system is to see it in action with 
-            real examples and practical demonstrations."
-        </blockquote>
-        
-        <p>This quote illustrates the philosophy behind this demo - showing how the 
-        various components work together in practice.</p>
-        
-        <h1>Chapter 2: Advanced Layout Concepts</h1>
-        
-        <p>Moving into more advanced territory, we can explore how the layout system 
-        handles complex scenarios such as page breaks within paragraphs, font scaling 
-        effects on layout, and position tracking across multiple pages.</p>
-        
-        <p>The system maintains precise position information that allows for features 
-        like bookmarking, search result highlighting, and seamless navigation between 
-        different views of the same content.</p>
-        
-        <h2>Section 2.1: Position Tracking</h2>
-        
-        <p>Position tracking is implemented using a hierarchical system that can 
-        reference any point in the document structure. This includes not just 
-        paragraph and word positions, but also positions within tables, lists, 
-        and other complex structures.</p>
-        
-        <p>The position system is designed to be stable across different rendering 
-        parameters, so a bookmark created with one font size will still be valid 
-        when the user changes to a different font size.</p>
-        
-        <h2>Section 2.2: Multi-Page Rendering</h2>
-        
-        <p>The multi-page rendering system can generate pages both forward and 
-        backward from any given position. This bidirectional capability is 
-        essential for smooth navigation in ereader applications.</p>
-        
-        <p>Each page is rendered independently, which allows for efficient 
-        caching and parallel processing of multiple pages when needed.</p>
-        
-        <p>This concludes our sample document. The layout system will automatically 
-        determine how many pages are needed to display all this content based on 
-        the page size and font settings used during rendering.</p>
-    </body>
-    </html>
-    """
-
-
-class HTMLMultiPageRenderer:
-    """
-    Renderer that converts HTML to multiple page images.
-    """
-    
-    def __init__(self, page_size: Tuple[int, int] = (600, 800), font_scale: float = 1.0):
-        """
-        Initialize the renderer.
-        
-        Args:
-            page_size: Size of each page in pixels (width, height)
-            font_scale: Font scaling factor
-        """
-        self.page_size = page_size
-        self.font_scale = font_scale
-        self.page_style = PageStyle()
-        
-    def parse_html_to_blocks(self, html_content: str) -> List[Block]:
-        """
-        Parse HTML content into abstract blocks.
-        
-        Args:
-            html_content: HTML string to parse
-            
-        Returns:
-            List of abstract Block objects
-        """
-        base_font = Font(font_size=14)  # Base font for the document
-        blocks = parse_html_string(html_content, base_font=base_font)
-        return blocks
-    
-    def render_pages(self, blocks: List[Block], max_pages: int = 20) -> List[Image.Image]:
-        """
-        Render blocks into multiple page images.
-        
-        Args:
-            blocks: List of abstract blocks to render
-            max_pages: Maximum number of pages to render (safety limit)
-            
-        Returns:
-            List of PIL Image objects, one per page
-        """
-        if not blocks:
-            return []
-        
-        # Create the bidirectional layouter
-        layouter = BidirectionalLayouter(blocks, self.page_style, self.page_size)
-        
-        pages = []
-        current_position = RenderingPosition()  # Start at beginning
-        page_count = 0
-        
-        while page_count < max_pages:
-            try:
-                # Render the next page
-                page, next_position = layouter.render_page_forward(current_position, self.font_scale)
-                
-                # Convert page to image
-                page_image = self._page_to_image(page)
-                pages.append(page_image)
-                
-                page_count += 1
-                
-                # Check if we've reached the end
-                if self._is_end_position(next_position, current_position, blocks):
-                    break
-                
-                current_position = next_position
-                
-            except Exception as e:
-                print(f"Error rendering page {page_count + 1}: {e}")
-                break
-        
-        return pages
-    
-    def _page_to_image(self, page: Page) -> Image.Image:
-        """
-        Convert a Page object to a PIL Image.
-        
-        Args:
-            page: Page object to convert
-            
-        Returns:
-            PIL Image object
-        """
-        # Create a white background image
-        image = Image.new('RGB', self.page_size, 'white')
-        draw = ImageDraw.Draw(image)
-        
-        # Draw page border
-        border_color = (200, 200, 200)
-        draw.rectangle([0, 0, self.page_size[0]-1, self.page_size[1]-1], outline=border_color)
-        
-        # The page object should have already been rendered with its draw context
-        # For this demo, we'll create a simple representation
-        
-        # Add page number at bottom
-        try:
-            from PIL import ImageFont
-            font = ImageFont.load_default()
-        except:
-            font = None
-        
-        page_num_text = f"Page {len(pages) + 1}" if 'pages' in locals() else "Page"
-        text_bbox = draw.textbbox((0, 0), page_num_text, font=font)
-        text_width = text_bbox[2] - text_bbox[0]
-        text_x = (self.page_size[0] - text_width) // 2
-        text_y = self.page_size[1] - 30
-        
-        draw.text((text_x, text_y), page_num_text, fill='black', font=font)
-        
-        return image
-    
-    def _is_end_position(self, current_pos: RenderingPosition, previous_pos: RenderingPosition, blocks: List[Block]) -> bool:
-        """
-        Check if we've reached the end of the document.
-        
-        Args:
-            current_pos: Current rendering position
-            previous_pos: Previous rendering position
-            blocks: List of all blocks in document
-            
-        Returns:
-            True if at end of document
-        """
-        # If position hasn't advanced, we're likely at the end
-        if (current_pos.block_index == previous_pos.block_index and 
-            current_pos.word_index == previous_pos.word_index):
-            return True
-        
-        # If we've processed all blocks
-        if current_pos.block_index >= len(blocks):
-            return True
-        
-        return False
-    
-    def save_pages(self, pages: List[Image.Image], output_dir: str = "output/html_multipage"):
-        """
-        Save rendered pages as image files.
-        
-        Args:
-            pages: List of page images
-            output_dir: Directory to save images
-        """
-        # Create output directory
-        os.makedirs(output_dir, exist_ok=True)
-        
-        for i, page_image in enumerate(pages, 1):
-            filename = f"page_{i:03d}.png"
-            filepath = os.path.join(output_dir, filename)
-            page_image.save(filepath)
-            print(f"Saved {filepath}")
-        
-        print(f"\nRendered {len(pages)} pages to {output_dir}/")
-
-
-def main():
-    """Main demo function."""
-    print("HTML Multi-Page Rendering Demo")
-    print("=" * 40)
-    
-    # Create sample HTML content
-    print("1. Creating sample HTML content...")
-    html_content = create_sample_html()
-    print(f"   Created HTML document ({len(html_content)} characters)")
-    
-    # Initialize renderer
-    print("\n2. Initializing renderer...")
-    renderer = HTMLMultiPageRenderer(page_size=(600, 800), font_scale=1.0)
-    print("   Renderer initialized")
-    
-    # Parse HTML to blocks
-    print("\n3. Parsing HTML to abstract blocks...")
-    blocks = renderer.parse_html_to_blocks(html_content)
-    print(f"   Parsed {len(blocks)} blocks")
-    
-    # Print block summary
-    block_types = {}
-    for block in blocks:
-        block_type = type(block).__name__
-        block_types[block_type] = block_types.get(block_type, 0) + 1
-    
-    print("   Block types found:")
-    for block_type, count in block_types.items():
-        print(f"     - {block_type}: {count}")
-    
-    # Render pages
-    print("\n4. Rendering pages...")
-    pages = renderer.render_pages(blocks, max_pages=10)
-    print(f"   Rendered {len(pages)} pages")
-    
-    # Save pages
-    print("\n5. Saving pages...")
-    renderer.save_pages(pages)
-    
-    print("\n✓ Demo completed successfully!")
-    print("\nTo view the results:")
-    print("  - Check the output/html_multipage/ directory")
-    print("  - Open the PNG files to see each rendered page")
-    
-    # Show some statistics
-    print(f"\nStatistics:")
-    print(f"  - Original HTML: {len(html_content)} characters")
-    print(f"  - Abstract blocks: {len(blocks)}")
-    print(f"  - Rendered pages: {len(pages)}")
-    print(f"  - Page size: {renderer.page_size[0]}x{renderer.page_size[1]} pixels")
-    print(f"  - Font scale: {renderer.font_scale}x")
-
-
-if __name__ == "__main__":
-    main()
--- a/pyWebLayout/abstract/inline.py
+++ b/pyWebLayout/abstract/inline.py
@@ -2,9 +2,12 @@ from __future__ import annotations
 from pyWebLayout.core.base import Queriable
 from pyWebLayout.style import Font
 from pyWebLayout.style.abstract_style import AbstractStyle
-from typing import Tuple, Union, List, Optional, Dict, Any
+from typing import Tuple, Union, List, Optional, Dict, Any, Callable
 import pyphen

+# Import LinkType for type hints (imported at module level to avoid F821 linting error)
+from pyWebLayout.abstract.functional import LinkType
+


 class Word:
@@ -279,7 +282,7 @@ class LinkedWord(Word):
    """
    
    def __init__(self, text: str, style: Union[Font, 'AbstractStyle'],
-                 location: str, link_type: 'LinkType' = None,
+                 location: str, link_type: Optional['LinkType'] = None,
                 callback: Optional[Callable] = None,
                 background=None, previous: Optional[Word] = None,
                 params: Optional[Dict[str, Any]] = None,
@@ -302,7 +305,6 @@ class LinkedWord(Word):
        super().__init__(text, style, background, previous)
        
        # Store link properties
-        from pyWebLayout.abstract.functional import LinkType
        self._location = location
        self._link_type = link_type or LinkType.EXTERNAL
        self._callback = callback
@@ -344,8 +346,6 @@ class LinkedWord(Word):
        Returns:
            The result of the link execution
        """
-        from pyWebLayout.abstract.functional import LinkType
-        
        # Add word text to context
        full_context = {**self._params, 'text': self._text}
        if context:
--- a/pyWebLayout/concrete/viewport.py
+++ b/pyWebLayout/concrete/viewport.py
@@ -387,10 +387,10 @@ class Viewport(Box, Layoutable):
        }


-class ScrollablePageContent(Container):
+class ScrollablePageContent(Box):
    """
    A specialized container for page content that's designed to work with viewports.
-    This extends the regular Page functionality but allows for much larger content areas.
+    This extends the regular Box functionality but allows for much larger content areas.
    """
    
    def __init__(self, content_width: int = 800, initial_height: int = 1000, 
--- a/pyWebLayout/layout/document_layouter.py
+++ b/pyWebLayout/layout/document_layouter.py
@@ -9,6 +9,7 @@ from pyWebLayout.abstract import Paragraph, Word, Link
 from pyWebLayout.abstract.block import Image as AbstractImage
 from pyWebLayout.abstract.inline import LinkedWord
 from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
+from pyWebLayout.style import Font, Alignment

 def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
    """
@@ -40,7 +41,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
    # paragraph.style is already a Font object (concrete), not AbstractStyle
    # We need to get word spacing constraints from the Font's abstract style if available
    # For now, use reasonable defaults based on font size
-    from pyWebLayout.style import Font, Alignment
    
    if isinstance(paragraph.style, Font):
        # paragraph.style is already a Font (concrete style)
@@ -228,8 +228,6 @@ def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] =
    Returns:
        bool: True if image was successfully laid out, False if page ran out of space
    """
-    from pyWebLayout.style import Alignment
-    
    # Use page available width if max_width not specified
    if max_width is None:
        max_width = page.available_width