diff --git a/examples/html_multipage_demo.py b/examples/html_multipage_demo.py deleted file mode 100644 index 449535f..0000000 --- a/examples/html_multipage_demo.py +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/env python3 -""" -HTML Multi-Page Rendering Demo - -This example demonstrates how to: -1. Parse HTML content using pyWebLayout's HTML extraction system -2. Layout the parsed content across multiple pages using the ereader layout system -3. Render each page as an image file - -The demo shows the complete pipeline from HTML to multi-page layout. -""" - -import os -import sys -from pathlib import Path -from typing import List, Tuple -from PIL import Image, ImageDraw - -# Add pyWebLayout to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from pyWebLayout.io.readers.html_extraction import parse_html_string -from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition -from pyWebLayout.concrete.page import Page -from pyWebLayout.style.page_style import PageStyle -from pyWebLayout.style import Font -from pyWebLayout.abstract.block import Block - - -def create_sample_html() -> str: - """Create a sample HTML document with various elements for testing.""" - return """ - - - - Sample Document - - -

Chapter 1: Introduction to Multi-Page Layout

- -

This is the first paragraph of our sample document. It demonstrates how HTML content - can be parsed and then laid out across multiple pages using the pyWebLayout system. - The system handles various HTML elements including headings, paragraphs, lists, and more.

- -

Here's another paragraph with bold text and italic text - to show how inline formatting is preserved during the conversion process. The layout - engine will automatically handle word wrapping and page breaks as needed.

- -

Section 1.1: Features

- -

The multi-page layout system includes several key features:

- - - -

Each of these features works together to provide a seamless reading experience - that adapts to different page sizes and user preferences.

- -

Section 1.2: Technical Implementation

- -

The implementation uses a sophisticated layout engine that processes abstract - document elements and renders them onto concrete pages. This separation allows - for flexible styling and layout while maintaining the semantic structure of - the original content.

- -
- "The best way to understand a complex system is to see it in action with - real examples and practical demonstrations." -
- -

This quote illustrates the philosophy behind this demo - showing how the - various components work together in practice.

- -

Chapter 2: Advanced Layout Concepts

- -

Moving into more advanced territory, we can explore how the layout system - handles complex scenarios such as page breaks within paragraphs, font scaling - effects on layout, and position tracking across multiple pages.

- -

The system maintains precise position information that allows for features - like bookmarking, search result highlighting, and seamless navigation between - different views of the same content.

- -

Section 2.1: Position Tracking

- -

Position tracking is implemented using a hierarchical system that can - reference any point in the document structure. This includes not just - paragraph and word positions, but also positions within tables, lists, - and other complex structures.

- -

The position system is designed to be stable across different rendering - parameters, so a bookmark created with one font size will still be valid - when the user changes to a different font size.

- -

Section 2.2: Multi-Page Rendering

- -

The multi-page rendering system can generate pages both forward and - backward from any given position. This bidirectional capability is - essential for smooth navigation in ereader applications.

- -

Each page is rendered independently, which allows for efficient - caching and parallel processing of multiple pages when needed.

- -

This concludes our sample document. The layout system will automatically - determine how many pages are needed to display all this content based on - the page size and font settings used during rendering.

- - - """ - - -class HTMLMultiPageRenderer: - """ - Renderer that converts HTML to multiple page images. - """ - - def __init__(self, page_size: Tuple[int, int] = (600, 800), font_scale: float = 1.0): - """ - Initialize the renderer. - - Args: - page_size: Size of each page in pixels (width, height) - font_scale: Font scaling factor - """ - self.page_size = page_size - self.font_scale = font_scale - self.page_style = PageStyle() - - def parse_html_to_blocks(self, html_content: str) -> List[Block]: - """ - Parse HTML content into abstract blocks. - - Args: - html_content: HTML string to parse - - Returns: - List of abstract Block objects - """ - base_font = Font(font_size=14) # Base font for the document - blocks = parse_html_string(html_content, base_font=base_font) - return blocks - - def render_pages(self, blocks: List[Block], max_pages: int = 20) -> List[Image.Image]: - """ - Render blocks into multiple page images. - - Args: - blocks: List of abstract blocks to render - max_pages: Maximum number of pages to render (safety limit) - - Returns: - List of PIL Image objects, one per page - """ - if not blocks: - return [] - - # Create the bidirectional layouter - layouter = BidirectionalLayouter(blocks, self.page_style, self.page_size) - - pages = [] - current_position = RenderingPosition() # Start at beginning - page_count = 0 - - while page_count < max_pages: - try: - # Render the next page - page, next_position = layouter.render_page_forward(current_position, self.font_scale) - - # Convert page to image - page_image = self._page_to_image(page) - pages.append(page_image) - - page_count += 1 - - # Check if we've reached the end - if self._is_end_position(next_position, current_position, blocks): - break - - current_position = next_position - - except Exception as e: - print(f"Error rendering page {page_count + 1}: {e}") - break - - return pages - - def _page_to_image(self, page: Page) -> Image.Image: - """ - Convert a Page object to a PIL Image. - - Args: - page: Page object to convert - - Returns: - PIL Image object - """ - # Create a white background image - image = Image.new('RGB', self.page_size, 'white') - draw = ImageDraw.Draw(image) - - # Draw page border - border_color = (200, 200, 200) - draw.rectangle([0, 0, self.page_size[0]-1, self.page_size[1]-1], outline=border_color) - - # The page object should have already been rendered with its draw context - # For this demo, we'll create a simple representation - - # Add page number at bottom - try: - from PIL import ImageFont - font = ImageFont.load_default() - except: - font = None - - page_num_text = f"Page {len(pages) + 1}" if 'pages' in locals() else "Page" - text_bbox = draw.textbbox((0, 0), page_num_text, font=font) - text_width = text_bbox[2] - text_bbox[0] - text_x = (self.page_size[0] - text_width) // 2 - text_y = self.page_size[1] - 30 - - draw.text((text_x, text_y), page_num_text, fill='black', font=font) - - return image - - def _is_end_position(self, current_pos: RenderingPosition, previous_pos: RenderingPosition, blocks: List[Block]) -> bool: - """ - Check if we've reached the end of the document. - - Args: - current_pos: Current rendering position - previous_pos: Previous rendering position - blocks: List of all blocks in document - - Returns: - True if at end of document - """ - # If position hasn't advanced, we're likely at the end - if (current_pos.block_index == previous_pos.block_index and - current_pos.word_index == previous_pos.word_index): - return True - - # If we've processed all blocks - if current_pos.block_index >= len(blocks): - return True - - return False - - def save_pages(self, pages: List[Image.Image], output_dir: str = "output/html_multipage"): - """ - Save rendered pages as image files. - - Args: - pages: List of page images - output_dir: Directory to save images - """ - # Create output directory - os.makedirs(output_dir, exist_ok=True) - - for i, page_image in enumerate(pages, 1): - filename = f"page_{i:03d}.png" - filepath = os.path.join(output_dir, filename) - page_image.save(filepath) - print(f"Saved {filepath}") - - print(f"\nRendered {len(pages)} pages to {output_dir}/") - - -def main(): - """Main demo function.""" - print("HTML Multi-Page Rendering Demo") - print("=" * 40) - - # Create sample HTML content - print("1. Creating sample HTML content...") - html_content = create_sample_html() - print(f" Created HTML document ({len(html_content)} characters)") - - # Initialize renderer - print("\n2. Initializing renderer...") - renderer = HTMLMultiPageRenderer(page_size=(600, 800), font_scale=1.0) - print(" Renderer initialized") - - # Parse HTML to blocks - print("\n3. Parsing HTML to abstract blocks...") - blocks = renderer.parse_html_to_blocks(html_content) - print(f" Parsed {len(blocks)} blocks") - - # Print block summary - block_types = {} - for block in blocks: - block_type = type(block).__name__ - block_types[block_type] = block_types.get(block_type, 0) + 1 - - print(" Block types found:") - for block_type, count in block_types.items(): - print(f" - {block_type}: {count}") - - # Render pages - print("\n4. Rendering pages...") - pages = renderer.render_pages(blocks, max_pages=10) - print(f" Rendered {len(pages)} pages") - - # Save pages - print("\n5. Saving pages...") - renderer.save_pages(pages) - - print("\n✓ Demo completed successfully!") - print("\nTo view the results:") - print(" - Check the output/html_multipage/ directory") - print(" - Open the PNG files to see each rendered page") - - # Show some statistics - print(f"\nStatistics:") - print(f" - Original HTML: {len(html_content)} characters") - print(f" - Abstract blocks: {len(blocks)}") - print(f" - Rendered pages: {len(pages)}") - print(f" - Page size: {renderer.page_size[0]}x{renderer.page_size[1]} pixels") - print(f" - Font scale: {renderer.font_scale}x") - - -if __name__ == "__main__": - main() diff --git a/pyWebLayout/abstract/inline.py b/pyWebLayout/abstract/inline.py index a7008d3..9e66a87 100644 --- a/pyWebLayout/abstract/inline.py +++ b/pyWebLayout/abstract/inline.py @@ -2,9 +2,12 @@ from __future__ import annotations from pyWebLayout.core.base import Queriable from pyWebLayout.style import Font from pyWebLayout.style.abstract_style import AbstractStyle -from typing import Tuple, Union, List, Optional, Dict, Any +from typing import Tuple, Union, List, Optional, Dict, Any, Callable import pyphen +# Import LinkType for type hints (imported at module level to avoid F821 linting error) +from pyWebLayout.abstract.functional import LinkType + class Word: @@ -279,7 +282,7 @@ class LinkedWord(Word): """ def __init__(self, text: str, style: Union[Font, 'AbstractStyle'], - location: str, link_type: 'LinkType' = None, + location: str, link_type: Optional['LinkType'] = None, callback: Optional[Callable] = None, background=None, previous: Optional[Word] = None, params: Optional[Dict[str, Any]] = None, @@ -302,7 +305,6 @@ class LinkedWord(Word): super().__init__(text, style, background, previous) # Store link properties - from pyWebLayout.abstract.functional import LinkType self._location = location self._link_type = link_type or LinkType.EXTERNAL self._callback = callback @@ -344,8 +346,6 @@ class LinkedWord(Word): Returns: The result of the link execution """ - from pyWebLayout.abstract.functional import LinkType - # Add word text to context full_context = {**self._params, 'text': self._text} if context: diff --git a/pyWebLayout/concrete/viewport.py b/pyWebLayout/concrete/viewport.py index 364a9e9..dbff7f3 100644 --- a/pyWebLayout/concrete/viewport.py +++ b/pyWebLayout/concrete/viewport.py @@ -387,10 +387,10 @@ class Viewport(Box, Layoutable): } -class ScrollablePageContent(Container): +class ScrollablePageContent(Box): """ A specialized container for page content that's designed to work with viewports. - This extends the regular Page functionality but allows for much larger content areas. + This extends the regular Box functionality but allows for much larger content areas. """ def __init__(self, content_width: int = 800, initial_height: int = 1000, diff --git a/pyWebLayout/layout/document_layouter.py b/pyWebLayout/layout/document_layouter.py index a84692d..e666026 100644 --- a/pyWebLayout/layout/document_layouter.py +++ b/pyWebLayout/layout/document_layouter.py @@ -9,6 +9,7 @@ from pyWebLayout.abstract import Paragraph, Word, Link from pyWebLayout.abstract.block import Image as AbstractImage from pyWebLayout.abstract.inline import LinkedWord from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver +from pyWebLayout.style import Font, Alignment def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]: """ @@ -40,7 +41,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr # paragraph.style is already a Font object (concrete), not AbstractStyle # We need to get word spacing constraints from the Font's abstract style if available # For now, use reasonable defaults based on font size - from pyWebLayout.style import Font, Alignment if isinstance(paragraph.style, Font): # paragraph.style is already a Font (concrete style) @@ -228,8 +228,6 @@ def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] = Returns: bool: True if image was successfully laid out, False if page ran out of space """ - from pyWebLayout.style import Alignment - # Use page available width if max_width not specified if max_width is None: max_width = page.available_width