fix flake8
Some checks failed
Python CI / test (push) Has been cancelled

This commit is contained in:
Duncan Tourolle 2025-11-04 22:41:05 +01:00
parent 37505d3dcc
commit 55fdcbcb6d
4 changed files with 8 additions and 336 deletions

View File

@ -1,326 +0,0 @@
#!/usr/bin/env python3
"""
HTML Multi-Page Rendering Demo
This example demonstrates how to:
1. Parse HTML content using pyWebLayout's HTML extraction system
2. Layout the parsed content across multiple pages using the ereader layout system
3. Render each page as an image file
The demo shows the complete pipeline from HTML to multi-page layout.
"""
import os
import sys
from collections import Counter
from pathlib import Path
from typing import List, Optional, Tuple

from PIL import Image, ImageDraw, ImageFont

# Add pyWebLayout to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from pyWebLayout.io.readers.html_extraction import parse_html_string
from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
from pyWebLayout.concrete.page import Page
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style import Font
from pyWebLayout.abstract.block import Block
def create_sample_html() -> str:
    """Return the fixed HTML document used as input for the demo.

    The markup contains two chapters (``<h1>``), four sections (``<h2>``),
    paragraphs with inline bold/italic text, one bulleted list and one
    blockquote, which is enough text to spill over several pages.
    """
    # The literal is deliberately left unindented so the returned text is
    # exactly the markup below, with no leading whitespace added per line.
    html_document = """
<!DOCTYPE html>
<html>
<head>
<title>Sample Document</title>
</head>
<body>
<h1>Chapter 1: Introduction to Multi-Page Layout</h1>
<p>This is the first paragraph of our sample document. It demonstrates how HTML content
can be parsed and then laid out across multiple pages using the pyWebLayout system.
The system handles various HTML elements including headings, paragraphs, lists, and more.</p>
<p>Here's another paragraph with <strong>bold text</strong> and <em>italic text</em>
to show how inline formatting is preserved during the conversion process. The layout
engine will automatically handle word wrapping and page breaks as needed.</p>
<h2>Section 1.1: Features</h2>
<p>The multi-page layout system includes several key features:</p>
<ul>
<li>Automatic page breaking when content exceeds page boundaries</li>
<li>Font scaling support for different reading preferences</li>
<li>Position tracking for bookmarks and navigation</li>
<li>Support for various HTML elements and styling</li>
</ul>
<p>Each of these features works together to provide a seamless reading experience
that adapts to different page sizes and user preferences.</p>
<h2>Section 1.2: Technical Implementation</h2>
<p>The implementation uses a sophisticated layout engine that processes abstract
document elements and renders them onto concrete pages. This separation allows
for flexible styling and layout while maintaining the semantic structure of
the original content.</p>
<blockquote>
"The best way to understand a complex system is to see it in action with
real examples and practical demonstrations."
</blockquote>
<p>This quote illustrates the philosophy behind this demo - showing how the
various components work together in practice.</p>
<h1>Chapter 2: Advanced Layout Concepts</h1>
<p>Moving into more advanced territory, we can explore how the layout system
handles complex scenarios such as page breaks within paragraphs, font scaling
effects on layout, and position tracking across multiple pages.</p>
<p>The system maintains precise position information that allows for features
like bookmarking, search result highlighting, and seamless navigation between
different views of the same content.</p>
<h2>Section 2.1: Position Tracking</h2>
<p>Position tracking is implemented using a hierarchical system that can
reference any point in the document structure. This includes not just
paragraph and word positions, but also positions within tables, lists,
and other complex structures.</p>
<p>The position system is designed to be stable across different rendering
parameters, so a bookmark created with one font size will still be valid
when the user changes to a different font size.</p>
<h2>Section 2.2: Multi-Page Rendering</h2>
<p>The multi-page rendering system can generate pages both forward and
backward from any given position. This bidirectional capability is
essential for smooth navigation in ereader applications.</p>
<p>Each page is rendered independently, which allows for efficient
caching and parallel processing of multiple pages when needed.</p>
<p>This concludes our sample document. The layout system will automatically
determine how many pages are needed to display all this content based on
the page size and font settings used during rendering.</p>
</body>
</html>
"""
    return html_document
class HTMLMultiPageRenderer:
    """
    Renderer that converts HTML to multiple page images.

    The pipeline is: parse HTML into abstract blocks, paginate them with a
    ``BidirectionalLayouter``, and produce one PIL image per page.
    """

    def __init__(self, page_size: Tuple[int, int] = (600, 800), font_scale: float = 1.0):
        """
        Initialize the renderer.

        Args:
            page_size: Size of each page in pixels (width, height)
            font_scale: Font scaling factor passed to the layouter
        """
        self.page_size = page_size
        self.font_scale = font_scale
        self.page_style = PageStyle()

    def parse_html_to_blocks(self, html_content: str) -> List[Block]:
        """
        Parse HTML content into abstract blocks.

        Args:
            html_content: HTML string to parse

        Returns:
            List of abstract Block objects
        """
        base_font = Font(font_size=14)  # Base font for the document
        return parse_html_string(html_content, base_font=base_font)

    def render_pages(self, blocks: List[Block], max_pages: int = 20) -> List[Image.Image]:
        """
        Render blocks into multiple page images.

        Rendering is best-effort: if laying out or drawing a page raises, the
        error is printed and the pages produced so far are returned.

        Args:
            blocks: List of abstract blocks to render
            max_pages: Maximum number of pages to render (safety limit)

        Returns:
            List of PIL Image objects, one per page
        """
        if not blocks:
            return []

        layouter = BidirectionalLayouter(blocks, self.page_style, self.page_size)
        pages = []
        current_position = RenderingPosition()  # Start at beginning

        for page_number in range(1, max_pages + 1):
            try:
                page, next_position = layouter.render_page_forward(
                    current_position, self.font_scale
                )
                pages.append(self._page_to_image(page, page_number))

                # Stop once the layouter no longer advances or has consumed
                # every block.
                if self._is_end_position(next_position, current_position, blocks):
                    break
                current_position = next_position
            except Exception as e:
                # Deliberate catch-all: this is a demo and should still emit
                # whatever pages were rendered before the failure.
                print(f"Error rendering page {page_number}: {e}")
                break

        return pages

    def _page_to_image(self, page: Page, page_number: Optional[int] = None) -> Image.Image:
        """
        Convert a Page object to a PIL Image.

        Only a placeholder is drawn: a white canvas with a grey border and a
        centred footer label. The content of ``page`` itself is not painted.

        Args:
            page: Page object to convert
            page_number: 1-based number shown in the footer; when omitted the
                footer reads just "Page"

        Returns:
            PIL Image object
        """
        width, height = self.page_size

        # White background with a light grey border
        image = Image.new('RGB', self.page_size, 'white')
        draw = ImageDraw.Draw(image)
        border_color = (200, 200, 200)
        draw.rectangle([0, 0, width - 1, height - 1], outline=border_color)

        # Fall back to ImageDraw's implicit default when no font can be loaded.
        try:
            font = ImageFont.load_default()
        except (ImportError, OSError):
            font = None

        page_num_text = f"Page {page_number}" if page_number is not None else "Page"

        # Centre the label horizontally, 30 px above the bottom edge.
        text_bbox = draw.textbbox((0, 0), page_num_text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_x = (width - text_width) // 2
        text_y = height - 30
        draw.text((text_x, text_y), page_num_text, fill='black', font=font)

        return image

    def _is_end_position(self, current_pos: RenderingPosition, previous_pos: RenderingPosition, blocks: List[Block]) -> bool:
        """
        Check if we've reached the end of the document.

        Args:
            current_pos: Current rendering position
            previous_pos: Previous rendering position
            blocks: List of all blocks in document

        Returns:
            True if at end of document
        """
        # A position that did not advance means nothing more could be laid out.
        stalled = (
            current_pos.block_index == previous_pos.block_index
            and current_pos.word_index == previous_pos.word_index
        )
        # A block index past the last block means everything was consumed.
        return stalled or current_pos.block_index >= len(blocks)

    def save_pages(self, pages: List[Image.Image], output_dir: str = "output/html_multipage"):
        """
        Save rendered pages as image files.

        Files are named ``page_001.png``, ``page_002.png``, ... and the
        directory is created if it does not exist.

        Args:
            pages: List of page images
            output_dir: Directory to save images
        """
        os.makedirs(output_dir, exist_ok=True)

        for i, page_image in enumerate(pages, start=1):
            filepath = os.path.join(output_dir, f"page_{i:03d}.png")
            page_image.save(filepath)
            print(f"Saved {filepath}")

        print(f"\nRendered {len(pages)} pages to {output_dir}/")
def main():
    """Run the demo: build sample HTML, parse it, paginate it, save PNGs and print a summary."""
    print("HTML Multi-Page Rendering Demo")
    print("=" * 40)

    # Create sample HTML content
    print("1. Creating sample HTML content...")
    html_content = create_sample_html()
    print(f" Created HTML document ({len(html_content)} characters)")

    # Initialize renderer
    print("\n2. Initializing renderer...")
    renderer = HTMLMultiPageRenderer(page_size=(600, 800), font_scale=1.0)
    print(" Renderer initialized")

    # Parse HTML to blocks
    print("\n3. Parsing HTML to abstract blocks...")
    blocks = renderer.parse_html_to_blocks(html_content)
    print(f" Parsed {len(blocks)} blocks")

    # Summarize how many blocks of each class were produced; Counter keeps
    # first-seen order, so the listing order matches the document order.
    block_types = Counter(type(block).__name__ for block in blocks)
    print(" Block types found:")
    for block_type, count in block_types.items():
        print(f" - {block_type}: {count}")

    # Render pages
    print("\n4. Rendering pages...")
    pages = renderer.render_pages(blocks, max_pages=10)
    print(f" Rendered {len(pages)} pages")

    # Save pages
    print("\n5. Saving pages...")
    renderer.save_pages(pages)

    print("\n✓ Demo completed successfully!")
    print("\nTo view the results:")
    print(" - Check the output/html_multipage/ directory")
    print(" - Open the PNG files to see each rendered page")

    # Show some statistics
    print("\nStatistics:")
    print(f" - Original HTML: {len(html_content)} characters")
    print(f" - Abstract blocks: {len(blocks)}")
    print(f" - Rendered pages: {len(pages)}")
    print(f" - Page size: {renderer.page_size[0]}x{renderer.page_size[1]} pixels")
    print(f" - Font scale: {renderer.font_scale}x")


if __name__ == "__main__":
    main()

View File

@ -2,9 +2,12 @@ from __future__ import annotations
from pyWebLayout.core.base import Queriable from pyWebLayout.core.base import Queriable
from pyWebLayout.style import Font from pyWebLayout.style import Font
from pyWebLayout.style.abstract_style import AbstractStyle from pyWebLayout.style.abstract_style import AbstractStyle
from typing import Tuple, Union, List, Optional, Dict, Any from typing import Tuple, Union, List, Optional, Dict, Any, Callable
import pyphen import pyphen
# Import LinkType for type hints (imported at module level to avoid F821 linting error)
from pyWebLayout.abstract.functional import LinkType
class Word: class Word:
@ -279,7 +282,7 @@ class LinkedWord(Word):
""" """
def __init__(self, text: str, style: Union[Font, 'AbstractStyle'], def __init__(self, text: str, style: Union[Font, 'AbstractStyle'],
location: str, link_type: 'LinkType' = None, location: str, link_type: Optional['LinkType'] = None,
callback: Optional[Callable] = None, callback: Optional[Callable] = None,
background=None, previous: Optional[Word] = None, background=None, previous: Optional[Word] = None,
params: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None,
@ -302,7 +305,6 @@ class LinkedWord(Word):
super().__init__(text, style, background, previous) super().__init__(text, style, background, previous)
# Store link properties # Store link properties
from pyWebLayout.abstract.functional import LinkType
self._location = location self._location = location
self._link_type = link_type or LinkType.EXTERNAL self._link_type = link_type or LinkType.EXTERNAL
self._callback = callback self._callback = callback
@ -344,8 +346,6 @@ class LinkedWord(Word):
Returns: Returns:
The result of the link execution The result of the link execution
""" """
from pyWebLayout.abstract.functional import LinkType
# Add word text to context # Add word text to context
full_context = {**self._params, 'text': self._text} full_context = {**self._params, 'text': self._text}
if context: if context:

View File

@ -387,10 +387,10 @@ class Viewport(Box, Layoutable):
} }
class ScrollablePageContent(Container): class ScrollablePageContent(Box):
""" """
A specialized container for page content that's designed to work with viewports. A specialized container for page content that's designed to work with viewports.
This extends the regular Page functionality but allows for much larger content areas. This extends the regular Box functionality but allows for much larger content areas.
""" """
def __init__(self, content_width: int = 800, initial_height: int = 1000, def __init__(self, content_width: int = 800, initial_height: int = 1000,

View File

@ -9,6 +9,7 @@ from pyWebLayout.abstract import Paragraph, Word, Link
from pyWebLayout.abstract.block import Image as AbstractImage from pyWebLayout.abstract.block import Image as AbstractImage
from pyWebLayout.abstract.inline import LinkedWord from pyWebLayout.abstract.inline import LinkedWord
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
from pyWebLayout.style import Font, Alignment
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]: def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
""" """
@ -40,7 +41,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
# paragraph.style is already a Font object (concrete), not AbstractStyle # paragraph.style is already a Font object (concrete), not AbstractStyle
# We need to get word spacing constraints from the Font's abstract style if available # We need to get word spacing constraints from the Font's abstract style if available
# For now, use reasonable defaults based on font size # For now, use reasonable defaults based on font size
from pyWebLayout.style import Font, Alignment
if isinstance(paragraph.style, Font): if isinstance(paragraph.style, Font):
# paragraph.style is already a Font (concrete style) # paragraph.style is already a Font (concrete style)
@ -228,8 +228,6 @@ def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] =
Returns: Returns:
bool: True if image was successfully laid out, False if page ran out of space bool: True if image was successfully laid out, False if page ran out of space
""" """
from pyWebLayout.style import Alignment
# Use page available width if max_width not specified # Use page available width if max_width not specified
if max_width is None: if max_width is None:
max_width = page.available_width max_width = page.available_width