This commit is contained in:
parent
37505d3dcc
commit
55fdcbcb6d
@ -1,326 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
HTML Multi-Page Rendering Demo
|
||||
|
||||
This example demonstrates how to:
|
||||
1. Parse HTML content using pyWebLayout's HTML extraction system
|
||||
2. Layout the parsed content across multiple pages using the ereader layout system
|
||||
3. Render each page as an image file
|
||||
|
||||
The demo shows the complete pipeline from HTML to multi-page layout.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Add pyWebLayout to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
||||
from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
|
||||
from pyWebLayout.concrete.page import Page
|
||||
from pyWebLayout.style.page_style import PageStyle
|
||||
from pyWebLayout.style import Font
|
||||
from pyWebLayout.abstract.block import Block
|
||||
|
||||
|
||||
def create_sample_html() -> str:
    """Return the hard-coded HTML document used as input for the demo.

    The markup is a complete document (doctype, head with a title, body)
    containing two ``<h1>`` chapters, four ``<h2>`` sections, multi-line
    paragraphs, inline ``<strong>``/``<em>`` formatting, an unordered list
    and a blockquote.

    Returns:
        The full HTML document as a single string.
    """
    # The literal is returned verbatim; its exact text (including line
    # breaks) determines the character count that main() reports.
    return """
<!DOCTYPE html>
<html>
<head>
<title>Sample Document</title>
</head>
<body>
<h1>Chapter 1: Introduction to Multi-Page Layout</h1>

<p>This is the first paragraph of our sample document. It demonstrates how HTML content
can be parsed and then laid out across multiple pages using the pyWebLayout system.
The system handles various HTML elements including headings, paragraphs, lists, and more.</p>

<p>Here's another paragraph with <strong>bold text</strong> and <em>italic text</em>
to show how inline formatting is preserved during the conversion process. The layout
engine will automatically handle word wrapping and page breaks as needed.</p>

<h2>Section 1.1: Features</h2>

<p>The multi-page layout system includes several key features:</p>

<ul>
<li>Automatic page breaking when content exceeds page boundaries</li>
<li>Font scaling support for different reading preferences</li>
<li>Position tracking for bookmarks and navigation</li>
<li>Support for various HTML elements and styling</li>
</ul>

<p>Each of these features works together to provide a seamless reading experience
that adapts to different page sizes and user preferences.</p>

<h2>Section 1.2: Technical Implementation</h2>

<p>The implementation uses a sophisticated layout engine that processes abstract
document elements and renders them onto concrete pages. This separation allows
for flexible styling and layout while maintaining the semantic structure of
the original content.</p>

<blockquote>
"The best way to understand a complex system is to see it in action with
real examples and practical demonstrations."
</blockquote>

<p>This quote illustrates the philosophy behind this demo - showing how the
various components work together in practice.</p>

<h1>Chapter 2: Advanced Layout Concepts</h1>

<p>Moving into more advanced territory, we can explore how the layout system
handles complex scenarios such as page breaks within paragraphs, font scaling
effects on layout, and position tracking across multiple pages.</p>

<p>The system maintains precise position information that allows for features
like bookmarking, search result highlighting, and seamless navigation between
different views of the same content.</p>

<h2>Section 2.1: Position Tracking</h2>

<p>Position tracking is implemented using a hierarchical system that can
reference any point in the document structure. This includes not just
paragraph and word positions, but also positions within tables, lists,
and other complex structures.</p>

<p>The position system is designed to be stable across different rendering
parameters, so a bookmark created with one font size will still be valid
when the user changes to a different font size.</p>

<h2>Section 2.2: Multi-Page Rendering</h2>

<p>The multi-page rendering system can generate pages both forward and
backward from any given position. This bidirectional capability is
essential for smooth navigation in ereader applications.</p>

<p>Each page is rendered independently, which allows for efficient
caching and parallel processing of multiple pages when needed.</p>

<p>This concludes our sample document. The layout system will automatically
determine how many pages are needed to display all this content based on
the page size and font settings used during rendering.</p>
</body>
</html>
"""
|
||||
|
||||
|
||||
class HTMLMultiPageRenderer:
    """
    Renderer that converts HTML to multiple page images.

    Typical use is ``parse_html_to_blocks`` -> ``render_pages`` -> ``save_pages``.
    """

    def __init__(self, page_size: Tuple[int, int] = (600, 800), font_scale: float = 1.0):
        """
        Initialize the renderer.

        Args:
            page_size: Size of each page in pixels (width, height)
            font_scale: Font scaling factor
        """
        self.page_size = page_size
        self.font_scale = font_scale
        self.page_style = PageStyle()

    def parse_html_to_blocks(self, html_content: str) -> List[Block]:
        """
        Parse HTML content into abstract blocks.

        Args:
            html_content: HTML string to parse

        Returns:
            List of abstract Block objects
        """
        base_font = Font(font_size=14)  # Base font for the document
        return parse_html_string(html_content, base_font=base_font)

    def render_pages(self, blocks: List[Block], max_pages: int = 20) -> List[Image.Image]:
        """
        Render blocks into multiple page images.

        Pages are laid out forward from the start of the document until the
        end is detected, ``max_pages`` is reached, or a page fails to render.
        A failure is reported on stdout and the pages rendered so far are
        returned (best-effort behaviour for the demo).

        Args:
            blocks: List of abstract blocks to render
            max_pages: Maximum number of pages to render (safety limit)

        Returns:
            List of PIL Image objects, one per page
        """
        if not blocks:
            return []

        # Create the bidirectional layouter
        layouter = BidirectionalLayouter(blocks, self.page_style, self.page_size)

        pages = []
        current_position = RenderingPosition()  # Start at beginning

        for page_number in range(1, max_pages + 1):
            try:
                # Render the next page
                page, next_position = layouter.render_page_forward(current_position, self.font_scale)

                # Convert page to image, passing the number so the footer
                # label can actually show it.
                pages.append(self._page_to_image(page, page_number))

                # Check if we've reached the end
                if self._is_end_position(next_position, current_position, blocks):
                    break

                current_position = next_position
            except Exception as e:
                print(f"Error rendering page {page_number}: {e}")
                break

        return pages

    @staticmethod
    def _page_label(page_number: int = 0) -> str:
        """
        Build the footer label for a page.

        Args:
            page_number: 1-based page number; 0 (the default) means unknown

        Returns:
            "Page N" for a positive page number, otherwise just "Page"
        """
        return f"Page {page_number}" if page_number > 0 else "Page"

    def _page_to_image(self, page: Page, page_number: int = 0) -> Image.Image:
        """
        Convert a Page object to a PIL Image.

        Args:
            page: Page object to convert
            page_number: 1-based page number shown in the footer; 0 (the
                default) produces an unnumbered "Page" label

        Returns:
            PIL Image object
        """
        width, height = self.page_size

        # Create a white background image
        image = Image.new('RGB', self.page_size, 'white')
        draw = ImageDraw.Draw(image)

        # Draw page border
        border_color = (200, 200, 200)
        draw.rectangle([0, 0, width - 1, height - 1], outline=border_color)

        # NOTE(review): the content of `page` is not drawn here; this demo
        # only produces a bordered placeholder with a page label.

        # Add page number at bottom. Font loading is best-effort: on failure
        # fall back to font=None, but do not swallow KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` would.
        try:
            from PIL import ImageFont
            font = ImageFont.load_default()
        except Exception:
            font = None

        page_num_text = self._page_label(page_number)
        text_bbox = draw.textbbox((0, 0), page_num_text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_x = (width - text_width) // 2  # centre horizontally
        text_y = height - 30

        draw.text((text_x, text_y), page_num_text, fill='black', font=font)

        return image

    def _is_end_position(self, current_pos: RenderingPosition, previous_pos: RenderingPosition, blocks: List[Block]) -> bool:
        """
        Check if we've reached the end of the document.

        Args:
            current_pos: Current rendering position
            previous_pos: Previous rendering position
            blocks: List of all blocks in document

        Returns:
            True if at end of document
        """
        # If position hasn't advanced, we're likely at the end
        if (current_pos.block_index == previous_pos.block_index and
                current_pos.word_index == previous_pos.word_index):
            return True

        # If we've processed all blocks
        return current_pos.block_index >= len(blocks)

    def save_pages(self, pages: List[Image.Image], output_dir: str = "output/html_multipage"):
        """
        Save rendered pages as image files.

        Files are named ``page_001.png``, ``page_002.png``, ... and each
        saved path is printed.

        Args:
            pages: List of page images
            output_dir: Directory to save images
        """
        # Create output directory
        os.makedirs(output_dir, exist_ok=True)

        for i, page_image in enumerate(pages, 1):
            filename = f"page_{i:03d}.png"
            filepath = os.path.join(output_dir, filename)
            page_image.save(filepath)
            print(f"Saved {filepath}")

        print(f"\nRendered {len(pages)} pages to {output_dir}/")
|
||||
|
||||
|
||||
def main():
    """Run the demo end to end: build HTML, parse, render, save, report."""
    print("HTML Multi-Page Rendering Demo")
    print("=" * 40)

    # Step 1: sample document
    print("1. Creating sample HTML content...")
    markup = create_sample_html()
    markup_length = len(markup)
    print(f" Created HTML document ({markup_length} characters)")

    # Step 2: renderer
    print("\n2. Initializing renderer...")
    demo_renderer = HTMLMultiPageRenderer(page_size=(600, 800), font_scale=1.0)
    print(" Renderer initialized")

    # Step 3: HTML -> abstract blocks
    print("\n3. Parsing HTML to abstract blocks...")
    parsed = demo_renderer.parse_html_to_blocks(markup)
    print(f" Parsed {len(parsed)} blocks")

    # Tally blocks by class name, in first-seen order
    tally = {}
    for item in parsed:
        kind = type(item).__name__
        tally.setdefault(kind, 0)
        tally[kind] += 1

    print(" Block types found:")
    for kind in tally:
        print(f" - {kind}: {tally[kind]}")

    # Step 4: blocks -> page images
    print("\n4. Rendering pages...")
    images = demo_renderer.render_pages(parsed, max_pages=10)
    print(f" Rendered {len(images)} pages")

    # Step 5: write images to disk
    print("\n5. Saving pages...")
    demo_renderer.save_pages(images)

    for line in (
        "\n✓ Demo completed successfully!",
        "\nTo view the results:",
        " - Check the output/html_multipage/ directory",
        " - Open the PNG files to see each rendered page",
    ):
        print(line)

    # Summary of the run
    print("\nStatistics:")
    print(f" - Original HTML: {markup_length} characters")
    print(f" - Abstract blocks: {len(parsed)}")
    print(f" - Rendered pages: {len(images)}")
    print(f" - Page size: {demo_renderer.page_size[0]}x{demo_renderer.page_size[1]} pixels")
    print(f" - Font scale: {demo_renderer.font_scale}x")
|
||||
|
||||
|
||||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@ -2,9 +2,12 @@ from __future__ import annotations
|
||||
from pyWebLayout.core.base import Queriable
|
||||
from pyWebLayout.style import Font
|
||||
from pyWebLayout.style.abstract_style import AbstractStyle
|
||||
from typing import Tuple, Union, List, Optional, Dict, Any
|
||||
from typing import Tuple, Union, List, Optional, Dict, Any, Callable
|
||||
import pyphen
|
||||
|
||||
# Import LinkType for type hints (imported at module level to avoid F821 linting error)
|
||||
from pyWebLayout.abstract.functional import LinkType
|
||||
|
||||
|
||||
|
||||
class Word:
|
||||
@ -279,7 +282,7 @@ class LinkedWord(Word):
|
||||
"""
|
||||
|
||||
def __init__(self, text: str, style: Union[Font, 'AbstractStyle'],
|
||||
location: str, link_type: 'LinkType' = None,
|
||||
location: str, link_type: Optional['LinkType'] = None,
|
||||
callback: Optional[Callable] = None,
|
||||
background=None, previous: Optional[Word] = None,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
@ -302,7 +305,6 @@ class LinkedWord(Word):
|
||||
super().__init__(text, style, background, previous)
|
||||
|
||||
# Store link properties
|
||||
from pyWebLayout.abstract.functional import LinkType
|
||||
self._location = location
|
||||
self._link_type = link_type or LinkType.EXTERNAL
|
||||
self._callback = callback
|
||||
@ -344,8 +346,6 @@ class LinkedWord(Word):
|
||||
Returns:
|
||||
The result of the link execution
|
||||
"""
|
||||
from pyWebLayout.abstract.functional import LinkType
|
||||
|
||||
# Add word text to context
|
||||
full_context = {**self._params, 'text': self._text}
|
||||
if context:
|
||||
|
||||
@ -387,10 +387,10 @@ class Viewport(Box, Layoutable):
|
||||
}
|
||||
|
||||
|
||||
class ScrollablePageContent(Container):
|
||||
class ScrollablePageContent(Box):
|
||||
"""
|
||||
A specialized container for page content that's designed to work with viewports.
|
||||
This extends the regular Page functionality but allows for much larger content areas.
|
||||
This extends the regular Box functionality but allows for much larger content areas.
|
||||
"""
|
||||
|
||||
def __init__(self, content_width: int = 800, initial_height: int = 1000,
|
||||
|
||||
@ -9,6 +9,7 @@ from pyWebLayout.abstract import Paragraph, Word, Link
|
||||
from pyWebLayout.abstract.block import Image as AbstractImage
|
||||
from pyWebLayout.abstract.inline import LinkedWord
|
||||
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
|
||||
from pyWebLayout.style import Font, Alignment
|
||||
|
||||
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
|
||||
"""
|
||||
@ -40,7 +41,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
|
||||
# paragraph.style is already a Font object (concrete), not AbstractStyle
|
||||
# We need to get word spacing constraints from the Font's abstract style if available
|
||||
# For now, use reasonable defaults based on font size
|
||||
from pyWebLayout.style import Font, Alignment
|
||||
|
||||
if isinstance(paragraph.style, Font):
|
||||
# paragraph.style is already a Font (concrete style)
|
||||
@ -228,8 +228,6 @@ def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] =
|
||||
Returns:
|
||||
bool: True if image was successfully laid out, False if page ran out of space
|
||||
"""
|
||||
from pyWebLayout.style import Alignment
|
||||
|
||||
# Use page available width if max_width not specified
|
||||
if max_width is None:
|
||||
max_width = page.available_width
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user