This commit is contained in:
parent
37505d3dcc
commit
55fdcbcb6d
@ -1,326 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
HTML Multi-Page Rendering Demo
|
|
||||||
|
|
||||||
This example demonstrates how to:
|
|
||||||
1. Parse HTML content using pyWebLayout's HTML extraction system
|
|
||||||
2. Layout the parsed content across multiple pages using the ereader layout system
|
|
||||||
3. Render each page as an image file
|
|
||||||
|
|
||||||
The demo shows the complete pipeline from HTML to multi-page layout.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import List, Tuple
|
|
||||||
from PIL import Image, ImageDraw
|
|
||||||
|
|
||||||
# Add pyWebLayout to path
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
||||||
|
|
||||||
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
|
||||||
from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
|
|
||||||
from pyWebLayout.concrete.page import Page
|
|
||||||
from pyWebLayout.style.page_style import PageStyle
|
|
||||||
from pyWebLayout.style import Font
|
|
||||||
from pyWebLayout.abstract.block import Block
|
|
||||||
|
|
||||||
|
|
||||||
def create_sample_html() -> str:
    """Build the HTML document that the demo parses and paginates.

    The markup covers two chapters and exercises headings (h1/h2),
    paragraphs, inline bold/italic text, an unordered list and a
    blockquote.

    Returns:
        The complete sample document as a single HTML string.
    """
    document = """
<!DOCTYPE html>
<html>
<head>
    <title>Sample Document</title>
</head>
<body>
    <h1>Chapter 1: Introduction to Multi-Page Layout</h1>

    <p>This is the first paragraph of our sample document. It demonstrates how HTML content
    can be parsed and then laid out across multiple pages using the pyWebLayout system.
    The system handles various HTML elements including headings, paragraphs, lists, and more.</p>

    <p>Here's another paragraph with <strong>bold text</strong> and <em>italic text</em>
    to show how inline formatting is preserved during the conversion process. The layout
    engine will automatically handle word wrapping and page breaks as needed.</p>

    <h2>Section 1.1: Features</h2>

    <p>The multi-page layout system includes several key features:</p>

    <ul>
        <li>Automatic page breaking when content exceeds page boundaries</li>
        <li>Font scaling support for different reading preferences</li>
        <li>Position tracking for bookmarks and navigation</li>
        <li>Support for various HTML elements and styling</li>
    </ul>

    <p>Each of these features works together to provide a seamless reading experience
    that adapts to different page sizes and user preferences.</p>

    <h2>Section 1.2: Technical Implementation</h2>

    <p>The implementation uses a sophisticated layout engine that processes abstract
    document elements and renders them onto concrete pages. This separation allows
    for flexible styling and layout while maintaining the semantic structure of
    the original content.</p>

    <blockquote>
        "The best way to understand a complex system is to see it in action with
        real examples and practical demonstrations."
    </blockquote>

    <p>This quote illustrates the philosophy behind this demo - showing how the
    various components work together in practice.</p>

    <h1>Chapter 2: Advanced Layout Concepts</h1>

    <p>Moving into more advanced territory, we can explore how the layout system
    handles complex scenarios such as page breaks within paragraphs, font scaling
    effects on layout, and position tracking across multiple pages.</p>

    <p>The system maintains precise position information that allows for features
    like bookmarking, search result highlighting, and seamless navigation between
    different views of the same content.</p>

    <h2>Section 2.1: Position Tracking</h2>

    <p>Position tracking is implemented using a hierarchical system that can
    reference any point in the document structure. This includes not just
    paragraph and word positions, but also positions within tables, lists,
    and other complex structures.</p>

    <p>The position system is designed to be stable across different rendering
    parameters, so a bookmark created with one font size will still be valid
    when the user changes to a different font size.</p>

    <h2>Section 2.2: Multi-Page Rendering</h2>

    <p>The multi-page rendering system can generate pages both forward and
    backward from any given position. This bidirectional capability is
    essential for smooth navigation in ereader applications.</p>

    <p>Each page is rendered independently, which allows for efficient
    caching and parallel processing of multiple pages when needed.</p>

    <p>This concludes our sample document. The layout system will automatically
    determine how many pages are needed to display all this content based on
    the page size and font settings used during rendering.</p>
</body>
</html>
"""
    return document
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLMultiPageRenderer:
    """
    Renderer that converts HTML to multiple page images.

    Typical use is parse_html_to_blocks() -> render_pages() -> save_pages().
    """

    def __init__(self, page_size: Tuple[int, int] = (600, 800), font_scale: float = 1.0):
        """
        Initialize the renderer.

        Args:
            page_size: Size of each page in pixels (width, height)
            font_scale: Font scaling factor
        """
        self.page_size = page_size
        self.font_scale = font_scale
        self.page_style = PageStyle()

    def parse_html_to_blocks(self, html_content: str) -> List[Block]:
        """
        Parse HTML content into abstract blocks.

        Args:
            html_content: HTML string to parse

        Returns:
            List of abstract Block objects
        """
        base_font = Font(font_size=14)  # Base font for the document
        return parse_html_string(html_content, base_font=base_font)

    def render_pages(self, blocks: List[Block], max_pages: int = 20) -> List[Image.Image]:
        """
        Render blocks into multiple page images.

        Rendering is best-effort: if laying out a page raises, the error is
        printed and the pages rendered so far are returned.

        Args:
            blocks: List of abstract blocks to render
            max_pages: Maximum number of pages to render (safety limit)

        Returns:
            List of PIL Image objects, one per page
        """
        if not blocks:
            return []

        # Create the bidirectional layouter
        layouter = BidirectionalLayouter(blocks, self.page_style, self.page_size)

        pages = []
        current_position = RenderingPosition()  # Start at beginning

        for page_number in range(1, max_pages + 1):
            try:
                # Render the next page
                page, next_position = layouter.render_page_forward(
                    current_position, self.font_scale
                )

                # Convert page to image; pass the 1-based number explicitly so
                # the footer label can show it.
                pages.append(self._page_to_image(page, page_number))

                # Check if we've reached the end
                if self._is_end_position(next_position, current_position, blocks):
                    break

                current_position = next_position
            except Exception as e:
                # Demo boundary: report the failure and keep what was rendered.
                print(f"Error rendering page {page_number}: {e}")
                break

        return pages

    def _page_to_image(self, page: Page, page_number: int = 0) -> Image.Image:
        """
        Convert a Page object to a PIL Image.

        Args:
            page: Page object to convert
            page_number: 1-based page number for the footer label; values
                of 0 or less produce the unnumbered label "Page"

        Returns:
            PIL Image object
        """
        # Create a white background image
        image = Image.new('RGB', self.page_size, 'white')
        draw = ImageDraw.Draw(image)
        width, height = self.page_size

        # Draw page border
        border_color = (200, 200, 200)
        draw.rectangle([0, 0, width - 1, height - 1], outline=border_color)

        # NOTE(review): `page` itself is not drawn onto the image here; this
        # demo only produces a bordered blank page with a footer label.

        # Load the default font; fall back to None (lets Pillow pick) only for
        # the failures a font load can reasonably produce.
        try:
            from PIL import ImageFont
            font = ImageFont.load_default()
        except (ImportError, OSError):
            font = None

        # Add page number at bottom, horizontally centred
        page_num_text = f"Page {page_number}" if page_number > 0 else "Page"
        text_bbox = draw.textbbox((0, 0), page_num_text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_x = (width - text_width) // 2
        text_y = height - 30

        draw.text((text_x, text_y), page_num_text, fill='black', font=font)

        return image

    def _is_end_position(self, current_pos: RenderingPosition, previous_pos: RenderingPosition, blocks: List[Block]) -> bool:
        """
        Check if we've reached the end of the document.

        Args:
            current_pos: Current rendering position
            previous_pos: Previous rendering position
            blocks: List of all blocks in document

        Returns:
            True if at end of document
        """
        # If position hasn't advanced, we're likely at the end
        stalled = (
            current_pos.block_index == previous_pos.block_index
            and current_pos.word_index == previous_pos.word_index
        )
        # Otherwise we are done once every block has been processed
        return stalled or current_pos.block_index >= len(blocks)

    def save_pages(self, pages: List[Image.Image], output_dir: str = "output/html_multipage") -> None:
        """
        Save rendered pages as image files.

        Files are named page_001.png, page_002.png, ... in list order.

        Args:
            pages: List of page images
            output_dir: Directory to save images
        """
        # Create output directory
        os.makedirs(output_dir, exist_ok=True)

        for i, page_image in enumerate(pages, 1):
            filename = f"page_{i:03d}.png"
            filepath = os.path.join(output_dir, filename)
            page_image.save(filepath)
            print(f"Saved {filepath}")

        print(f"\nRendered {len(pages)} pages to {output_dir}/")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run the demo: build the sample HTML, parse, render, save and report."""
    print("HTML Multi-Page Rendering Demo")
    print("=" * 40)

    # Step 1: sample HTML content
    print("1. Creating sample HTML content...")
    html_content = create_sample_html()
    html_length = len(html_content)
    print(f" Created HTML document ({html_length} characters)")

    # Step 2: renderer
    print("\n2. Initializing renderer...")
    renderer = HTMLMultiPageRenderer(page_size=(600, 800), font_scale=1.0)
    print(" Renderer initialized")

    # Step 3: HTML -> abstract blocks
    print("\n3. Parsing HTML to abstract blocks...")
    blocks = renderer.parse_html_to_blocks(html_content)
    block_total = len(blocks)
    print(f" Parsed {block_total} blocks")

    # Tally blocks per class name, in first-seen order
    tally = {}
    for type_name in (type(item).__name__ for item in blocks):
        tally[type_name] = tally.get(type_name, 0) + 1

    print(" Block types found:")
    for type_name, occurrences in tally.items():
        print(f" - {type_name}: {occurrences}")

    # Step 4: blocks -> page images
    print("\n4. Rendering pages...")
    pages = renderer.render_pages(blocks, max_pages=10)
    page_total = len(pages)
    print(f" Rendered {page_total} pages")

    # Step 5: write the images to disk
    print("\n5. Saving pages...")
    renderer.save_pages(pages)

    for line in (
        "\n✓ Demo completed successfully!",
        "\nTo view the results:",
        " - Check the output/html_multipage/ directory",
        " - Open the PNG files to see each rendered page",
    ):
        print(line)

    # Summary statistics
    page_width, page_height = renderer.page_size[0], renderer.page_size[1]
    print("\nStatistics:")
    print(f" - Original HTML: {html_length} characters")
    print(f" - Abstract blocks: {block_total}")
    print(f" - Rendered pages: {page_total}")
    print(f" - Page size: {page_width}x{page_height} pixels")
    print(f" - Font scale: {renderer.font_scale}x")
|
|
||||||
|
|
||||||
|
|
||||||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
||||||
@ -2,9 +2,12 @@ from __future__ import annotations
|
|||||||
from pyWebLayout.core.base import Queriable
|
from pyWebLayout.core.base import Queriable
|
||||||
from pyWebLayout.style import Font
|
from pyWebLayout.style import Font
|
||||||
from pyWebLayout.style.abstract_style import AbstractStyle
|
from pyWebLayout.style.abstract_style import AbstractStyle
|
||||||
from typing import Tuple, Union, List, Optional, Dict, Any
|
from typing import Tuple, Union, List, Optional, Dict, Any, Callable
|
||||||
import pyphen
|
import pyphen
|
||||||
|
|
||||||
|
# Import LinkType for type hints (imported at module level to avoid F821 linting error)
|
||||||
|
from pyWebLayout.abstract.functional import LinkType
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Word:
|
class Word:
|
||||||
@ -279,7 +282,7 @@ class LinkedWord(Word):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, text: str, style: Union[Font, 'AbstractStyle'],
|
def __init__(self, text: str, style: Union[Font, 'AbstractStyle'],
|
||||||
location: str, link_type: 'LinkType' = None,
|
location: str, link_type: Optional['LinkType'] = None,
|
||||||
callback: Optional[Callable] = None,
|
callback: Optional[Callable] = None,
|
||||||
background=None, previous: Optional[Word] = None,
|
background=None, previous: Optional[Word] = None,
|
||||||
params: Optional[Dict[str, Any]] = None,
|
params: Optional[Dict[str, Any]] = None,
|
||||||
@ -302,7 +305,6 @@ class LinkedWord(Word):
|
|||||||
super().__init__(text, style, background, previous)
|
super().__init__(text, style, background, previous)
|
||||||
|
|
||||||
# Store link properties
|
# Store link properties
|
||||||
from pyWebLayout.abstract.functional import LinkType
|
|
||||||
self._location = location
|
self._location = location
|
||||||
self._link_type = link_type or LinkType.EXTERNAL
|
self._link_type = link_type or LinkType.EXTERNAL
|
||||||
self._callback = callback
|
self._callback = callback
|
||||||
@ -344,8 +346,6 @@ class LinkedWord(Word):
|
|||||||
Returns:
|
Returns:
|
||||||
The result of the link execution
|
The result of the link execution
|
||||||
"""
|
"""
|
||||||
from pyWebLayout.abstract.functional import LinkType
|
|
||||||
|
|
||||||
# Add word text to context
|
# Add word text to context
|
||||||
full_context = {**self._params, 'text': self._text}
|
full_context = {**self._params, 'text': self._text}
|
||||||
if context:
|
if context:
|
||||||
|
|||||||
@ -387,10 +387,10 @@ class Viewport(Box, Layoutable):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ScrollablePageContent(Container):
|
class ScrollablePageContent(Box):
|
||||||
"""
|
"""
|
||||||
A specialized container for page content that's designed to work with viewports.
|
A specialized container for page content that's designed to work with viewports.
|
||||||
This extends the regular Page functionality but allows for much larger content areas.
|
This extends the regular Box functionality but allows for much larger content areas.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, content_width: int = 800, initial_height: int = 1000,
|
def __init__(self, content_width: int = 800, initial_height: int = 1000,
|
||||||
|
|||||||
@ -9,6 +9,7 @@ from pyWebLayout.abstract import Paragraph, Word, Link
|
|||||||
from pyWebLayout.abstract.block import Image as AbstractImage
|
from pyWebLayout.abstract.block import Image as AbstractImage
|
||||||
from pyWebLayout.abstract.inline import LinkedWord
|
from pyWebLayout.abstract.inline import LinkedWord
|
||||||
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
|
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
|
||||||
|
from pyWebLayout.style import Font, Alignment
|
||||||
|
|
||||||
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
|
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
|
||||||
"""
|
"""
|
||||||
@ -40,7 +41,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
|
|||||||
# paragraph.style is already a Font object (concrete), not AbstractStyle
|
# paragraph.style is already a Font object (concrete), not AbstractStyle
|
||||||
# We need to get word spacing constraints from the Font's abstract style if available
|
# We need to get word spacing constraints from the Font's abstract style if available
|
||||||
# For now, use reasonable defaults based on font size
|
# For now, use reasonable defaults based on font size
|
||||||
from pyWebLayout.style import Font, Alignment
|
|
||||||
|
|
||||||
if isinstance(paragraph.style, Font):
|
if isinstance(paragraph.style, Font):
|
||||||
# paragraph.style is already a Font (concrete style)
|
# paragraph.style is already a Font (concrete style)
|
||||||
@ -228,8 +228,6 @@ def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] =
|
|||||||
Returns:
|
Returns:
|
||||||
bool: True if image was successfully laid out, False if page ran out of space
|
bool: True if image was successfully laid out, False if page ran out of space
|
||||||
"""
|
"""
|
||||||
from pyWebLayout.style import Alignment
|
|
||||||
|
|
||||||
# Use page available width if max_width not specified
|
# Use page available width if max_width not specified
|
||||||
if max_width is None:
|
if max_width is None:
|
||||||
max_width = page.available_width
|
max_width = page.available_width
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user