From 65ab46556f66c9723a106ea15c67ac3fbae61ce9 Mon Sep 17 00:00:00 2001 From: Duncan Tourolle Date: Wed, 27 Aug 2025 22:22:54 +0200 Subject: [PATCH] big update with ok rendering --- RECURSIVE_POSITION_SYSTEM.md | 371 +++++++++++ debug_text_positioning.py | 74 +++ .../my_novel_chapter_1_start.json | 42 ++ .../my_novel_current_position.json | 42 ++ .../my_novel_interesting_quote.json | 42 ++ ereader_bookmarks/my_novel_logo_image.json | 39 ++ ereader_bookmarks/my_novel_sales_table.json | 56 ++ examples/README_HTML_MULTIPAGE.md | 201 ++++++ examples/html_line_breaking_demo.py | 292 +++++++++ examples/html_multipage_demo.py | 326 ++++++++++ examples/html_multipage_demo_final.py | 451 ++++++++++++++ examples/html_multipage_simple.py | 365 +++++++++++ examples/recursive_position_demo.py | 386 ++++++++++++ positions/my_novel_chapter3_climax.json | 42 ++ pyWebLayout/__init__.py | 2 +- pyWebLayout/abstract/block.py | 7 +- pyWebLayout/concrete/box.py | 13 +- pyWebLayout/concrete/image.py | 2 +- pyWebLayout/concrete/page.py | 142 +++-- pyWebLayout/concrete/text.py | 189 +++--- pyWebLayout/concrete/viewport.py | 2 +- pyWebLayout/core/base.py | 4 +- .../examples/demo_alignment_refactor.py | 2 +- pyWebLayout/examples/demo_viewport_system.py | 2 +- pyWebLayout/examples/html_browser.py | 2 +- .../examples/html_browser_with_viewport.py | 2 +- pyWebLayout/io/readers/epub_reader.py | 10 +- pyWebLayout/io/readers/html_extraction.py | 3 +- pyWebLayout/layout/document_layouter.py | 102 ++-- pyWebLayout/layout/ereader_layout.py | 450 ++++++++++++++ pyWebLayout/layout/ereader_manager.py | 493 +++++++++++++++ pyWebLayout/layout/page_buffer.py | 411 +++++++++++++ pyWebLayout/layout/recursive_position.py | 481 +++++++++++++++ pyWebLayout/style/__init__.py | 36 +- pyWebLayout/style/abstract_style.py | 11 +- pyWebLayout/style/alignment.py | 18 + pyWebLayout/style/concrete_style.py | 3 +- pyWebLayout/style/layout.py | 18 +- pyWebLayout/style/page_style.py | 25 +- 
tests/concrete/test_alignment_handlers.py | 2 +- tests/concrete/test_concrete_box.py | 2 +- tests/concrete/test_concrete_functional.py | 2 +- tests/concrete/test_concrete_image.py | 2 +- tests/concrete/test_concrete_text.py | 6 +- .../concrete/test_new_page_implementation.py | 373 ++++++----- tests/io_tests/__init__.py | 0 tests/{io => io_tests}/test_epub_reader.py | 0 .../{io => io_tests}/test_html_extraction.py | 0 .../test_html_extraction_functions.py | 3 +- .../{io => io_tests}/test_html_file_loader.py | 0 tests/layout/__init__.py | 9 + tests/layout/test_ereader_system.py | 456 ++++++++++++++ tests/layout/test_recursive_position.py | 578 ++++++++++++++++++ tests/style/test_html_style.py | 3 +- 54 files changed, 6157 insertions(+), 438 deletions(-) create mode 100644 RECURSIVE_POSITION_SYSTEM.md create mode 100644 debug_text_positioning.py create mode 100644 ereader_bookmarks/my_novel_chapter_1_start.json create mode 100644 ereader_bookmarks/my_novel_current_position.json create mode 100644 ereader_bookmarks/my_novel_interesting_quote.json create mode 100644 ereader_bookmarks/my_novel_logo_image.json create mode 100644 ereader_bookmarks/my_novel_sales_table.json create mode 100644 examples/README_HTML_MULTIPAGE.md create mode 100644 examples/html_line_breaking_demo.py create mode 100644 examples/html_multipage_demo.py create mode 100644 examples/html_multipage_demo_final.py create mode 100644 examples/html_multipage_simple.py create mode 100644 examples/recursive_position_demo.py create mode 100644 positions/my_novel_chapter3_climax.json create mode 100644 pyWebLayout/layout/ereader_layout.py create mode 100644 pyWebLayout/layout/ereader_manager.py create mode 100644 pyWebLayout/layout/page_buffer.py create mode 100644 pyWebLayout/layout/recursive_position.py create mode 100644 pyWebLayout/style/alignment.py create mode 100644 tests/io_tests/__init__.py rename tests/{io => io_tests}/test_epub_reader.py (100%) rename tests/{io => io_tests}/test_html_extraction.py 
(100%) rename tests/{io => io_tests}/test_html_extraction_functions.py (99%) rename tests/{io => io_tests}/test_html_file_loader.py (100%) create mode 100644 tests/layout/__init__.py create mode 100644 tests/layout/test_ereader_system.py create mode 100644 tests/layout/test_recursive_position.py diff --git a/RECURSIVE_POSITION_SYSTEM.md b/RECURSIVE_POSITION_SYSTEM.md new file mode 100644 index 0000000..11be962 --- /dev/null +++ b/RECURSIVE_POSITION_SYSTEM.md @@ -0,0 +1,371 @@ +# Recursive Position System + +A flexible, hierarchical position tracking system for dynamic content positioning in document layout applications. + +## Overview + +The Recursive Position System provides a powerful way to track positions within complex, nested document structures. Unlike traditional flat position systems that only track basic coordinates, this system can reference any type of content (words, images, table cells, list items, etc.) with full hierarchical context. + +## Key Features + +- **Hierarchical Position Tracking**: Navigate through nested document structures with precision +- **Dynamic Content Type Support**: Handle words, images, tables, lists, forms, and more +- **Flexible Serialization**: Save positions as JSON or Python shelf objects +- **Position Relationships**: Query ancestor/descendant relationships between positions +- **Fluent Builder Pattern**: Easy position creation with method chaining +- **Metadata Support**: Store rendering context (font scale, themes, etc.) +- **Real-world Applications**: Perfect for ereaders, document editors, and CMS systems + +## Architecture + +### Core Components + +1. **ContentType Enum**: Defines all supported content types +2. **LocationNode**: Represents a single position within a content type +3. **RecursivePosition**: Hierarchical position with a path of LocationNodes +4. **PositionBuilder**: Fluent interface for creating positions +5. 
**PositionStorage**: Persistent storage with JSON and shelf support + +### Position Hierarchy + +Positions are represented as paths from document root to specific locations: + +``` +Document → Chapter[2] → Block[5] → Paragraph → Word[12] → Character[3] +Document → Chapter[1] → Block[3] → Table → Row[2] → Cell[1] → Word[0] +Document → Chapter[0] → Block[1] → Image +``` + +## Usage Examples + +### Basic Position Creation + +```python +from pyWebLayout.layout.recursive_position import PositionBuilder + +# Create a word position with character-level precision +position = (PositionBuilder() + .chapter(2) + .block(5) + .paragraph() + .word(12, offset=3) + .with_rendering_metadata(font_scale=1.5, theme="dark") + .build()) + +print(position) # document[0] -> chapter[2] -> block[5] -> paragraph[0] -> word[12]+3 +``` + +### Different Content Types + +```python +from pyWebLayout.layout.recursive_position import ( + create_word_position, create_image_position, + create_table_cell_position, create_list_item_position +) + +# Word in a paragraph +word_pos = create_word_position(chapter=1, block=3, word=15, char_offset=2) + +# Image in a block +image_pos = create_image_position(chapter=2, block=1, image_index=0) + +# Cell in a table +table_pos = create_table_cell_position(chapter=0, block=4, row=2, col=1, word=5) + +# Item in a list +list_pos = create_list_item_position(chapter=1, block=2, item=3, word=0) +``` + +### Complex Nested Structures + +```python +# Position in a nested list +nested_pos = (PositionBuilder() + .chapter(2) + .block(5) + .list(0, list_type="ordered") + .list_item(2) + .list(1, list_type="unordered") # Nested list + .list_item(1) + .word(3) + .build()) + +# Position in a table cell with metadata +table_pos = (PositionBuilder() + .chapter(3) + .block(10) + .table(0, table_type="financial", columns=5) + .table_row(2, row_type="data") + .table_cell(1, cell_type="currency", format="USD") + .word(0, text="$1,234.56") + .build()) +``` + +### Position Relationships 
+ +```python +# Check ancestor/descendant relationships +chapter_pos = PositionBuilder().chapter(1).block(2).build() +word_pos = PositionBuilder().chapter(1).block(2).paragraph().word(5).build() + +print(chapter_pos.is_ancestor_of(word_pos)) # True +print(word_pos.is_descendant_of(chapter_pos)) # True + +# Find common ancestors +other_pos = create_word_position(1, 3, 0) # Different block +common = word_pos.get_common_ancestor(other_pos) +print(common) # document[0] -> chapter[1] +``` + +### Serialization and Storage + +```python +from pyWebLayout.layout.recursive_position import PositionStorage + +# JSON storage +storage = PositionStorage("bookmarks", use_shelf=False) + +# Save positions +storage.save_position("my_document", "bookmark1", position) +storage.save_position("my_document", "bookmark2", other_position) + +# Load positions +loaded = storage.load_position("my_document", "bookmark1") +all_bookmarks = storage.list_positions("my_document") + +# Shelf storage (binary, more efficient for large datasets) +shelf_storage = PositionStorage("bookmarks", use_shelf=True) +shelf_storage.save_position("my_document", "bookmark1", position) +``` + +## Content Types + +The system supports the following content types: + +| Type | Description | Example Usage | +|------|-------------|---------------| +| `DOCUMENT` | Document root | Always present as root node | +| `CHAPTER` | Document chapters/sections | Chapter navigation | +| `BLOCK` | Block-level elements | Paragraphs, headings, tables | +| `PARAGRAPH` | Text paragraphs | Text content | +| `HEADING` | Section headings | H1-H6 elements | +| `TABLE` | Table structures | Data tables | +| `TABLE_ROW` | Table rows | Row navigation | +| `TABLE_CELL` | Table cells | Cell-specific content | +| `LIST` | List structures | Ordered/unordered lists | +| `LIST_ITEM` | List items | Individual list entries | +| `WORD` | Individual words | Word-level precision | +| `IMAGE` | Images | Visual content | +| `LINK` | Hyperlinks | Interactive 
links | +| `BUTTON` | Interactive buttons | Form controls | +| `FORM_FIELD` | Form input fields | User input | +| `LINE` | Rendered text lines | Layout-specific | +| `PAGE` | Rendered pages | Pagination | + +## Ereader Integration + +The system is designed for ereader applications with features like: + +### Bookmark Management + +```python +# Save reading position with context +reading_pos = (PositionBuilder() + .chapter(3) + .block(15) + .paragraph() + .word(23, offset=7) + .with_rendering_metadata( + font_scale=1.2, + page_size=[600, 800], + theme="sepia" + ) + .build()) + +storage.save_position("novel", "chapter3_climax", reading_pos) +``` + +### Chapter Navigation + +```python +# Jump to chapter start +chapter_start = PositionBuilder().chapter(5).block(0).paragraph().word(0).build() + +# Navigate within chapter +current_pos = PositionBuilder().chapter(5).block(12).paragraph().word(45).build() + +# Check if positions are in same chapter +same_chapter = chapter_start.get_common_ancestor(current_pos) +chapter_node = same_chapter.get_node(ContentType.CHAPTER) +print(f"Both in chapter {chapter_node.index}") +``` + +### Font Scaling Support + +```python +# Position with rendering metadata +position = (PositionBuilder() + .chapter(2) + .block(8) + .paragraph() + .word(15) + .with_rendering_metadata( + font_scale=1.5, + page_size=[800, 600], + line_height=24, + theme="dark" + ) + .build()) + +# Metadata persists through serialization +json_str = position.to_json() +restored = RecursivePosition.from_json(json_str) +print(restored.rendering_metadata["font_scale"]) # 1.5 +``` + +## Advanced Features + +### Position Navigation + +```python +# Truncate position to specific level +word_pos = create_word_position(2, 5, 12, 3) +block_pos = word_pos.copy().truncate_to_type(ContentType.BLOCK) +print(block_pos) # document[0] -> chapter[2] -> block[5] + +# Navigate between related positions +table_cell_pos = create_table_cell_position(1, 3, 2, 1, 0) +next_cell_pos = 
table_cell_pos.copy() +cell_node = next_cell_pos.get_node(ContentType.TABLE_CELL) +cell_node.index = 2 # Move to next column +``` + +### Metadata Usage + +```python +# Rich metadata support +position = (PositionBuilder() + .chapter(1) + .block(5) + .table(0, + table_type="financial", + columns=5, + rows=20, + title="Q3 Results") + .table_row(3, + row_type="data", + category="revenue") + .table_cell(2, + cell_type="currency", + format="USD", + precision=2) + .word(0, text="$1,234,567.89") + .build()) + +# Access metadata +table_node = position.get_node(ContentType.TABLE) +print(table_node.metadata["title"]) # "Q3 Results" + +cell_node = position.get_node(ContentType.TABLE_CELL) +print(cell_node.metadata["format"]) # "USD" +``` + +## Performance Considerations + +### Memory Usage + +- Positions are lightweight (typically < 1KB serialized) +- Path-based structure minimizes memory overhead +- Metadata is optional and only stored when needed + +### Serialization Performance + +- **JSON**: Human-readable, cross-platform, ~2-3x larger +- **Shelf**: Binary format, faster for large datasets, Python-specific + +### Comparison Operations + +- Position equality: O(n) where n is path depth +- Ancestor/descendant checks: O(min(depth1, depth2)) +- Common ancestor finding: O(min(depth1, depth2)) + +## Integration with Existing Systems + +### Backward Compatibility + +The system can coexist with existing position tracking: + +```python +# Convert from old RenderingPosition +def convert_old_position(old_pos): + return (PositionBuilder() + .chapter(old_pos.chapter_index) + .block(old_pos.block_index) + .paragraph() + .word(old_pos.word_index) + .build()) + +# Convert to old format (lossy) +def convert_to_old(recursive_pos): + chapter_node = recursive_pos.get_node(ContentType.CHAPTER) + block_node = recursive_pos.get_node(ContentType.BLOCK) + word_node = recursive_pos.get_node(ContentType.WORD) + + return RenderingPosition( + chapter_index=chapter_node.index if chapter_node else 0, + 
block_index=block_node.index if block_node else 0, + word_index=word_node.index if word_node else 0 + ) +``` + +### Migration Strategy + +1. **Phase 1**: Implement recursive system alongside existing system +2. **Phase 2**: Update bookmark storage to use new format +3. **Phase 3**: Migrate existing bookmarks +4. **Phase 4**: Update layout engines to generate recursive positions +5. **Phase 5**: Remove old position system + +## Testing + +Comprehensive test suite covers: + +- Position creation and manipulation +- Serialization/deserialization +- Storage systems (JSON and shelf) +- Position relationships +- Real-world scenarios +- Performance benchmarks + +Run tests with: +```bash +python -m pytest tests/layout/test_recursive_position.py -v +``` + +## Examples + +See `examples/recursive_position_demo.py` for a complete demonstration of all features. + +## Future Enhancements + +Potential improvements: + +1. **Position Comparison**: Implement `<`, `>`, `<=`, `>=` operators for sorting +2. **Path Compression**: Optimize storage for deep hierarchies +3. **Query Language**: SQL-like queries for position sets +4. **Indexing**: B-tree indexing for large position collections +5. **Diff Operations**: Calculate differences between positions +6. **Batch Operations**: Efficient bulk position updates + +## Conclusion + +The Recursive Position System provides a robust, flexible foundation for position tracking in complex document structures. Its hierarchical approach, rich metadata support, and efficient serialization make it ideal for modern ereader applications and document management systems. 
+ +The system's design prioritizes: +- **Flexibility**: Handle any content type or nesting level +- **Performance**: Efficient operations and minimal memory usage +- **Usability**: Intuitive builder pattern and clear APIs +- **Persistence**: Reliable serialization and storage options +- **Extensibility**: Easy to add new content types and features + +This makes it a significant improvement over traditional flat position systems and provides a solid foundation for advanced document navigation features. diff --git a/debug_text_positioning.py b/debug_text_positioning.py new file mode 100644 index 0000000..3389f74 --- /dev/null +++ b/debug_text_positioning.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Debug script to test text positioning in the line breaking system +""" + +import sys +from pathlib import Path +from PIL import Image, ImageDraw, ImageFont + +# Add pyWebLayout to path +sys.path.insert(0, str(Path(__file__).parent)) + +from pyWebLayout.style import Font +from pyWebLayout.concrete.text import Text, Line +from pyWebLayout.style.layout import Alignment + +def test_simple_text_rendering(): + """Test basic text rendering to debug positioning issues""" + + # Create a simple image + width, height = 300, 200 + image = Image.new('RGB', (width, height), 'white') + draw = ImageDraw.Draw(image) + + # Draw a border for reference + draw.rectangle([0, 0, width-1, height-1], outline=(200, 200, 200), width=2) + + # Create a font + font = Font(font_size=12) + + # Test 1: Direct PIL text rendering + print("Test 1: Direct PIL text rendering") + draw.text((10, 30), "Direct PIL text", font=font.font, fill=(0, 0, 0)) + + # Test 2: Using our Text class + print("Test 2: Using Text class") + text_obj = Text("Text class rendering", font, draw) + text_obj.set_origin([10, 60]) # Set position + print(f"Text origin: {text_obj.origin}") + text_obj.render() + + # Test 3: Using Line class + print("Test 3: Using Line class") + line = Line( + spacing=(2, 6), + origin=(10, 100), + 
size=(280, 20), + draw=draw, + font=font, + halign=Alignment.LEFT + ) + + # Create a simple word to add to the line + from pyWebLayout.abstract.inline import Word + word = Word("Line class rendering", font) + + success, overflow = line.add_word(word) + print(f"Word added successfully: {success}") + print(f"Line origin: {line.origin}") + print(f"Line baseline: {line._baseline}") + print(f"Text objects in line: {len(line.text_objects)}") + + if line.text_objects: + for i, text in enumerate(line.text_objects): + print(f" Text {i}: '{text.text}' at origin {text.origin}") + + line.render() + + # Save the debug image + image.save("debug_text_positioning.png") + print("Debug image saved as debug_text_positioning.png") + +if __name__ == "__main__": + test_simple_text_rendering() diff --git a/ereader_bookmarks/my_novel_chapter_1_start.json b/ereader_bookmarks/my_novel_chapter_1_start.json new file mode 100644 index 0000000..428b861 --- /dev/null +++ b/ereader_bookmarks/my_novel_chapter_1_start.json @@ -0,0 +1,42 @@ +{ + "path": [ + { + "content_type": "document", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "chapter", + "index": 1, + "offset": 0, + "metadata": {} + }, + { + "content_type": "block", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "paragraph", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "word", + "index": 0, + "offset": 0, + "metadata": {} + } + ], + "rendering_metadata": { + "font_scale": 1.0, + "page_size": [ + 600, + 800 + ], + "theme": "light" + } +} \ No newline at end of file diff --git a/ereader_bookmarks/my_novel_current_position.json b/ereader_bookmarks/my_novel_current_position.json new file mode 100644 index 0000000..e46f154 --- /dev/null +++ b/ereader_bookmarks/my_novel_current_position.json @@ -0,0 +1,42 @@ +{ + "path": [ + { + "content_type": "document", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "chapter", + "index": 5, + "offset": 0, 
+ "metadata": {} + }, + { + "content_type": "block", + "index": 12, + "offset": 0, + "metadata": {} + }, + { + "content_type": "paragraph", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "word", + "index": 23, + "offset": 7, + "metadata": {} + } + ], + "rendering_metadata": { + "font_scale": 1.3, + "page_size": [ + 600, + 800 + ], + "theme": "dark" + } +} \ No newline at end of file diff --git a/ereader_bookmarks/my_novel_interesting_quote.json b/ereader_bookmarks/my_novel_interesting_quote.json new file mode 100644 index 0000000..a412452 --- /dev/null +++ b/ereader_bookmarks/my_novel_interesting_quote.json @@ -0,0 +1,42 @@ +{ + "path": [ + { + "content_type": "document", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "chapter", + "index": 2, + "offset": 0, + "metadata": {} + }, + { + "content_type": "block", + "index": 15, + "offset": 0, + "metadata": {} + }, + { + "content_type": "paragraph", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "word", + "index": 8, + "offset": 0, + "metadata": {} + } + ], + "rendering_metadata": { + "font_scale": 1.2, + "page_size": [ + 600, + 800 + ], + "theme": "sepia" + } +} \ No newline at end of file diff --git a/ereader_bookmarks/my_novel_logo_image.json b/ereader_bookmarks/my_novel_logo_image.json new file mode 100644 index 0000000..d6f8b72 --- /dev/null +++ b/ereader_bookmarks/my_novel_logo_image.json @@ -0,0 +1,39 @@ +{ + "path": [ + { + "content_type": "document", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "chapter", + "index": 4, + "offset": 0, + "metadata": {} + }, + { + "content_type": "block", + "index": 8, + "offset": 0, + "metadata": {} + }, + { + "content_type": "image", + "index": 0, + "offset": 0, + "metadata": { + "alt_text": "Company Logo", + "caption": "Figure 4.1: Corporate Identity" + } + } + ], + "rendering_metadata": { + "font_scale": 1.0, + "page_size": [ + 600, + 800 + ], + "theme": "light" + } +} \ 
No newline at end of file diff --git a/ereader_bookmarks/my_novel_sales_table.json b/ereader_bookmarks/my_novel_sales_table.json new file mode 100644 index 0000000..a0a8b44 --- /dev/null +++ b/ereader_bookmarks/my_novel_sales_table.json @@ -0,0 +1,56 @@ +{ + "path": [ + { + "content_type": "document", + "index": 0, + "offset": 0, + "metadata": {} + }, + { + "content_type": "chapter", + "index": 3, + "offset": 0, + "metadata": {} + }, + { + "content_type": "block", + "index": 22, + "offset": 0, + "metadata": {} + }, + { + "content_type": "table", + "index": 0, + "offset": 0, + "metadata": { + "table_type": "data", + "title": "Sales Figures" + } + }, + { + "content_type": "table_row", + "index": 1, + "offset": 0, + "metadata": { + "row_type": "header" + } + }, + { + "content_type": "table_cell", + "index": 0, + "offset": 0, + "metadata": { + "cell_type": "header", + "text": "Quarter" + } + } + ], + "rendering_metadata": { + "font_scale": 1.1, + "page_size": [ + 600, + 800 + ], + "theme": "dark" + } +} \ No newline at end of file diff --git a/examples/README_HTML_MULTIPAGE.md b/examples/README_HTML_MULTIPAGE.md new file mode 100644 index 0000000..9a42f96 --- /dev/null +++ b/examples/README_HTML_MULTIPAGE.md @@ -0,0 +1,201 @@ +# HTML Multi-Page Rendering Examples + +This directory contains working examples that demonstrate how to render HTML content across multiple pages using the pyWebLayout system. The examples show the complete pipeline from HTML parsing to multi-page layout. + +## Overview + +The pyWebLayout system provides a sophisticated HTML-to-multi-page rendering pipeline that: + +1. **Parses HTML** using the `pyWebLayout.io.readers.html_extraction` module +2. **Converts to abstract blocks** (paragraphs, headings, lists, etc.) +3. **Lays out content across pages** using the `pyWebLayout.layout.document_layouter` +4. **Renders pages as images** for visualization + +## Examples + +### 1. 
`html_multipage_simple.py` - Basic Example + +A simple demonstration that shows the core functionality: + +```bash +python examples/html_multipage_simple.py +``` + +**Features:** +- Parses basic HTML with headings and paragraphs +- Uses 600x800 pixel pages +- Demonstrates single-page layout +- Outputs to `output/html_simple/` + +**Results:** +- Parsed 11 paragraphs from HTML +- Rendered 1 page with 20 lines +- Created `page_001.png` (19KB) + +### 2. `html_multipage_demo_final.py` - Complete Multi-Page Demo + +A comprehensive demonstration with true multi-page functionality: + +```bash +python examples/html_multipage_demo_final.py +``` + +**Features:** +- Longer HTML document with multiple chapters +- Smaller pages (400x500 pixels) to force multi-page layout +- Enhanced page formatting with headers and footers +- Smart heading placement (avoids orphaned headings) +- Outputs to `output/html_multipage_final/` + +**Results:** +- Parsed 22 paragraphs (6 headings, 16 regular paragraphs) +- Rendered 7 pages with 67 total lines +- Average 9.6 lines per page +- Created 7 PNG files (4.9KB - 10KB each) + +## Technical Details + +### HTML Parsing + +The system uses BeautifulSoup to parse HTML and converts elements to pyWebLayout abstract blocks: + +- `<h1>-<h6>` → `Heading` blocks +- `<p>` → `Paragraph` blocks +- `