From 993095caf95ec036724fe37bfaffad7e97792d5d Mon Sep 17 00:00:00 2001 From: Duncan Tourolle Date: Tue, 4 Nov 2025 19:25:44 +0100 Subject: [PATCH] Update repo --- .gitignore | 14 + README.md | 132 ++++--- examples/README.md | 71 ++++ examples/README_EREADER.md | 363 ++++++++++++++++++ examples/ereader_demo.py | 4 +- .../debug_text_positioning.py | 0 {examples => scripts}/epub_page_renderer.py | 0 .../epub_page_renderer_documentlayouter.py | 0 .../run_coverage_gutters.py | 0 .../update_coverage_gutters.py | 0 10 files changed, 525 insertions(+), 59 deletions(-) create mode 100644 examples/README.md create mode 100644 examples/README_EREADER.md rename debug_text_positioning.py => scripts/debug_text_positioning.py (100%) rename {examples => scripts}/epub_page_renderer.py (100%) rename {examples => scripts}/epub_page_renderer_documentlayouter.py (100%) rename run_coverage_gutters.py => scripts/run_coverage_gutters.py (100%) rename update_coverage_gutters.py => scripts/update_coverage_gutters.py (100%) diff --git a/.gitignore b/.gitignore index f68d435..cec9ace 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,17 @@ htmlcov/ *.jpeg *.gif *.svg + +# Output directories +output/ +my_output/ +test_output/ +*_output/ +examples/output/ + +# Generated data +ereader_bookmarks/ +positions/ + +# Debug scripts output +debug_*.png diff --git a/README.md b/README.md index 8903a3b..e83a7f3 100644 --- a/README.md +++ b/README.md @@ -12,16 +12,26 @@ A Python library for HTML-like layout and rendering. > 📋 **Note**: Badges show results from the commit referenced in the URLs. Red "error" badges indicate build failures for that specific step. ## Description -PyWebLayout provides classes for rendering HTML-like content to images using a box-based layout system. It includes support for text, tables, and containers, as well as an HTML parser for converting HTML to layout objects. +PyWebLayout is a Python library for rendering HTML and EPUB content to paginated images. 
The library provides a high-level **EbookReader** API for building interactive ebook reader applications, along with powerful HTML-to-page rendering capabilities. -## Features +## Key Features -- HTML-like layout system -- Text rendering with font support -- Table layouts -- Container elements -- HTML parsing -- Image output +### EbookReader - High-Level API +- 📖 **EPUB Support** - Load and render EPUB files +- 📄 **Page Rendering** - Render pages as PIL Images +- ⬅️➡️ **Navigation** - Forward and backward page navigation +- 🔖 **Bookmarks** - Save and load reading positions +- 📑 **Chapter Navigation** - Jump to chapters by title or index +- 🔤 **Font Control** - Adjust font size dynamically +- 📏 **Spacing Control** - Customize line and paragraph spacing +- 📊 **Progress Tracking** - Monitor reading progress + +### Core Capabilities +- HTML-to-page layout system +- Multi-page document rendering +- Advanced text rendering with font support +- Position tracking across layout changes +- Intelligent line breaking and pagination ## Installation @@ -29,69 +39,77 @@ PyWebLayout provides classes for rendering HTML-like content to images using a b pip install pyWebLayout ``` -## Usage +## Quick Start -### Basic Example +### EbookReader - Recommended API ```python -from pyWebLayout.concrete.page import Page, Container -from pyWebLayout.abstract.inline import Line -from pyWebLayout.layout import Alignment -from PIL import ImageFont +from pyWebLayout.layout.ereader_application import EbookReader -# Create a page -page = Page(size=(800, 600), background_color=(240, 240, 240)) - -# Add a title container -title_container = Container( - origin=(0, 0), - size=(780, 60), - direction='horizontal', - spacing=10, - padding=(10, 10, 10, 10), - halign=Alignment.CENTER, - valign=Alignment.CENTER -) -page.add_child(title_container) - -# Create a title line with text -title_font = ImageFont.load_default() -title_line = Line( - spacing=(8, 15), - origin=(0, 0), - size=(760, 40), - 
font=title_font, - text_color=(0, 0, 0), - halign=Alignment.CENTER -) -title_container.add_child(title_line) -title_line.add_word("PyWebLayout", title_font) -title_line.add_word("Example", title_font) - -# Layout and render the page -page.layout() -image = page.render() -image.save("example.png") +# Create an ebook reader +with EbookReader(page_size=(800, 1000)) as reader: + # Load an EPUB file + reader.load_epub("mybook.epub") + + # Get current page as PIL Image + page = reader.get_current_page() + page.save("page_001.png") + + # Navigate through pages + reader.next_page() + reader.previous_page() + + # Save reading position + reader.save_position("chapter_3") + + # Jump to a chapter + reader.jump_to_chapter("Chapter 5") + + # Adjust font size + reader.increase_font_size() + + # Get progress + progress = reader.get_reading_progress() + print(f"Progress: {progress*100:.1f}%") ``` -### HTML Example +### HTML Multi-Page Rendering ```python -from pyWebLayout.html_parser import html_to_image +from pyWebLayout.io.readers.html_extraction import html_to_blocks +from pyWebLayout.layout.document_layouter import paragraph_layouter +from pyWebLayout.concrete.page import Page +# Parse HTML to blocks html = """ -
-<h1>PyWebLayout HTML Example</h1>
-<p>This is a paragraph rendered from HTML.</p>
-<p>The library supports <b>bold</b>, <i>italic</i>, and <u>underlined</u> text.</p>
-
+<h1>Document Title</h1>
+<p>First paragraph with <b>bold</b> text.</p>
+<p>Second paragraph with more content.</p>
""" +blocks = html_to_blocks(html) -# Render HTML to an image -image = html_to_image(html, page_size=(800, 600)) -image.save("html_example.png") +# Render to pages +page = Page(size=(600, 800)) +# Layout blocks onto pages using document_layouter +# See examples/ directory for complete multi-page examples ``` +## Examples + +Check out the `examples/` directory for complete working examples: + +- **`simple_ereader_example.py`** - Quick start with EbookReader +- **`ereader_demo.py`** - Comprehensive EbookReader feature demo +- **`html_multipage_demo.py`** - HTML to multi-page rendering +- See `examples/README.md` for full list + +## Documentation + +- **EbookReader API**: `examples/README_EREADER.md` +- **HTML Rendering**: `examples/README_HTML_MULTIPAGE.md` +- **Architecture**: `ARCHITECTURE.md` +- **Examples**: `examples/README.md` + ## License MIT License diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..0ce28d8 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,71 @@ +# PyWebLayout Examples + +This directory contains example scripts demonstrating the pyWebLayout library. + +## EbookReader Examples + +The EbookReader provides a high-level, user-friendly API for building ebook reader applications. 
+ +### Quick Start Example + +**`simple_ereader_example.py`** - Simple example showing basic EbookReader usage: +```bash +python simple_ereader_example.py path/to/book.epub +``` + +This demonstrates: +- Loading an EPUB file +- Rendering pages to images +- Basic navigation (next/previous page) +- Saving positions +- Chapter navigation +- Font size adjustment + +### Comprehensive Demo + +**`ereader_demo.py`** - Full feature demonstration: +```bash +python ereader_demo.py path/to/book.epub +``` + +This showcases all EbookReader features: +- Page navigation (forward/backward) +- Position save/load with bookmarks +- Chapter navigation (by index or title) +- Font size control +- Line and block spacing adjustments +- Reading progress tracking +- Book information retrieval + +**Tip:** You can use the test EPUB files in `tests/data/` for testing: +```bash +python simple_ereader_example.py tests/data/test.epub +python ereader_demo.py tests/data/test.epub +``` + +## Other Examples + +### HTML Rendering + +These examples demonstrate rendering HTML content to single-page and multi-page layouts: + +- **`html_line_breaking_demo.py`** - Basic HTML line breaking demonstration +- **`html_multipage_simple.py`** - Simple single-page HTML rendering +- **`html_multipage_demo.py`** - Multi-page HTML layout +- **`html_multipage_demo_final.py`** - Complete multi-page HTML rendering with headers/footers + +For detailed information about HTML rendering, see `README_HTML_MULTIPAGE.md`. + +### Advanced Topics + +**`recursive_position_demo.py`** - Demonstrates the recursive position tracking system + +## Documentation + +- `README_EREADER.md` - Detailed EbookReader API documentation +- `README_HTML_MULTIPAGE.md` - HTML multi-page rendering guide +- `pyWebLayout/layout/README_EREADER_API.md` - EbookReader API reference (in source) + +## Debug/Development Scripts + +Low-level debug and rendering scripts have been moved to the `scripts/` directory. 
diff --git a/examples/README_EREADER.md b/examples/README_EREADER.md new file mode 100644 index 0000000..a031acf --- /dev/null +++ b/examples/README_EREADER.md @@ -0,0 +1,363 @@ +# EbookReader - Simple EPUB Reader Application + +The `EbookReader` class provides a complete, user-friendly interface for building ebook reader applications with pyWebLayout. It wraps all the complex ereader infrastructure into a simple API. + +## Features + +- 📖 **EPUB Loading** - Load EPUB files with automatic content extraction +- ⬅️➡️ **Page Navigation** - Forward and backward page navigation +- 🔖 **Position Management** - Save/load reading positions (stable across font changes) +- 📑 **Chapter Navigation** - Jump to chapters by title or index +- 🔤 **Font Size Control** - Increase/decrease font size with live re-rendering +- 📏 **Spacing Control** - Adjust line and block spacing +- 📊 **Progress Tracking** - Get reading progress and position information +- 💾 **Context Manager Support** - Automatic cleanup with `with` statement + +## Quick Start + +```python +from pyWebLayout.layout.ereader_application import EbookReader + +# Create reader +reader = EbookReader(page_size=(800, 1000)) + +# Load an EPUB +reader.load_epub("mybook.epub") + +# Get current page as PIL Image +page_image = reader.get_current_page() +page_image.save("current_page.png") + +# Navigate +reader.next_page() +reader.previous_page() + +# Close reader +reader.close() +``` + +## API Reference + +### Initialization + +```python +reader = EbookReader( + page_size=(800, 1000), # Page dimensions (width, height) in pixels + margin=40, # Page margin in pixels + background_color=(255, 255, 255), # RGB background color + line_spacing=5, # Line spacing in pixels + inter_block_spacing=15, # Space between blocks in pixels + bookmarks_dir="ereader_bookmarks", # Directory for bookmarks + buffer_size=5 # Number of pages to cache +) +``` + +### Loading EPUB + +```python +# Load EPUB file +success = reader.load_epub("path/to/book.epub") + 
+# Check if book is loaded +if reader.is_loaded(): + print("Book loaded successfully") + +# Get book information +book_info = reader.get_book_info() +# Returns: { +# 'title': 'Book Title', +# 'author': 'Author Name', +# 'document_id': 'book', +# 'total_blocks': 5000, +# 'total_chapters': 20, +# 'page_size': (800, 1000), +# 'font_scale': 1.0 +# } +``` + +### Page Navigation + +```python +# Get current page as PIL Image +page = reader.get_current_page() + +# Navigate to next page +page = reader.next_page() # Returns None at end of book + +# Navigate to previous page +page = reader.previous_page() # Returns None at beginning + +# Save current page to file +reader.render_to_file("page.png") +``` + +### Position Management + +Positions are saved based on abstract document structure (chapter/block/word indices), making them stable across font size and styling changes. + +```python +# Save current position +reader.save_position("my_bookmark") + +# Load saved position +page = reader.load_position("my_bookmark") + +# List all saved positions +positions = reader.list_saved_positions() +# Returns: ['my_bookmark', 'chapter_2', ...] + +# Delete a position +reader.delete_position("my_bookmark") + +# Get detailed position info +info = reader.get_position_info() +# Returns: { +# 'position': {'chapter_index': 0, 'block_index': 42, 'word_index': 15, ...}, +# 'chapter': {'title': 'Chapter 1', 'level': 'H1', ...}, +# 'progress': 0.15, # 15% through the book +# 'font_scale': 1.0, +# 'book_title': 'Book Title', +# 'book_author': 'Author Name' +# } + +# Get reading progress (0.0 to 1.0) +progress = reader.get_reading_progress() +print(f"You're {progress*100:.1f}% through the book") +``` + +### Chapter Navigation + +```python +# Get all chapters +chapters = reader.get_chapters() +# Returns: [('Chapter 1', 0), ('Chapter 2', 1), ...] + +# Get chapters with positions +chapter_positions = reader.get_chapter_positions() +# Returns: [('Chapter 1', RenderingPosition(...)), ...] 
+ +# Jump to chapter by index +page = reader.jump_to_chapter(1) # Jump to second chapter + +# Jump to chapter by title +page = reader.jump_to_chapter("Chapter 1") + +# Get current chapter info +chapter_info = reader.get_current_chapter_info() +# Returns: {'title': 'Chapter 1', 'level': HeadingLevel.H1, 'block_index': 0} +``` + +### Font Size Control + +```python +# Get current font size scale +scale = reader.get_font_size() # Default: 1.0 + +# Set specific font size scale +page = reader.set_font_size(1.5) # 150% of normal size + +# Increase font size by 10% +page = reader.increase_font_size() + +# Decrease font size by 10% +page = reader.decrease_font_size() +``` + +### Spacing Control + +```python +# Set line spacing (spacing between lines within a paragraph) +page = reader.set_line_spacing(10) # 10 pixels + +# Set inter-block spacing (spacing between paragraphs, headings, etc.) +page = reader.set_inter_block_spacing(20) # 20 pixels +``` + +### Context Manager + +The reader supports Python's context manager protocol for automatic cleanup: + +```python +with EbookReader(page_size=(800, 1000)) as reader: + reader.load_epub("book.epub") + page = reader.get_current_page() + # ... 
do stuff +# Automatically saves position and cleans up resources +``` + +## Complete Example + +```python +from pyWebLayout.layout.ereader_application import EbookReader + +# Create reader with custom settings +with EbookReader( + page_size=(800, 1000), + margin=50, + line_spacing=8, + inter_block_spacing=20 +) as reader: + # Load EPUB + if not reader.load_epub("my_novel.epub"): + print("Failed to load EPUB") + exit(1) + + # Get book info + info = reader.get_book_info() + print(f"Reading: {info['title']} by {info['author']}") + print(f"Total chapters: {info['total_chapters']}") + + # Navigate through first few pages + for i in range(5): + page = reader.get_current_page() + page.save(f"page_{i+1:03d}.png") + reader.next_page() + + # Save current position + reader.save_position("page_5") + + # Jump to a chapter + chapters = reader.get_chapters() + if len(chapters) > 2: + print(f"Jumping to: {chapters[2][0]}") + reader.jump_to_chapter(2) + reader.render_to_file("chapter_3_start.png") + + # Return to saved position + reader.load_position("page_5") + + # Adjust font size + reader.increase_font_size() + reader.render_to_file("page_5_larger_font.png") + + # Get progress + progress = reader.get_reading_progress() + print(f"Reading progress: {progress*100:.1f}%") +``` + +## Demo Script + +Run the comprehensive demo to see all features in action: + +```bash +python examples/ereader_demo.py path/to/book.epub +``` + +This will demonstrate: +- Basic page navigation +- Position save/load +- Chapter navigation +- Font size adjustments +- Spacing adjustments +- Book information retrieval + +The demo generates multiple PNG files showing different pages and settings. 
+ +## Position Storage Format + +Positions are stored as JSON files in the `bookmarks_dir` (default: `ereader_bookmarks/`): + +```json +{ + "chapter_index": 0, + "block_index": 42, + "word_index": 15, + "table_row": 0, + "table_col": 0, + "list_item_index": 0, + "remaining_pretext": null, + "page_y_offset": 0 +} +``` + +This format is tied to the abstract document structure, making positions stable across: +- Font size changes +- Line spacing changes +- Inter-block spacing changes +- Page size changes + +## Integration Example: Simple GUI + +Here's a minimal example of integrating with Tkinter: + +```python +import tkinter as tk +from tkinter import filedialog +from PIL import ImageTk +from pyWebLayout.layout.ereader_application import EbookReader + +class SimpleEreaderGUI: + def __init__(self, root): + self.root = root + self.reader = EbookReader(page_size=(600, 800)) + + # Create UI + self.image_label = tk.Label(root) + self.image_label.pack() + + btn_frame = tk.Frame(root) + btn_frame.pack() + + tk.Button(btn_frame, text="Open EPUB", command=self.open_epub).pack(side=tk.LEFT) + tk.Button(btn_frame, text="Previous", command=self.prev_page).pack(side=tk.LEFT) + tk.Button(btn_frame, text="Next", command=self.next_page).pack(side=tk.LEFT) + tk.Button(btn_frame, text="Font+", command=self.increase_font).pack(side=tk.LEFT) + tk.Button(btn_frame, text="Font-", command=self.decrease_font).pack(side=tk.LEFT) + + def open_epub(self): + filepath = filedialog.askopenfilename(filetypes=[("EPUB files", "*.epub")]) + if filepath: + self.reader.load_epub(filepath) + self.display_page() + + def display_page(self): + page = self.reader.get_current_page() + if page: + photo = ImageTk.PhotoImage(page) + self.image_label.config(image=photo) + self.image_label.image = photo + + def next_page(self): + if self.reader.next_page(): + self.display_page() + + def prev_page(self): + if self.reader.previous_page(): + self.display_page() + + def increase_font(self): + 
self.reader.increase_font_size() + self.display_page() + + def decrease_font(self): + self.reader.decrease_font_size() + self.display_page() + +root = tk.Tk() +root.title("Simple Ereader") +app = SimpleEreaderGUI(root) +root.mainloop() +``` + +## Performance Notes + +- The reader uses intelligent page caching for fast navigation +- First page load may take ~1 second, subsequent pages are typically < 0.1 seconds +- Background rendering attempts to pre-cache upcoming pages (you may see pickle warnings, which can be ignored) +- Font size changes invalidate the cache and require re-rendering from the current position +- Position save/load is nearly instantaneous + +## Limitations + +- Currently supports EPUB files only (no PDF, MOBI, etc.) +- Images in EPUBs may not render in some cases +- Tables are skipped in rendering +- Complex HTML layouts may not render perfectly +- No text selection or search functionality (these would need to be added separately) + +## See Also + +- `examples/ereader_demo.py` - Comprehensive feature demonstration +- `pyWebLayout/layout/ereader_manager.py` - Underlying manager class +- `pyWebLayout/layout/ereader_layout.py` - Core layout engine +- `examples/README_EPUB_RENDERERS.md` - Lower-level EPUB rendering diff --git a/examples/ereader_demo.py b/examples/ereader_demo.py index 4afdc8e..99ffd60 100644 --- a/examples/ereader_demo.py +++ b/examples/ereader_demo.py @@ -262,8 +262,8 @@ def main(): if len(sys.argv) < 2: print("Usage: python ereader_demo.py path/to/book.epub") print("\nExample EPUBs to try:") - print(" - test.epub (if available in project root)") - print(" - test2.epub (if available in project root)") + print(" - tests/data/test.epub") + print(" - tests/data/test2.epub") sys.exit(1) epub_path = sys.argv[1] diff --git a/debug_text_positioning.py b/scripts/debug_text_positioning.py similarity index 100% rename from debug_text_positioning.py rename to scripts/debug_text_positioning.py diff --git a/examples/epub_page_renderer.py 
b/scripts/epub_page_renderer.py similarity index 100% rename from examples/epub_page_renderer.py rename to scripts/epub_page_renderer.py diff --git a/examples/epub_page_renderer_documentlayouter.py b/scripts/epub_page_renderer_documentlayouter.py similarity index 100% rename from examples/epub_page_renderer_documentlayouter.py rename to scripts/epub_page_renderer_documentlayouter.py diff --git a/run_coverage_gutters.py b/scripts/run_coverage_gutters.py similarity index 100% rename from run_coverage_gutters.py rename to scripts/run_coverage_gutters.py diff --git a/update_coverage_gutters.py b/scripts/update_coverage_gutters.py similarity index 100% rename from update_coverage_gutters.py rename to scripts/update_coverage_gutters.py