#!/usr/bin/env python3
"""
EPUB page renderer using DocumentLayouter.

This tool uses pyWebLayout's DocumentLayouter to render EPUB content:
1. Load an EPUB file
2. Extract all blocks (paragraphs, images, etc.)
3. Use DocumentLayouter to layout blocks on pages
4. Save the pages as PNG images

Usage:
    python epub_page_renderer_documentlayouter.py book.epub --pages 5 --output-dir rendered_pages
"""

import os
import sys
import argparse
import traceback
from pathlib import Path
from typing import Optional, List, Tuple

# Add the parent directory to sys.path to import pyWebLayout
sys.path.insert(0, str(Path(__file__).parent.parent))

try:
    from pyWebLayout.io.readers.epub_reader import read_epub
    from pyWebLayout.layout.document_layouter import paragraph_layouter, image_layouter, pagebreak_layouter
    from pyWebLayout.concrete.page import Page
    from pyWebLayout.style.page_style import PageStyle
    from pyWebLayout.style.alignment import Alignment
    from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Table, Image as AbstractImage, PageBreak
    from pyWebLayout.style.concrete_style import RenderingContext
except ImportError as e:
    print(f"Error importing required modules: {e}")
    print("Make sure pyWebLayout is properly installed and PIL is available")
    sys.exit(1)


def get_all_blocks_from_book(book) -> List[Block]:
    """
    Extract all blocks from all chapters in the book.

    Chapters that do not expose a ``_blocks`` attribute contribute nothing.

    Args:
        book: The Book object from epub_reader

    Returns:
        List of all Block objects, in chapter order
    """
    all_blocks: List[Block] = []
    for chapter in book.chapters:
        all_blocks.extend(getattr(chapter, '_blocks', []))
    return all_blocks


def create_page(page_style: PageStyle, page_size: Tuple[int, int]) -> Page:
    """
    Create a new Page with the given style and size.

    Args:
        page_style: Style configuration for the page
        page_size: (width, height) tuple

    Returns:
        A new Page object
    """
    return Page(size=page_size, style=page_style)


def layout_blocks_on_pages(blocks: List[Block], page_style: PageStyle,
                           page_size: Tuple[int, int], max_pages: int,
                           alignment_override: Optional[Alignment] = None) -> List[Page]:
    """
    Layout blocks across multiple pages using DocumentLayouter.

    Blocks are consumed in order. Paragraphs, headings and list items that do
    not fit are resumed on the next page from the word index reported by
    ``paragraph_layouter``. Tables and unknown block types are skipped with a
    warning. A page break that occurs before anything was placed on the
    current page does not emit a blank page; layout simply continues with the
    following block on a fresh page.

    Layout stops when all blocks are consumed, when ``max_pages`` pages have
    been produced, or when a fresh page could not take any content at all.

    Args:
        blocks: List of abstract blocks to layout
        page_style: Style configuration for pages
        page_size: (width, height) tuple for pages
        max_pages: Maximum number of pages to generate
        alignment_override: Optional alignment to override paragraph alignment

    Returns:
        List of Page objects that received content
    """
    pages: List[Page] = []
    block_index = 0

    # Continuation state for a block that was split across pages.
    resume_word = 0          # word index to resume the current paragraph at
    resume_pretext = None    # leftover text fragment handed back by the layouter
    resume_item = 0          # next list item to place inside the current HList

    while block_index < len(blocks) and len(pages) < max_pages:
        page = create_page(page_style, page_size)
        page_has_content = False
        forced_break = False

        # Try to layout blocks on this page
        while block_index < len(blocks):
            block = blocks[block_index]

            if isinstance(block, (Paragraph, Heading)):
                success, failed_word, pretext = paragraph_layouter(
                    block,
                    page,
                    start_word=resume_word,
                    pretext=resume_pretext,
                    alignment_override=alignment_override
                )
                if not success:
                    if failed_word is not None:
                        # Partial layout - continue this block on the next page
                        page_has_content = True
                        resume_word = failed_word
                        resume_pretext = pretext
                    break

                # Block fully laid out, move to next block
                page_has_content = True
                block_index += 1
                resume_word = 0
                resume_pretext = None

            elif isinstance(block, AbstractImage):
                if not image_layouter(block, page):
                    # Image doesn't fit, try on next page
                    break
                page_has_content = True
                block_index += 1
                resume_word = 0
                resume_pretext = None

            elif isinstance(block, HList):
                # Layout list items as paragraphs, resuming at resume_item so
                # that a list split by a page boundary loses no items.
                page_full = False
                try:
                    list_items = list(block.items())
                    while resume_item < len(list_items):
                        item = list_items[resume_item]
                        if isinstance(item, Paragraph):
                            success, failed_word, pretext = paragraph_layouter(
                                item,
                                page,
                                start_word=resume_word,
                                pretext=resume_pretext,
                                alignment_override=alignment_override
                            )
                            if not success:
                                if failed_word is not None:
                                    page_has_content = True
                                    resume_word = failed_word
                                    resume_pretext = pretext
                                page_full = True
                                break
                            page_has_content = True
                        resume_item += 1
                        resume_word = 0
                        resume_pretext = None
                    if not page_full:
                        # Whole list processed on this page
                        page_has_content = True
                except Exception as e:
                    # Best effort: a broken list is skipped, not fatal
                    print(f"Warning: Error processing list: {e}")

                if page_full:
                    # Keep block_index on this list; finish it on the next page
                    break

                block_index += 1
                resume_item = 0
                resume_word = 0
                resume_pretext = None

            elif isinstance(block, PageBreak):
                # PageBreak forces a new page; its return value is not needed
                pagebreak_layouter(block, page)
                block_index += 1
                resume_word = 0
                resume_pretext = None
                forced_break = True
                break

            elif isinstance(block, Table):
                # Skip tables for now (not implemented)
                print("Warning: Skipping table (not yet implemented)")
                block_index += 1

            else:
                # Unknown block type, skip
                print(f"Warning: Skipping unknown block type: {type(block).__name__}")
                block_index += 1

        if page_has_content:
            pages.append(page)
        elif forced_break:
            # A page break hit an empty page: the break was consumed, so
            # progress is guaranteed. Keep going instead of abandoning the
            # rest of the document.
            continue
        else:
            # No content could be added to a fresh page, stop
            break

    return pages


def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command line parser for the renderer."""
    parser = argparse.ArgumentParser(
        description='Render EPUB pages using DocumentLayouter',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python epub_page_renderer_documentlayouter.py book.epub --pages 5
  python epub_page_renderer_documentlayouter.py book.epub --pages 10 --output-dir my_output --width 600 --height 800
        """
    )

    parser.add_argument(
        'epub_file',
        help='Path to the EPUB file to render'
    )
    parser.add_argument(
        '--pages', '-p',
        type=int,
        default=5,
        help='Number of pages to render (default: 5)'
    )
    parser.add_argument(
        '--output-dir', '-o',
        default='rendered_pages',
        help='Output directory for rendered images (default: rendered_pages)'
    )
    parser.add_argument(
        '--width', '-w',
        type=int,
        default=800,
        help='Page width in pixels (default: 800)'
    )
    parser.add_argument(
        '--height', '-t',
        type=int,
        default=1000,
        help='Page height in pixels (default: 1000)'
    )
    parser.add_argument(
        '--margin', '-m',
        type=int,
        default=40,
        help='Page margin in pixels (default: 40)'
    )
    parser.add_argument(
        '--align', '-a',
        choices=['left', 'justify'],
        default='left',
        help='Text alignment: left or justify (default: left)'
    )
    return parser


def main():
    """
    Main function to handle command line arguments and process the EPUB.

    Returns:
        0 on success, 1 when validation, loading, block extraction or
        pagination fails. A failure to save an individual page is reported
        and skipped without changing the exit code.
    """
    args = _build_arg_parser().parse_args()

    # Validate arguments
    if not os.path.exists(args.epub_file):
        print(f"Error: EPUB file '{args.epub_file}' not found")
        return 1

    if args.pages <= 0:
        print("Error: Number of pages must be positive")
        return 1

    # Create output directory
    try:
        os.makedirs(args.output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        return 1

    print(f"Loading EPUB file: {args.epub_file}")

    # Load the EPUB file
    try:
        book = read_epub(args.epub_file)
        print(f"Successfully loaded EPUB: {book.get_title() or 'Unknown Title'}")

        # Print book information
        author = book.get_metadata('AUTHOR')
        if author:
            print(f"Author: {author}")

        print(f"Chapters: {len(book.chapters) if hasattr(book, 'chapters') else 'Unknown'}")
    except Exception as e:
        print(f"Error loading EPUB file: {e}")
        traceback.print_exc()
        return 1

    # Extract all blocks from the book
    print("Extracting content blocks...")
    try:
        all_blocks = get_all_blocks_from_book(book)
        print(f"Extracted {len(all_blocks)} content blocks")

        if not all_blocks:
            print("No content blocks found in EPUB. The book might be empty.")
            return 1
    except Exception as e:
        print(f"Error extracting blocks: {e}")
        traceback.print_exc()
        return 1

    # Set up page style
    page_size = (args.width, args.height)
    page_style = PageStyle(
        background_color=(255, 255, 255),
        border_width=args.margin,
        border_color=(200, 200, 200),
        padding=(10, 10, 10, 10),  # top, right, bottom, left
        line_spacing=5,
        inter_block_spacing=15
    )

    # Set alignment
    alignment = Alignment.JUSTIFY if args.align == 'justify' else Alignment.LEFT
    print(f"Setting up layouter with page size {page_size} and {args.align} alignment")

    # Layout blocks on pages
    print(f"Rendering up to {args.pages} pages using DocumentLayouter...")
    try:
        pages = layout_blocks_on_pages(
            all_blocks,
            page_style,
            page_size,
            args.pages,
            alignment_override=alignment
        )

        if not pages:
            print("No pages were generated.")
            return 1

        print(f"Generated {len(pages)} pages")

        # Save each page to an image
        for i, page in enumerate(pages):
            print(f"Saving page {i + 1}/{len(pages)}...")
            try:
                # Render the page
                image = page.render()

                # Save the image
                output_filename = f"page_{i + 1:03d}.png"
                output_path = os.path.join(args.output_dir, output_filename)
                image.save(output_path, 'PNG')
                print(f"Saved: {output_path}")
            except Exception as e:
                # One bad page should not prevent the others from being saved
                print(f"Error saving page {i + 1}: {e}")
                traceback.print_exc()
                continue

        print(f"\nCompleted! Rendered {len(pages)} pages to {args.output_dir}")
    except Exception as e:
        print(f"Error during pagination/rendering: {e}")
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())