pyWebLayout/scripts/epub_page_renderer_documentlayouter.py
2025-11-12 12:03:27 +00:00

392 lines
12 KiB
Python

#!/usr/bin/env python3
"""
EPUB page renderer using DocumentLayouter.
This tool uses pyWebLayout's DocumentLayouter to render EPUB content:
1. Load an EPUB file
2. Extract all blocks (paragraphs, images, etc.)
3. Use DocumentLayouter to layout blocks on pages
4. Save the pages as PNG images
Usage:
python epub_page_renderer_documentlayouter.py book.epub --pages 5 --output-dir rendered_pages
"""
import os
import sys
import argparse
from pathlib import Path
from typing import Optional, List, Tuple
# Add the parent directory to sys.path to import pyWebLayout
sys.path.insert(0, str(Path(__file__).parent.parent))
try:
from pyWebLayout.io.readers.epub_reader import read_epub
from pyWebLayout.layout.document_layouter import paragraph_layouter, image_layouter, pagebreak_layouter
from pyWebLayout.concrete.page import Page
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style.alignment import Alignment
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Table, Image as AbstractImage, PageBreak
from pyWebLayout.style.concrete_style import RenderingContext
except ImportError as e:
print(f"Error importing required modules: {e}")
print("Make sure pyWebLayout is properly installed and PIL is available")
sys.exit(1)
def get_all_blocks_from_book(book) -> List[Block]:
    """
    Flatten the blocks of every chapter of a book into a single list.

    Chapters are visited in the order given by ``book.chapters``. A chapter
    without a ``_blocks`` attribute contributes nothing to the result.

    Args:
        book: The Book object from epub_reader

    Returns:
        List of all Block objects, in chapter order
    """
    return [
        chapter_block
        for chapter in book.chapters
        if hasattr(chapter, '_blocks')
        for chapter_block in chapter._blocks
    ]
def create_page(page_style: PageStyle, page_size: Tuple[int, int]) -> Page:
    """
    Build a fresh Page for the given size and style.

    Args:
        page_style: Style configuration for the page
        page_size: (width, height) tuple

    Returns:
        A new Page object
    """
    return Page(size=page_size, style=page_style)
def _layout_text_block(text_block, page, start_word, pretext, alignment_override):
    """
    Make one paragraph_layouter attempt for a paragraph-like block on a page.

    Args:
        text_block: The paragraph, heading or list-item paragraph to lay out
        page: The page to lay the block out on
        start_word: Word index to resume from (0 for a fresh block)
        pretext: Pretext carried over from a previous page, or None
        alignment_override: Optional alignment passed through to the layouter

    Returns:
        Tuple ``(finished, progressed, next_word, next_pretext)``:
        ``finished`` is True when the layouter reported success;
        ``progressed`` is True when the attempt succeeded or its resume
        position moved; ``next_word`` / ``next_pretext`` are the resume
        position for the next attempt (reset to ``0`` / ``None`` on success,
        unchanged when the layouter reported no failed word index).
    """
    success, failed_word_index, remaining_pretext = paragraph_layouter(
        text_block,
        page,
        start_word=start_word,
        pretext=pretext,
        alignment_override=alignment_override
    )
    if success:
        return True, True, 0, None
    if failed_word_index is None:
        # Nothing to resume from: keep the current position for the retry.
        return False, False, start_word, pretext
    # NOTE(review): assumes an unchanged (word index, pretext) pair means the
    # layouter placed nothing on this page - confirm against paragraph_layouter.
    progressed = (failed_word_index != start_word
                  or remaining_pretext != pretext)
    return False, progressed, failed_word_index, remaining_pretext


def layout_blocks_on_pages(blocks: List[Block], page_style: PageStyle,
                           page_size: Tuple[int, int], max_pages: int,
                           alignment_override: Optional[Alignment] = None) -> List[Page]:
    """
    Layout blocks across multiple pages using DocumentLayouter.

    Blocks are consumed in order. Paragraphs, headings and the Paragraph
    items of lists are laid out with ``paragraph_layouter`` and may continue
    on the following page; images are laid out with ``image_layouter``; a
    PageBreak ends the current page; tables and unknown block types are
    skipped with a warning.

    Layout stops when all blocks are consumed, when ``max_pages`` pages have
    been produced, or when a fresh page could not take any content (which
    prevents looping forever on a block that can never fit).

    Args:
        blocks: List of abstract blocks to layout
        page_style: Style configuration for pages
        page_size: (width, height) tuple for pages
        max_pages: Maximum number of pages to generate
        alignment_override: Optional alignment to override paragraph alignment

    Returns:
        List of Page objects that received content, in reading order
    """
    pages = []
    current_block_index = 0
    # Resume position inside a paragraph that was split across pages.
    continuation_word_index = 0
    continuation_pretext = None
    # Resume position inside a list that was split across pages.
    list_item_index = 0

    while current_block_index < len(blocks) and len(pages) < max_pages:
        page = create_page(page_style, page_size)
        page_has_content = False

        # Keep placing blocks until the page is full or the blocks run out.
        while current_block_index < len(blocks):
            block = blocks[current_block_index]

            if isinstance(block, (Paragraph, Heading)):
                (finished, progressed,
                 continuation_word_index,
                 continuation_pretext) = _layout_text_block(
                    block,
                    page,
                    continuation_word_index,
                    continuation_pretext,
                    alignment_override
                )
                page_has_content = page_has_content or progressed
                if not finished:
                    # Page is full (or the block cannot fit): start a new page
                    break
                current_block_index += 1

            elif isinstance(block, AbstractImage):
                if not image_layouter(block, page):
                    # Image doesn't fit, try on next page
                    break
                page_has_content = True
                current_block_index += 1
                continuation_word_index = 0
                continuation_pretext = None

            elif isinstance(block, HList):
                # Layout list items as paragraphs, resuming at the item (and
                # word) where the previous page ran out of room so that no
                # list content is dropped at a page boundary.
                page_full = False
                try:
                    list_items = list(block.items())
                    while list_item_index < len(list_items):
                        item = list_items[list_item_index]
                        if isinstance(item, Paragraph):
                            (finished, progressed,
                             continuation_word_index,
                             continuation_pretext) = _layout_text_block(
                                item,
                                page,
                                continuation_word_index,
                                continuation_pretext,
                                alignment_override
                            )
                            page_has_content = page_has_content or progressed
                            if not finished:
                                page_full = True
                                break
                        list_item_index += 1
                except Exception as e:
                    # Best effort: a broken list is skipped, not fatal.
                    print(f"Warning: Error processing list: {e}")
                    page_full = False
                    continuation_word_index = 0
                    continuation_pretext = None
                else:
                    if not page_full:
                        page_has_content = True

                if page_full:
                    # Keep current_block_index and list_item_index so the
                    # list continues on the next page.
                    break
                current_block_index += 1
                list_item_index = 0

            elif isinstance(block, PageBreak):
                pagebreak_layouter(block, page)
                current_block_index += 1
                continuation_word_index = 0
                continuation_pretext = None
                if page_has_content:
                    # Break to create a new page for subsequent content
                    break
                # The page is still empty, so the break is already satisfied;
                # ending the page here would make the outer loop stop and
                # discard every remaining block.

            elif isinstance(block, Table):
                # Skip tables for now (not implemented)
                print("Warning: Skipping table (not yet implemented)")
                current_block_index += 1

            else:
                # Unknown block type, skip
                print(f"Warning: Skipping unknown block type: {type(block).__name__}")
                current_block_index += 1

        if page_has_content:
            pages.append(page)
        else:
            # No content could be added to this page, stop
            break

    return pages
def main():
    """
    Parse the command line, paginate the EPUB and write each page as a PNG.

    Returns:
        Process exit status: 0 when at least one page image was written,
        1 on invalid arguments, on a load/extract/layout failure, or when
        no page could be saved.
    """
    # Only needed on the error paths below; imported once for the function.
    import traceback

    parser = argparse.ArgumentParser(
        description='Render EPUB pages using DocumentLayouter',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python epub_page_renderer_documentlayouter.py book.epub --pages 5
python epub_page_renderer_documentlayouter.py book.epub --pages 10 --output-dir my_output --width 600 --height 800
"""
    )
    parser.add_argument(
        'epub_file',
        help='Path to the EPUB file to render'
    )
    parser.add_argument(
        '--pages', '-p',
        type=int,
        default=5,
        help='Number of pages to render (default: 5)'
    )
    parser.add_argument(
        '--output-dir', '-o',
        default='rendered_pages',
        help='Output directory for rendered images (default: rendered_pages)'
    )
    parser.add_argument(
        '--width', '-w',
        type=int,
        default=800,
        help='Page width in pixels (default: 800)'
    )
    parser.add_argument(
        '--height', '-t',
        type=int,
        default=1000,
        help='Page height in pixels (default: 1000)'
    )
    parser.add_argument(
        '--margin', '-m',
        type=int,
        default=40,
        help='Page margin in pixels (default: 40)'
    )
    parser.add_argument(
        '--align', '-a',
        choices=['left', 'justify'],
        default='left',
        help='Text alignment: left or justify (default: left)'
    )
    args = parser.parse_args()

    # Validate arguments before touching the filesystem or the EPUB
    if not os.path.exists(args.epub_file):
        print(f"Error: EPUB file '{args.epub_file}' not found")
        return 1
    if args.pages <= 0:
        print("Error: Number of pages must be positive")
        return 1

    # Create output directory
    try:
        os.makedirs(args.output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        return 1

    print(f"Loading EPUB file: {args.epub_file}")

    # Load the EPUB file
    try:
        book = read_epub(args.epub_file)
        print(f"Successfully loaded EPUB: {book.get_title() or 'Unknown Title'}")
        # Print book information
        author = book.get_metadata('AUTHOR')
        if author:
            print(f"Author: {author}")
        print(f"Chapters: {len(book.chapters) if hasattr(book, 'chapters') else 'Unknown'}")
    except Exception as e:
        print(f"Error loading EPUB file: {e}")
        traceback.print_exc()
        return 1

    # Extract all blocks from the book
    print("Extracting content blocks...")
    try:
        all_blocks = get_all_blocks_from_book(book)
        print(f"Extracted {len(all_blocks)} content blocks")
        if not all_blocks:
            print("No content blocks found in EPUB. The book might be empty.")
            return 1
    except Exception as e:
        print(f"Error extracting blocks: {e}")
        traceback.print_exc()
        return 1

    # Set up page style
    page_size = (args.width, args.height)
    page_style = PageStyle(
        background_color=(255, 255, 255),
        border_width=args.margin,
        border_color=(200, 200, 200),
        padding=(10, 10, 10, 10),  # top, right, bottom, left
        line_spacing=5,
        inter_block_spacing=15
    )

    # Set alignment
    alignment = Alignment.JUSTIFY if args.align == 'justify' else Alignment.LEFT
    print(f"Setting up layouter with page size {page_size} and {args.align} alignment")

    # Layout blocks on pages
    print(f"Rendering up to {args.pages} pages using DocumentLayouter...")
    try:
        pages = layout_blocks_on_pages(
            all_blocks,
            page_style,
            page_size,
            args.pages,
            alignment_override=alignment
        )
        if not pages:
            print("No pages were generated.")
            return 1
        print(f"Generated {len(pages)} pages")

        # Save each page to an image; a failing page is reported and skipped
        # so the remaining pages are still written.
        saved_count = 0
        for i, page in enumerate(pages):
            print(f"Saving page {i + 1}/{len(pages)}...")
            try:
                # Render the page
                image = page.render()
                # Save the image
                output_filename = f"page_{i + 1:03d}.png"
                output_path = os.path.join(args.output_dir, output_filename)
                image.save(output_path, 'PNG')
            except Exception as e:
                print(f"Error saving page {i + 1}: {e}")
                traceback.print_exc()
                continue
            saved_count += 1
            print(f"Saved: {output_path}")

        # Report what was actually written, not what was laid out.
        if saved_count == 0:
            print("Error: No pages could be saved.")
            return 1
        print(f"\nCompleted! Rendered {saved_count} pages to {args.output_dir}")
    except Exception as e:
        print(f"Error during pagination/rendering: {e}")
        traceback.print_exc()
        return 1

    return 0
if __name__ == "__main__":
sys.exit(main())