394 lines
13 KiB
Python
394 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
EPUB page renderer using DocumentLayouter.

This tool uses pyWebLayout's DocumentLayouter to render EPUB content:
1. Load an EPUB file
2. Extract all blocks (paragraphs, images, etc.)
3. Use DocumentLayouter to lay out blocks on pages
4. Save the pages as PNG images

Usage:
    python epub_page_renderer_documentlayouter.py book.epub --pages 5 --output-dir rendered_pages
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
from typing import Optional, List, Tuple
|
|
|
|
# Add the parent directory to sys.path to import pyWebLayout
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
try:
|
|
from pyWebLayout.io.readers.epub_reader import read_epub
|
|
from pyWebLayout.layout.document_layouter import DocumentLayouter, paragraph_layouter, image_layouter, pagebreak_layouter
|
|
from pyWebLayout.concrete.page import Page
|
|
from pyWebLayout.style.page_style import PageStyle
|
|
from pyWebLayout.style.fonts import Font
|
|
from pyWebLayout.style.alignment import Alignment
|
|
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Table, Image as AbstractImage, PageBreak
|
|
from pyWebLayout.style.concrete_style import RenderingContext, StyleResolver
|
|
from PIL import Image, ImageDraw
|
|
except ImportError as e:
|
|
print(f"Error importing required modules: {e}")
|
|
print("Make sure pyWebLayout is properly installed and PIL is available")
|
|
sys.exit(1)
|
|
|
|
|
|
def get_all_blocks_from_book(book) -> List[Block]:
    """
    Flatten the blocks of every chapter in the book into a single list.

    Chapters are visited in the order given by ``book.chapters``. A chapter
    that has no ``_blocks`` attribute contributes nothing.

    Args:
        book: The Book object from epub_reader

    Returns:
        List of all Block objects, in chapter order
    """
    return [
        block
        for chapter in book.chapters
        if hasattr(chapter, '_blocks')
        for block in chapter._blocks
    ]
|
|
|
|
|
|
def create_page(page_style: PageStyle, page_size: Tuple[int, int]) -> Page:
    """
    Build a fresh Page for the given style and size.

    Args:
        page_style: Style configuration for the page
        page_size: (width, height) tuple

    Returns:
        A new Page object
    """
    return Page(size=page_size, style=page_style)
|
|
|
|
|
|
def layout_blocks_on_pages(blocks: List[Block], page_style: PageStyle,
                           page_size: Tuple[int, int], max_pages: int,
                           alignment_override: Optional[Alignment] = None) -> List[Page]:
    """
    Layout blocks across multiple pages using DocumentLayouter.

    Blocks are consumed in order. Paragraphs, headings and list items that do
    not fit entirely on the current page are resumed on the next page from the
    word index reported by ``paragraph_layouter``. A list that spans pages is
    resumed at the first item that has not been fully laid out, so no list
    items are dropped. Tables and unknown block types are skipped with a
    warning.

    A page is only emitted when something was laid out on it. If a page ends
    up empty but blocks were consumed (for example a PageBreak at the top of a
    page, or only skipped blocks), layout continues with the next block
    instead of stopping. Layout stops early only when a fresh page cannot
    accept the current block at all, which would otherwise loop forever.

    Args:
        blocks: List of abstract blocks to layout
        page_style: Style configuration for pages
        page_size: (width, height) tuple for pages
        max_pages: Maximum number of pages to generate
        alignment_override: Optional alignment to override paragraph alignment

    Returns:
        List of Page objects that received content (at most ``max_pages``)
    """
    pages = []
    current_block_index = 0

    # Resume state for a paragraph, heading or list item split across pages.
    continuation_word_index = 0
    continuation_pretext = None

    # Next item to lay out inside the current HList when it spans pages.
    list_item_index = 0

    while current_block_index < len(blocks) and len(pages) < max_pages:
        page = create_page(page_style, page_size)
        page_has_content = False
        # Remembered so an empty page can be told apart from "no progress".
        page_start_index = current_block_index

        # Put as many blocks as possible on this page.
        while current_block_index < len(blocks):
            block = blocks[current_block_index]

            if isinstance(block, (Paragraph, Heading)):
                success, failed_word_index, remaining_pretext = paragraph_layouter(
                    block,
                    page,
                    start_word=continuation_word_index,
                    pretext=continuation_pretext,
                    alignment_override=alignment_override
                )

                if success:
                    # Block fully laid out, move to next block
                    page_has_content = True
                    current_block_index += 1
                    continuation_word_index = 0
                    continuation_pretext = None
                else:
                    if failed_word_index is not None:
                        # Partial layout - continue on next page
                        page_has_content = True
                        continuation_word_index = failed_word_index
                        continuation_pretext = remaining_pretext
                    # Page is full: start a new one
                    break

            elif isinstance(block, AbstractImage):
                success = image_layouter(block, page)

                if success:
                    page_has_content = True
                    current_block_index += 1
                    continuation_word_index = 0
                    continuation_pretext = None
                else:
                    # Image doesn't fit, try on next page
                    break

            elif isinstance(block, HList):
                # Layout list items as paragraphs, resuming where the
                # previous page stopped.
                page_full = False
                try:
                    list_items = list(block.items())
                    while list_item_index < len(list_items):
                        item = list_items[list_item_index]
                        if not isinstance(item, Paragraph):
                            # Only paragraph items can be laid out here
                            list_item_index += 1
                            continue

                        success, failed_word_index, remaining_pretext = paragraph_layouter(
                            item,
                            page,
                            start_word=continuation_word_index,
                            pretext=continuation_pretext,
                            alignment_override=alignment_override
                        )

                        if not success:
                            if failed_word_index is not None:
                                # Item partially laid out - finish it on the next page
                                page_has_content = True
                                continuation_word_index = failed_word_index
                                continuation_pretext = remaining_pretext
                            page_full = True
                            break

                        page_has_content = True
                        list_item_index += 1
                        continuation_word_index = 0
                        continuation_pretext = None
                except Exception as e:
                    # Best effort: a list that cannot be processed is skipped
                    print(f"Warning: Error processing list: {e}")

                if page_full:
                    # Keep current_block_index on this list so the remaining
                    # items are laid out on the next page.
                    break

                # List finished (or skipped after an error): move on
                current_block_index += 1
                list_item_index = 0
                continuation_word_index = 0
                continuation_pretext = None

            elif isinstance(block, PageBreak):
                # Let the layouter process the break, then consume the block
                # and end this page so later content starts on a new one.
                pagebreak_layouter(block, page)
                current_block_index += 1
                continuation_word_index = 0
                continuation_pretext = None
                break

            elif isinstance(block, Table):
                # Skip tables for now (not implemented)
                print("Warning: Skipping table (not yet implemented)")
                current_block_index += 1

            else:
                # Unknown block type, skip
                print(f"Warning: Skipping unknown block type: {type(block).__name__}")
                current_block_index += 1

        if page_has_content:
            pages.append(page)
        elif current_block_index == page_start_index:
            # A fresh page could not take the current block and nothing was
            # consumed; retrying would never terminate, so stop here.
            break
        # Otherwise only non-rendering blocks were consumed (page break,
        # skipped blocks): drop the empty page and keep going.

    return pages
|
|
|
|
|
|
def main():
    """
    Main function to handle command line arguments and process the EPUB.

    Parses the command line, validates the arguments, loads the EPUB, lays
    its blocks out on pages and writes each page as ``page_NNN.png`` into the
    output directory.

    Returns:
        0 if at least one page was saved, 1 on invalid arguments, on a
        loading/layout error, or when no page could be saved.
    """
    # Local import: only this entry point needs it, for error reporting.
    import traceback

    parser = argparse.ArgumentParser(
        description='Render EPUB pages using DocumentLayouter',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python epub_page_renderer_documentlayouter.py book.epub --pages 5
  python epub_page_renderer_documentlayouter.py book.epub --pages 10 --output-dir my_output --width 600 --height 800
        """
    )

    parser.add_argument(
        'epub_file',
        help='Path to the EPUB file to render'
    )

    parser.add_argument(
        '--pages', '-p',
        type=int,
        default=5,
        help='Number of pages to render (default: 5)'
    )

    parser.add_argument(
        '--output-dir', '-o',
        default='rendered_pages',
        help='Output directory for rendered images (default: rendered_pages)'
    )

    parser.add_argument(
        '--width', '-w',
        type=int,
        default=800,
        help='Page width in pixels (default: 800)'
    )

    # '-h' is reserved by argparse for --help, hence '-t' for the height.
    parser.add_argument(
        '--height', '-t',
        type=int,
        default=1000,
        help='Page height in pixels (default: 1000)'
    )

    parser.add_argument(
        '--margin', '-m',
        type=int,
        default=40,
        help='Page margin in pixels (default: 40)'
    )

    parser.add_argument(
        '--align', '-a',
        choices=['left', 'justify'],
        default='left',
        help='Text alignment: left or justify (default: left)'
    )

    args = parser.parse_args()

    # Validate arguments before touching the file system
    if not os.path.exists(args.epub_file):
        print(f"Error: EPUB file '{args.epub_file}' not found")
        return 1

    if args.pages <= 0:
        print("Error: Number of pages must be positive")
        return 1

    if args.width <= 0 or args.height <= 0:
        print("Error: Page width and height must be positive")
        return 1

    if args.margin < 0:
        print("Error: Margin must not be negative")
        return 1

    # Create output directory
    try:
        os.makedirs(args.output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        return 1

    print(f"Loading EPUB file: {args.epub_file}")

    # Load the EPUB file
    try:
        book = read_epub(args.epub_file)
        print(f"Successfully loaded EPUB: {book.get_title() or 'Unknown Title'}")

        # Print book information
        author = book.get_metadata('AUTHOR')
        if author:
            print(f"Author: {author}")

        print(f"Chapters: {len(book.chapters) if hasattr(book, 'chapters') else 'Unknown'}")

    except Exception as e:
        print(f"Error loading EPUB file: {e}")
        traceback.print_exc()
        return 1

    # Extract all blocks from the book
    print("Extracting content blocks...")
    try:
        all_blocks = get_all_blocks_from_book(book)
        print(f"Extracted {len(all_blocks)} content blocks")

        if not all_blocks:
            print("No content blocks found in EPUB. The book might be empty.")
            return 1

    except Exception as e:
        print(f"Error extracting blocks: {e}")
        traceback.print_exc()
        return 1

    # Set up page style
    page_size = (args.width, args.height)
    page_style = PageStyle(
        background_color=(255, 255, 255),
        border_width=args.margin,
        border_color=(200, 200, 200),
        padding=(10, 10, 10, 10),  # top, right, bottom, left
        line_spacing=5,
        inter_block_spacing=15
    )

    # Set alignment
    alignment = Alignment.JUSTIFY if args.align == 'justify' else Alignment.LEFT
    print(f"Setting up layouter with page size {page_size} and {args.align} alignment")

    # Layout blocks on pages
    print(f"Rendering up to {args.pages} pages using DocumentLayouter...")

    try:
        pages = layout_blocks_on_pages(
            all_blocks,
            page_style,
            page_size,
            args.pages,
            alignment_override=alignment
        )

        if not pages:
            print("No pages were generated.")
            return 1

        print(f"Generated {len(pages)} pages")

        # Save each page to an image; a failing page does not abort the run,
        # but it is counted so the final report is accurate.
        saved_count = 0
        for i, page in enumerate(pages):
            print(f"Saving page {i + 1}/{len(pages)}...")

            try:
                # Render the page
                image = page.render()

                # Save the image
                output_filename = f"page_{i + 1:03d}.png"
                output_path = os.path.join(args.output_dir, output_filename)
                image.save(output_path, 'PNG')

                print(f"Saved: {output_path}")
                saved_count += 1

            except Exception as e:
                print(f"Error saving page {i + 1}: {e}")
                traceback.print_exc()
                continue

        if saved_count == 0:
            print("Error: No pages could be saved")
            return 1

        failed_count = len(pages) - saved_count
        if failed_count:
            print(f"Warning: {failed_count} page(s) could not be saved")

        print(f"\nCompleted! Rendered {saved_count} pages to {args.output_dir}")

    except Exception as e:
        print(f"Error during pagination/rendering: {e}")
        traceback.print_exc()
        return 1

    return 0
|
|
|
|
|
|
# Script entry point: run the CLI and hand its return value to the shell
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|