This commit is contained in:
parent
b1c4a1c125
commit
36281be77a
345
examples/epub_page_renderer.py
Executable file
345
examples/epub_page_renderer.py
Executable file
@ -0,0 +1,345 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple EPUB page renderer tool.
|
||||
|
||||
This tool uses the pyWebLayout epub_reader and typesetting modules to:
|
||||
1. Load an EPUB file
|
||||
2. Render the first X pages according to command line arguments
|
||||
3. Save the pages as PNG images
|
||||
|
||||
Usage:
|
||||
python epub_page_renderer.py book.epub --pages 5 --output-dir rendered_pages
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# Add the parent directory to sys.path to import pyWebLayout
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
try:
|
||||
from pyWebLayout.io.readers.epub_reader import read_epub
|
||||
from pyWebLayout.layout.document_pagination import DocumentPaginator
|
||||
from pyWebLayout.concrete.page import Page
|
||||
from pyWebLayout.style.fonts import Font
|
||||
from pyWebLayout.style.layout import Alignment
|
||||
from PIL import Image, ImageDraw
|
||||
except ImportError as e:
|
||||
print(f"Error importing required modules: {e}")
|
||||
print("Make sure pyWebLayout is properly installed and PIL is available")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# Size used for the error placeholder when the page does not expose a usable
# ``_size``; matches the CLI defaults for --width / --height.
_FALLBACK_PAGE_SIZE = (800, 1000)


def _build_error_image(page, message):
    """Return a white, page-sized image with *message* drawn in red.

    The size is taken from ``page._size``; if that attribute is missing or
    cannot be turned into a (width, height) pair accepted by PIL, the
    fallback size is used so that building the placeholder cannot itself fail
    on a broken page.
    """
    try:
        width, height = page._size
        placeholder = Image.new('RGB', (int(width), int(height)), 'white')
    except (AttributeError, TypeError, ValueError):
        placeholder = Image.new('RGB', _FALLBACK_PAGE_SIZE, 'white')
    ImageDraw.Draw(placeholder).text((20, 20), message, fill='red')
    return placeholder


def render_page_to_image(page: Page) -> Image.Image:
    """
    Render a Page object to a PIL Image using the page's own render() method.

    Args:
        page: The Page object to render

    Returns:
        The image returned by ``page.render()``.  If rendering raises, or
        returns something that is not a PIL Image, a white placeholder image
        with the error message written in red is returned instead, so the
        caller always receives an image it can save.
    """
    try:
        rendered_image = page.render()
    except Exception as e:
        # Best effort: report the failure and hand back a placeholder rather
        # than aborting the whole run because of one page.
        print(f"Warning: Error rendering page: {e}")
        return _build_error_image(page, f"Rendering error: {str(e)}")

    if isinstance(rendered_image, Image.Image):
        return rendered_image

    return _build_error_image(page, "Error: Page.render() did not return PIL Image")
|
||||
|
||||
|
||||
def _paragraph_text(para_obj):
    """Join the words of a paragraph-like object into a single string.

    Words are read from ``para_obj.words()`` when that attribute is callable
    (each yielded item is unpacked as a pair whose second element is the
    word), otherwise from ``para_obj._words``.  A word contributes its
    ``text`` attribute if it has one, else ``str(word)``.

    Returns "(No text)" when no words are found and a
    "(Text extraction error: ...)" marker if reading the words raises.
    """
    try:
        if callable(getattr(para_obj, 'words', None)):
            candidates = [word for _, word in para_obj.words()]
        elif hasattr(para_obj, '_words'):
            candidates = list(para_obj._words)
        else:
            candidates = []
        words = [w.text if hasattr(w, 'text') else str(w) for w in candidates]
    except Exception as e:
        return f"(Text extraction error: {str(e)})"

    return ' '.join(words) if words else "(No text)"


def extract_text_from_page(page: Page) -> str:
    """
    Extract text content from a Page object for verification purposes.

    The page's ``_children`` are walked recursively and one line is emitted
    per recognised element (paragraph, heading, list, table, image, plain
    text, line of text objects); anything else is shown as ``[ClassName]``.

    Args:
        page: The Page object to extract text from

    Returns:
        String containing the page's text content, starting with a
        "=== PAGE CONTENT ===" header.  If extraction fails, the error
        message and traceback are appended to the text instead of raising.
    """
    text_lines = ["=== PAGE CONTENT ===", ""]

    try:
        if hasattr(page, '_children'):
            # Imported once per page instead of on every recursive call.
            from pyWebLayout.abstract.block import (
                Paragraph, Heading, HList, Table, Image as AbstractImage
            )

            def extract_from_element(element, indent_level=0):
                indent = " " * indent_level

                # Heading is tested before Paragraph so that headings keep
                # their own label even if Heading derives from Paragraph
                # (NOTE(review): class hierarchy not visible here - confirm).
                if isinstance(element, Heading):
                    heading_text = _paragraph_text(element)
                    if heading_text:
                        text_lines.append(f"{indent}HEADING: {heading_text}")

                elif isinstance(element, Paragraph):
                    paragraph_text = _paragraph_text(element)
                    if paragraph_text:
                        text_lines.append(f"{indent}PARAGRAPH: {paragraph_text}")

                elif isinstance(element, HList):
                    text_lines.append(f"{indent}LIST:")
                    try:
                        for item in element.items():
                            item_text = _paragraph_text(item)
                            if item_text:
                                text_lines.append(f"{indent} - {item_text}")
                    except Exception:
                        # Best effort: a broken list must not hide the rest
                        # of the page, but Ctrl-C / SystemExit still escape.
                        text_lines.append(f"{indent} (List content extraction failed)")

                elif isinstance(element, Table):
                    text_lines.append(f"{indent}[TABLE]")

                elif isinstance(element, AbstractImage):
                    alt_text = getattr(element, 'alt_text', '')
                    src = getattr(element, 'src', 'Unknown')
                    text_lines.append(f"{indent}[IMAGE: {alt_text or src}]")

                # Containers: recurse into their children one level deeper
                elif hasattr(element, '_children') and element._children:
                    for child in element._children:
                        extract_from_element(child, indent_level + 1)

                # Plain text elements
                elif hasattr(element, 'text'):
                    text = str(element.text).strip()
                    if text:
                        text_lines.append(f"{indent}{text}")

                # Lines made of several text objects
                elif hasattr(element, '_text_objects') and element._text_objects:
                    line_text = [
                        str(text_obj.text)
                        for text_obj in element._text_objects
                        if hasattr(text_obj, 'text')
                    ]
                    if line_text:
                        text_lines.append(f"{indent}{' '.join(line_text)}")

                # Anything else is reported by class name only
                else:
                    text_lines.append(f"{indent}[{element.__class__.__name__}]")

            for child in page._children:
                extract_from_element(child)

        # Only the header and the blank line are present: nothing was found
        if len(text_lines) <= 2:
            text_lines.append("(No text content found)")

    except Exception as e:
        import traceback
        text_lines.append(f"Error extracting text: {str(e)}")
        text_lines.append(traceback.format_exc())

    return "\n".join(text_lines)
|
||||
|
||||
|
||||
def main():
    """
    Parse command line arguments, paginate the EPUB and write the pages out.

    For every generated page a ``page_NNN.png`` image and a ``page_NNN.txt``
    text dump are written to the output directory.

    Returns:
        Process exit status: 0 when at least one page was written; 1 on
        invalid arguments, on a load/pagination failure, or when no page
        could be written.
    """
    parser = argparse.ArgumentParser(
        description='Render EPUB pages to images using pyWebLayout',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python epub_page_renderer.py book.epub --pages 5
  python epub_page_renderer.py book.epub --pages 10 --output-dir my_output --width 600 --height 800
        """
    )

    parser.add_argument(
        'epub_file',
        help='Path to the EPUB file to render'
    )

    parser.add_argument(
        '--pages', '-p',
        type=int,
        default=5,
        help='Number of pages to render (default: 5)'
    )

    parser.add_argument(
        '--output-dir', '-o',
        default='rendered_pages',
        help='Output directory for rendered images (default: rendered_pages)'
    )

    parser.add_argument(
        '--width', '-w',
        type=int,
        default=800,
        help='Page width in pixels (default: 800)'
    )

    parser.add_argument(
        '--height', '-t',
        type=int,
        default=1000,
        help='Page height in pixels (default: 1000)'
    )

    parser.add_argument(
        '--margin', '-m',
        type=int,
        default=40,
        help='Page margin in pixels (default: 40)'
    )

    args = parser.parse_args()

    # Validate every argument before touching the file system, so a bad
    # invocation never leaves an empty output directory behind.
    if not os.path.exists(args.epub_file):
        print(f"Error: EPUB file '{args.epub_file}' not found")
        return 1

    if args.pages <= 0:
        print("Error: Number of pages must be positive")
        return 1

    if args.width <= 0 or args.height <= 0:
        print("Error: Page width and height must be positive")
        return 1

    # The margin is applied on all four sides, so twice the margin must be
    # smaller than the shorter page edge for any content area to remain.
    if args.margin < 0 or 2 * args.margin >= min(args.width, args.height):
        print("Error: Margin must be non-negative and leave room for content on the page")
        return 1

    # Create output directory
    try:
        os.makedirs(args.output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        return 1

    print(f"Loading EPUB file: {args.epub_file}")

    # Load the EPUB file
    try:
        book = read_epub(args.epub_file)
        print(f"Successfully loaded EPUB: {book.get_title() or 'Unknown Title'}")

        # Print book information
        author = book.get_metadata('AUTHOR')
        if author:
            print(f"Author: {author}")

        print(f"Chapters: {len(book.chapters) if hasattr(book, 'chapters') else 'Unknown'}")

    except Exception as e:
        print(f"Error loading EPUB file: {e}")
        return 1

    # Set up pagination
    page_size = (args.width, args.height)
    margins = (args.margin, args.margin, args.margin, args.margin)  # top, right, bottom, left

    print(f"Setting up pagination with page size {page_size} and margins {margins}")

    try:
        paginator = DocumentPaginator(
            document=book,
            page_size=page_size,
            margins=margins,
            spacing=5,
            halign=Alignment.LEFT
        )
    except Exception as e:
        print(f"Error setting up paginator: {e}")
        return 1

    # Render pages
    print(f"Rendering {args.pages} pages...")

    rendered_count = 0
    try:
        # Generate pages
        pages = paginator.paginate(max_pages=args.pages)

        if not pages:
            print("No pages were generated. The book might be empty or there might be an issue with pagination.")
            return 1

        print(f"Generated {len(pages)} pages")

        # Render each page to an image and extract text
        for page_number, page in enumerate(pages, start=1):
            print(f"Rendering page {page_number}/{len(pages)}...")

            try:
                # Create image from page using pyWebLayout's built-in rendering
                image = render_page_to_image(page)

                # Save the image
                output_path = os.path.join(args.output_dir, f"page_{page_number:03d}.png")
                image.save(output_path, 'PNG')

                # Extract and save text content for verification
                page_text = extract_text_from_page(page)
                text_path = os.path.join(args.output_dir, f"page_{page_number:03d}.txt")
                with open(text_path, 'w', encoding='utf-8') as f:
                    f.write(page_text)

                print(f"Saved: {output_path} and {text_path}")
                rendered_count += 1

            except Exception as e:
                # One bad page must not stop the remaining pages
                print(f"Error rendering page {page_number}: {e}")

        # Report what was actually written, not what was attempted
        print(f"\nCompleted! Rendered {rendered_count} pages to {args.output_dir}")
        failed_count = len(pages) - rendered_count
        if failed_count:
            print(f"Warning: {failed_count} page(s) could not be rendered")

        # Show pagination progress
        if hasattr(paginator, 'get_progress'):
            progress = paginator.get_progress() * 100
            print(f"Progress through book: {progress:.1f}%")

    except Exception as e:
        print(f"Error during pagination/rendering: {e}")
        return 1

    # A run that produced no output at all is a failure for the caller
    return 0 if rendered_count else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Hand main()'s status code to the interpreter as the process exit code.
    raise SystemExit(main())
|
||||
@ -270,7 +270,7 @@ class FormattedSpan:
|
||||
return word
|
||||
|
||||
|
||||
class LineBreak:
|
||||
class LineBreak():
|
||||
"""
|
||||
A line break element that forces a new line within text content.
|
||||
While this is an inline element that can occur within paragraphs,
|
||||
|
||||
@ -9,7 +9,7 @@ This package contains styling-related components including:
|
||||
"""
|
||||
|
||||
# Import alignment options
|
||||
from pyWebLayout.style.alignment import Alignment
|
||||
from pyWebLayout.style.layout import Alignment
|
||||
|
||||
# Import font-related classes
|
||||
from pyWebLayout.style.fonts import (
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
"""
|
||||
Alignment options for text and elements in the pyWebLayout library.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
|
||||
class Alignment(Enum):
|
||||
"""
|
||||
Enum for alignment options used in layout and rendering.
|
||||
"""
|
||||
LEFT = 1
|
||||
CENTER = 2
|
||||
RIGHT = 3
|
||||
TOP = 4
|
||||
BOTTOM = 5
|
||||
JUSTIFY = 6
|
||||
@ -77,45 +77,19 @@ class Font:
|
||||
"""Load the font using PIL's ImageFont with consistent bundled font"""
|
||||
try:
|
||||
if self._font_path:
|
||||
# Use specified font path
|
||||
self._font = ImageFont.truetype(
|
||||
self._font_path,
|
||||
self._font_size
|
||||
)
|
||||
else:
|
||||
# Try bundled font first for consistency across environments
|
||||
# Use bundled font for consistency across environments
|
||||
bundled_font_path = self._get_bundled_font_path()
|
||||
|
||||
font_candidates = []
|
||||
if bundled_font_path:
|
||||
font_candidates.append(bundled_font_path)
|
||||
|
||||
# Fallback to system fonts if bundled font is not available
|
||||
font_candidates.extend([
|
||||
# Linux fonts
|
||||
"/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
|
||||
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
|
||||
"/usr/share/fonts/TTF/DejaVuSans.ttf",
|
||||
"/System/Library/Fonts/Helvetica.ttc", # macOS
|
||||
"C:/Windows/Fonts/arial.ttf", # Windows
|
||||
"C:/Windows/Fonts/calibri.ttf", # Windows
|
||||
# Fallback to default
|
||||
None
|
||||
])
|
||||
|
||||
self._font = None
|
||||
for font_path in font_candidates:
|
||||
try:
|
||||
if font_path is None:
|
||||
# Use PIL's default font as last resort
|
||||
self._font = ImageFont.load_default()
|
||||
break
|
||||
else:
|
||||
self._font = ImageFont.truetype(font_path, self._font_size)
|
||||
break
|
||||
except (OSError, IOError):
|
||||
continue
|
||||
|
||||
if self._font is None:
|
||||
self._font = ImageFont.truetype(bundled_font_path, self._font_size)
|
||||
else:
|
||||
# Only fall back to PIL's default font if bundled font is not available
|
||||
self._font = ImageFont.load_default()
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@ -1,11 +1,17 @@
|
||||
"""
|
||||
Layout and alignment options for the pyWebLayout library.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Alignment(Enum):
|
||||
"""
|
||||
Enum for alignment options used in layout and rendering.
|
||||
"""
|
||||
LEFT = 1
|
||||
CENTER = 2
|
||||
RIGHT = 3
|
||||
TOP = 4
|
||||
BOTTOM = 5
|
||||
JUSTIFY = 6
|
||||
|
||||
|
||||
|
||||
@ -244,7 +244,7 @@ class TestLine(unittest.TestCase):
|
||||
halign=Alignment.LEFT
|
||||
)
|
||||
|
||||
# Create a word to add
|
||||
# Create a word to add
|
||||
|
||||
for i in range(100):
|
||||
word = Word(text="AAAAAAAA", style=self.style)
|
||||
@ -254,7 +254,7 @@ class TestLine(unittest.TestCase):
|
||||
success, overflow_part = line.add_word(word)
|
||||
# If successful, the word should be added
|
||||
if overflow_part:
|
||||
self.assertEqual(overflow_part.text , "AAAA")
|
||||
self.assertEqual(overflow_part.text , "AA")
|
||||
return
|
||||
|
||||
self.assertFalse(True)
|
||||
|
||||
@ -74,7 +74,7 @@ def main():
|
||||
try:
|
||||
print("Running tests with coverage...")
|
||||
os.system("python -m coverage erase") # Clear old coverage data
|
||||
os.system("python -m coverage run --source=pyWebLayout -m unittest tests.test_abstract_inline -v")
|
||||
os.system("python -m coverage run --source=pyWebLayout -m unittest tests -v")
|
||||
os.system("python -m coverage xml -o coverage.xml")
|
||||
os.system("python -m coverage report --include='pyWebLayout/abstract/inline.py'")
|
||||
print("✓ Fresh coverage data generated")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user