diff --git a/examples/epub_page_renderer.py b/examples/epub_page_renderer.py
new file mode 100755
index 0000000..08e27ea
--- /dev/null
+++ b/examples/epub_page_renderer.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+"""
+Simple EPUB page renderer tool.
+
+This tool uses the pyWebLayout epub_reader and typesetting modules to:
+1. Load an EPUB file
+2. Render the first X pages according to command line arguments
+3. Save the pages as PNG images
+
+Usage:
+    python epub_page_renderer.py book.epub --pages 5 --output-dir rendered_pages
+"""
+
+import os
+import sys
+import argparse
+from pathlib import Path
+from typing import Optional
+
+# Add the parent directory to sys.path to import pyWebLayout
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+try:
+    from pyWebLayout.io.readers.epub_reader import read_epub
+    from pyWebLayout.layout.document_pagination import DocumentPaginator
+    from pyWebLayout.concrete.page import Page
+    from pyWebLayout.style.fonts import Font
+    from pyWebLayout.style.layout import Alignment
+    from PIL import Image, ImageDraw
+except ImportError as e:
+    print(f"Error importing required modules: {e}")
+    print("Make sure pyWebLayout is properly installed and PIL is available")
+    sys.exit(1)
+
+
+def render_page_to_image(page: Page) -> Image.Image:
+    """
+    Render a Page object to a PIL Image using pyWebLayout's built-in rendering.
+
+    Args:
+        page: The Page object to render
+
+    Returns:
+        PIL Image object
+    """
+    try:
+        # Use the Page's built-in render method
+        rendered_image = page.render()
+        if isinstance(rendered_image, Image.Image):
+            return rendered_image
+        else:
+            # If render() doesn't return a PIL Image, create error image
+            error_image = Image.new('RGB', page._size, 'white')
+            draw = ImageDraw.Draw(error_image)
+            draw.text((20, 20), "Error: Page.render() did not return PIL Image", fill='red')
+            return error_image
+
+    except Exception as e:
+        # Create error image if rendering fails
+        error_image = Image.new('RGB', page._size, 'white')
+        draw = ImageDraw.Draw(error_image)
+        draw.text((20, 20), f"Rendering error: {str(e)}", fill='red')
+        print(f"Warning: Error rendering page: {e}")
+        return error_image
+
+
+def extract_text_from_page(page: Page) -> str:
+    """
+    Extract text content from a Page object for verification purposes.
+
+    Args:
+        page: The Page object to extract text from
+
+    Returns:
+        String containing the page's text content
+    """
+    text_lines = []
+    text_lines.append("=== PAGE CONTENT ===")
+    text_lines.append("")
+
+    try:
+        # Recursively extract text from page children
+        def extract_from_element(element, indent_level=0):
+            indent = " " * indent_level
+
+            # Import abstract block types
+            from pyWebLayout.abstract.block import Paragraph, Heading, HList, Table, Image as AbstractImage
+
+            # Handle abstract block objects first
+            if isinstance(element, Paragraph):
+                # Extract text from paragraph
+                paragraph_text = extract_text_from_paragraph(element)
+                if paragraph_text:
+                    text_lines.append(f"{indent}PARAGRAPH: {paragraph_text}")
+
+            elif isinstance(element, Heading):
+                # Extract text from heading
+                heading_text = extract_text_from_paragraph(element)
+                if heading_text:
+                    text_lines.append(f"{indent}HEADING: {heading_text}")
+
+            elif isinstance(element, HList):
+                text_lines.append(f"{indent}LIST:")
+                # Extract text from list items
+                try:
+                    for item in element.items():
+                        item_text = extract_text_from_paragraph(item)
+                        if item_text:
+                            text_lines.append(f"{indent} - {item_text}")
+                except Exception:
+                    text_lines.append(f"{indent} (List content extraction failed)")
+
+            elif isinstance(element, Table):
+                text_lines.append(f"{indent}[TABLE]")
+
+            elif isinstance(element, AbstractImage):
+                alt_text = getattr(element, 'alt_text', '')
+                src = getattr(element, 'src', 'Unknown')
+                text_lines.append(f"{indent}[IMAGE: {alt_text or src}]")
+
+            # Handle containers with children
+            elif hasattr(element, '_children') and element._children:
+                for child in element._children:
+                    extract_from_element(child, indent_level + 1)
+
+            # Handle text elements
+            elif hasattr(element, 'text'):
+                text = str(element.text).strip()
+                if text:
+                    text_lines.append(f"{indent}{text}")
+
+            # Handle lines with text objects
+            elif hasattr(element, '_text_objects') and element._text_objects:
+                line_text = []
+                for text_obj in element._text_objects:
+                    if hasattr(text_obj, 'text'):
+                        line_text.append(str(text_obj.text))
+                if line_text:
+                    text_lines.append(f"{indent}{' '.join(line_text)}")
+
+            # Handle other object types by showing their class name
+            else:
+                class_name = element.__class__.__name__
+                text_lines.append(f"{indent}[{class_name}]")
+
+        # Helper function to extract text from paragraph-like objects
+        def extract_text_from_paragraph(para_obj):
+            words = []
+            try:
+                # Try to get words from the paragraph
+                if hasattr(para_obj, 'words') and callable(para_obj.words):
+                    for _, word in para_obj.words():
+                        if hasattr(word, 'text'):
+                            words.append(word.text)
+                        else:
+                            words.append(str(word))
+                elif hasattr(para_obj, '_words'):
+                    # Direct access to words list
+                    for word in para_obj._words:
+                        if hasattr(word, 'text'):
+                            words.append(word.text)
+                        else:
+                            words.append(str(word))
+            except Exception as e:
+                return f"(Text extraction error: {str(e)})"
+
+            return ' '.join(words) if words else "(No text)"
+
+        # Extract text from page children
+        if hasattr(page, '_children'):
+            for child in page._children:
+                extract_from_element(child)
+
+        # If no text was extracted, add a note
+        if len(text_lines) <= 2:  # Only header and empty line
+            text_lines.append("(No text content found)")
+
+    except Exception as e:
+        text_lines.append(f"Error extracting text: {str(e)}")
+        import traceback
+        text_lines.append(traceback.format_exc())
+
+    return "\n".join(text_lines)
+
+
+def main():
+    """Main function to handle command line arguments and process the EPUB."""
+    parser = argparse.ArgumentParser(
+        description='Render EPUB pages to images using pyWebLayout',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python epub_page_renderer.py book.epub --pages 5
+  python epub_page_renderer.py book.epub --pages 10 --output-dir my_output --width 600 --height 800
+        """
+    )
+
+    parser.add_argument(
+        'epub_file',
+        help='Path to the EPUB file to render'
+    )
+
+    parser.add_argument(
+        '--pages', '-p',
+        type=int,
+        default=5,
+        help='Number of pages to render (default: 5)'
+    )
+
+    parser.add_argument(
+        '--output-dir', '-o',
+        default='rendered_pages',
+        help='Output directory for rendered images (default: rendered_pages)'
+    )
+
+    parser.add_argument(
+        '--width', '-w',
+        type=int,
+        default=800,
+        help='Page width in pixels (default: 800)'
+    )
+
+    parser.add_argument(
+        '--height', '-t',
+        type=int,
+        default=1000,
+        help='Page height in pixels (default: 1000)'
+    )
+
+    parser.add_argument(
+        '--margin', '-m',
+        type=int,
+        default=40,
+        help='Page margin in pixels (default: 40)'
+    )
+
+    args = parser.parse_args()
+
+    # Validate arguments
+    if not os.path.exists(args.epub_file):
+        print(f"Error: EPUB file '{args.epub_file}' not found")
+        return 1
+
+    if args.pages <= 0:
+        print("Error: Number of pages must be positive")
+        return 1
+
+    # Create output directory
+    try:
+        os.makedirs(args.output_dir, exist_ok=True)
+    except OSError as e:
+        print(f"Error creating output directory: {e}")
+        return 1
+
+    print(f"Loading EPUB file: {args.epub_file}")
+
+    # Load the EPUB file
+    try:
+        book = read_epub(args.epub_file)
+        print(f"Successfully loaded EPUB: {book.get_title() or 'Unknown Title'}")
+
+        # Print book information
+        author = book.get_metadata('AUTHOR')
+        if author:
+            print(f"Author: {author}")
+
+        print(f"Chapters: {len(book.chapters) if hasattr(book, 'chapters') else 'Unknown'}")
+
+    except Exception as e:
+        print(f"Error loading EPUB file: {e}")
+        return 1
+
+    # Set up pagination
+    page_size = (args.width, args.height)
+    margins = (args.margin, args.margin, args.margin, args.margin)  # top, right, bottom, left
+
+    print(f"Setting up pagination with page size {page_size} and margins {margins}")
+
+    try:
+        paginator = DocumentPaginator(
+            document=book,
+            page_size=page_size,
+            margins=margins,
+            spacing=5,
+            halign=Alignment.LEFT
+        )
+    except Exception as e:
+        print(f"Error setting up paginator: {e}")
+        return 1
+
+    # Render pages
+    print(f"Rendering {args.pages} pages...")
+
+    try:
+        # Generate pages
+        pages = paginator.paginate(max_pages=args.pages)
+
+        if not pages:
+            print("No pages were generated. The book might be empty or there might be an issue with pagination.")
+            return 1
+
+        print(f"Generated {len(pages)} pages")
+
+        # Render each page to an image and extract text
+        rendered_count = 0  # pages actually written; failures are skipped below
+        for i, page in enumerate(pages):
+            print(f"Rendering page {i + 1}/{len(pages)}...")
+
+            try:
+                # Create image from page using pyWebLayout's built-in rendering
+                image = render_page_to_image(page)
+
+                # Save the image
+                output_filename = f"page_{i + 1:03d}.png"
+                output_path = os.path.join(args.output_dir, output_filename)
+                image.save(output_path, 'PNG')
+
+                # Extract and save text content for verification
+                page_text = extract_text_from_page(page)
+                text_filename = f"page_{i + 1:03d}.txt"
+                text_path = os.path.join(args.output_dir, text_filename)
+                with open(text_path, 'w', encoding='utf-8') as f:
+                    f.write(page_text)
+
+                print(f"Saved: {output_path} and {text_path}")
+                rendered_count += 1
+
+            except Exception as e:
+                print(f"Error rendering page {i + 1}: {e}")
+                continue
+
+        print(f"\nCompleted! Rendered {rendered_count} of {len(pages)} pages to {args.output_dir}")
+
+        # Show pagination progress
+        if hasattr(paginator, 'get_progress'):
+            progress = paginator.get_progress() * 100
+            print(f"Progress through book: {progress:.1f}%")
+
+    except Exception as e:
+        print(f"Error during pagination/rendering: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/pyWebLayout/abstract/inline.py b/pyWebLayout/abstract/inline.py
index f3cd2b5..9dc64c4 100644
--- a/pyWebLayout/abstract/inline.py
+++ b/pyWebLayout/abstract/inline.py
@@ -270,7 +270,7 @@ class FormattedSpan:
         return word
 
 
-class LineBreak:
+class LineBreak():
     """
     A line break element that forces a new line within text content.
     While this is an inline element that can occur within paragraphs,
diff --git a/pyWebLayout/style/__init__.py b/pyWebLayout/style/__init__.py
index ce737f1..6dcebda 100644
--- a/pyWebLayout/style/__init__.py
+++ b/pyWebLayout/style/__init__.py
@@ -9,7 +9,7 @@ This package contains styling-related components including:
 """
 
 # Import alignment options
-from pyWebLayout.style.alignment import Alignment
+from pyWebLayout.style.layout import Alignment
 
 # Import font-related classes
 from pyWebLayout.style.fonts import (
diff --git a/pyWebLayout/style/alignment.py b/pyWebLayout/style/alignment.py
deleted file mode 100644
index 98d0dd1..0000000
--- a/pyWebLayout/style/alignment.py
+++ /dev/null
@@ -1,16 +0,0 @@
-"""
-Alignment options for text and elements in the pyWebLayout library.
-"""
-
-from enum import Enum
-
-class Alignment(Enum):
-    """
-    Enum for alignment options used in layout and rendering.
-    """
-    LEFT = 1
-    CENTER = 2
-    RIGHT = 3
-    TOP = 4
-    BOTTOM = 5
-    JUSTIFY = 6
diff --git a/pyWebLayout/style/fonts.py b/pyWebLayout/style/fonts.py
index d62f797..dd3c2d1 100644
--- a/pyWebLayout/style/fonts.py
+++ b/pyWebLayout/style/fonts.py
@@ -77,45 +77,19 @@ class Font:
         """Load the font using PIL's ImageFont with consistent bundled font"""
         try:
             if self._font_path:
+                # Use specified font path
                 self._font = ImageFont.truetype(
                     self._font_path,
                     self._font_size
                 )
             else:
-                # Try bundled font first for consistency across environments
+                # Use bundled font for consistency across environments
                 bundled_font_path = self._get_bundled_font_path()
 
-                font_candidates = []
                 if bundled_font_path:
-                    font_candidates.append(bundled_font_path)
-
-                # Fallback to system fonts if bundled font is not available
-                font_candidates.extend([
-                    # Linux fonts
-                    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
-                    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
-                    "/usr/share/fonts/TTF/DejaVuSans.ttf",
-                    "/System/Library/Fonts/Helvetica.ttc",  # macOS
-                    "C:/Windows/Fonts/arial.ttf",  # Windows
-                    "C:/Windows/Fonts/calibri.ttf",  # Windows
-                    # Fallback to default
-                    None
-                ])
-
-                self._font = None
-                for font_path in font_candidates:
-                    try:
-                        if font_path is None:
-                            # Use PIL's default font as last resort
-                            self._font = ImageFont.load_default()
-                            break
-                        else:
-                            self._font = ImageFont.truetype(font_path, self._font_size)
-                            break
-                    except (OSError, IOError):
-                        continue
-
-                if self._font is None:
+                    self._font = ImageFont.truetype(bundled_font_path, self._font_size)
+                else:
+                    # Only fall back to PIL's default font if bundled font is not available
                     self._font = ImageFont.load_default()
 
         except Exception as e:
diff --git a/pyWebLayout/style/layout.py b/pyWebLayout/style/layout.py
index 45b545f..6bff99d 100644
--- a/pyWebLayout/style/layout.py
+++ b/pyWebLayout/style/layout.py
@@ -1,11 +1,17 @@
+"""
+Layout and alignment options for the pyWebLayout library.
+"""
+
 from enum import Enum
 
+
 class Alignment(Enum):
+    """
+    Enum for alignment options used in layout and rendering.
+    """
     LEFT = 1
     CENTER = 2
     RIGHT = 3
     TOP = 4
     BOTTOM = 5
     JUSTIFY = 6
-
-
diff --git a/tests/concrete/test_concrete_text.py b/tests/concrete/test_concrete_text.py
index d5c95d7..6499c41 100644
--- a/tests/concrete/test_concrete_text.py
+++ b/tests/concrete/test_concrete_text.py
@@ -244,7 +244,7 @@ class TestLine(unittest.TestCase):
             halign=Alignment.LEFT
         )
 
-        # Create a word to add
+        # Create a word to add
 
         for i in range(100):
             word = Word(text="AAAAAAAA", style=self.style)
@@ -254,7 +254,7 @@ class TestLine(unittest.TestCase):
             success, overflow_part = line.add_word(word)
             # If successful, the word should be added
             if overflow_part:
-                self.assertEqual(overflow_part.text , "AAAA")
+                self.assertEqual(overflow_part.text , "AA")
                 return
 
         self.assertFalse(True)
diff --git a/update_coverage_gutters.py b/update_coverage_gutters.py
index f36e5fb..31370b1 100644
--- a/update_coverage_gutters.py
+++ b/update_coverage_gutters.py
@@ -74,7 +74,7 @@ def main():
     try:
         print("Running tests with coverage...")
         os.system("python -m coverage erase")  # Clear old coverage data
-        os.system("python -m coverage run --source=pyWebLayout -m unittest tests.test_abstract_inline -v")
+        os.system("python -m coverage run --source=pyWebLayout -m unittest discover -s tests -t . -v")
         os.system("python -m coverage xml -o coverage.xml")
         os.system("python -m coverage report --include='pyWebLayout/abstract/inline.py'")
         print("✓ Fresh coverage data generated")