simplification of fonts

2025-07-12 17:49:58 +02:00 · 2025-07-12 17:49:58 +02:00 · 36281be77a
commit 36281be77a
parent b1c4a1c125
8 changed files with 363 additions and 54 deletions
--- a/examples/epub_page_renderer.py
+++ b/examples/epub_page_renderer.py
@ -0,0 +1,345 @@
+#!/usr/bin/env python3
+"""
+Simple EPUB page renderer tool.
+
+This tool uses the pyWebLayout epub_reader and typesetting modules to:
+1. Load an EPUB file
+2. Render the first X pages according to command line arguments
+3. Save the pages as PNG images
+
+Usage:
+    python epub_page_renderer.py book.epub --pages 5 --output-dir rendered_pages
+"""
+
+import os
+import sys
+import argparse
+from pathlib import Path
+from typing import Optional
+
+# Add the parent directory to sys.path to import pyWebLayout
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+try:
+    from pyWebLayout.io.readers.epub_reader import read_epub
+    from pyWebLayout.layout.document_pagination import DocumentPaginator
+    from pyWebLayout.concrete.page import Page
+    from pyWebLayout.style.fonts import Font
+    from pyWebLayout.style.layout import Alignment
+    from PIL import Image, ImageDraw
+except ImportError as e:
+    print(f"Error importing required modules: {e}")
+    print("Make sure pyWebLayout is properly installed and PIL is available")
+    sys.exit(1)
+
+
+def render_page_to_image(page: Page) -> Image.Image:
+    """
+    Produce a PIL Image for a Page via the page's own ``render()`` method.
+
+    When ``render()`` raises or returns something other than a PIL Image,
+    a white image of size ``page._size`` carrying a red error message is
+    returned instead.
+
+    Args:
+        page: The Page object to render
+
+    Returns:
+        The rendered PIL Image, or a placeholder describing the failure
+    """
+    def placeholder(message):
+        # White canvas the size of the page, with the message in red
+        canvas = Image.new('RGB', page._size, 'white')
+        ImageDraw.Draw(canvas).text((20, 20), message, fill='red')
+        return canvas
+
+    try:
+        result = page.render()
+        if isinstance(result, Image.Image):
+            return result
+        return placeholder("Error: Page.render() did not return PIL Image")
+    except Exception as e:
+        fallback = placeholder(f"Rendering error: {str(e)}")
+        print(f"Warning: Error rendering page: {e}")
+        return fallback
+
+
+def extract_text_from_page(page: Page) -> str:
+    """
+    Extract text content from a Page object for verification purposes.
+
+    The page's ``_children`` are walked recursively and one line is emitted
+    per recognised element (paragraph, heading, list, table, image, plain
+    text, line of text objects); anything else is shown as ``[ClassName]``.
+    Exceptions raised while extracting are reported in the returned text.
+
+    Args:
+        page: The Page object to extract text from
+
+    Returns:
+        String containing the page's text content
+    """
+    text_lines = ["=== PAGE CONTENT ===", ""]
+
+    try:
+        # Imported once here rather than on every recursive call; an import
+        # failure is still reported through the handler below.
+        from pyWebLayout.abstract.block import Paragraph, Heading, HList, Table, Image as AbstractImage
+
+        def word_text(word):
+            """Return a word's ``text`` attribute, or its str() form."""
+            return word.text if hasattr(word, 'text') else str(word)
+
+        def extract_text_from_paragraph(para_obj):
+            """Join the words of a paragraph-like object into one string."""
+            try:
+                if hasattr(para_obj, 'words') and callable(para_obj.words):
+                    # words() yields pairs; the second item is the word
+                    words = [word_text(word) for _, word in para_obj.words()]
+                elif hasattr(para_obj, '_words'):
+                    # Direct access to words list
+                    words = [word_text(word) for word in para_obj._words]
+                else:
+                    words = []
+            except Exception as e:
+                return f"(Text extraction error: {str(e)})"
+
+            return ' '.join(words) if words else "(No text)"
+
+        def extract_from_element(element, indent_level=0):
+            """Append the text found in ``element`` (and its children) to text_lines."""
+            indent = "  " * indent_level
+
+            # Handle abstract block objects first
+            if isinstance(element, Paragraph):
+                paragraph_text = extract_text_from_paragraph(element)
+                if paragraph_text:
+                    text_lines.append(f"{indent}PARAGRAPH: {paragraph_text}")
+
+            elif isinstance(element, Heading):
+                heading_text = extract_text_from_paragraph(element)
+                if heading_text:
+                    text_lines.append(f"{indent}HEADING: {heading_text}")
+
+            elif isinstance(element, HList):
+                text_lines.append(f"{indent}LIST:")
+                try:
+                    for item in element.items():
+                        item_text = extract_text_from_paragraph(item)
+                        if item_text:
+                            text_lines.append(f"{indent}  - {item_text}")
+                except Exception:
+                    # Best effort: note the failure and carry on. ``except Exception``
+                    # (not a bare ``except:``) lets KeyboardInterrupt/SystemExit through.
+                    text_lines.append(f"{indent}  (List content extraction failed)")
+
+            elif isinstance(element, Table):
+                text_lines.append(f"{indent}[TABLE]")
+
+            elif isinstance(element, AbstractImage):
+                alt_text = getattr(element, 'alt_text', '')
+                src = getattr(element, 'src', 'Unknown')
+                text_lines.append(f"{indent}[IMAGE: {alt_text or src}]")
+
+            # Handle containers with children
+            elif hasattr(element, '_children') and element._children:
+                for child in element._children:
+                    extract_from_element(child, indent_level + 1)
+
+            # Handle text elements
+            elif hasattr(element, 'text'):
+                text = str(element.text).strip()
+                if text:
+                    text_lines.append(f"{indent}{text}")
+
+            # Handle lines with text objects
+            elif hasattr(element, '_text_objects') and element._text_objects:
+                line_text = [
+                    str(text_obj.text)
+                    for text_obj in element._text_objects
+                    if hasattr(text_obj, 'text')
+                ]
+                if line_text:
+                    text_lines.append(f"{indent}{' '.join(line_text)}")
+
+            # Handle other object types by showing their class name
+            else:
+                text_lines.append(f"{indent}[{element.__class__.__name__}]")
+
+        # Extract text from page children
+        if hasattr(page, '_children'):
+            for child in page._children:
+                extract_from_element(child)
+
+        # If no text was extracted, add a note
+        if len(text_lines) <= 2:  # Only header and empty line
+            text_lines.append("(No text content found)")
+
+    except Exception as e:
+        text_lines.append(f"Error extracting text: {str(e)}")
+        import traceback
+        text_lines.append(traceback.format_exc())
+
+    return "\n".join(text_lines)
+
+
+def main():
+    """
+    Parse the command line, paginate the EPUB and write the pages to disk.
+
+    Each generated page is saved as ``page_NNN.png`` together with a
+    ``page_NNN.txt`` text dump for verification. A page that fails to
+    render or save is reported and skipped, and the closing summary counts
+    only the pages that were actually written.
+
+    Returns:
+        0 on success; 1 when an argument is invalid, the output directory
+        cannot be created, the EPUB cannot be loaded, or pagination fails
+        or produces no pages.
+    """
+    # Define the command-line interface
+    parser = argparse.ArgumentParser(
+        description='Render EPUB pages to images using pyWebLayout',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python epub_page_renderer.py book.epub --pages 5
+  python epub_page_renderer.py book.epub --pages 10 --output-dir my_output --width 600 --height 800
+        """
+    )
+
+    parser.add_argument('epub_file', help='Path to the EPUB file to render')
+
+    parser.add_argument('--pages', '-p', type=int, default=5,
+                        help='Number of pages to render (default: 5)')
+
+    parser.add_argument('--output-dir', '-o', default='rendered_pages',
+                        help='Output directory for rendered images (default: rendered_pages)')
+
+    parser.add_argument('--width', '-w', type=int, default=800,
+                        help='Page width in pixels (default: 800)')
+
+    # NOTE: '-h' is already taken by argparse's built-in --help option
+    parser.add_argument('--height', '-t', type=int, default=1000,
+                        help='Page height in pixels (default: 1000)')
+
+    parser.add_argument('--margin', '-m', type=int, default=40,
+                        help='Page margin in pixels (default: 40)')
+
+    args = parser.parse_args()
+
+    # Validate arguments
+    if not os.path.exists(args.epub_file):
+        print(f"Error: EPUB file '{args.epub_file}' not found")
+        return 1
+
+    if args.pages <= 0:
+        print("Error: Number of pages must be positive")
+        return 1
+
+    # A page with a non-positive dimension cannot be rendered to an image
+    if args.width <= 0 or args.height <= 0:
+        print("Error: Page width and height must be positive")
+        return 1
+
+    # Create output directory
+    try:
+        os.makedirs(args.output_dir, exist_ok=True)
+    except OSError as e:
+        print(f"Error creating output directory: {e}")
+        return 1
+
+    print(f"Loading EPUB file: {args.epub_file}")
+
+    # Load the EPUB file
+    try:
+        book = read_epub(args.epub_file)
+        print(f"Successfully loaded EPUB: {book.get_title() or 'Unknown Title'}")
+
+        # Print book information
+        author = book.get_metadata('AUTHOR')
+        if author:
+            print(f"Author: {author}")
+
+        print(f"Chapters: {len(book.chapters) if hasattr(book, 'chapters') else 'Unknown'}")
+
+    except Exception as e:
+        print(f"Error loading EPUB file: {e}")
+        return 1
+
+    # Set up pagination
+    page_size = (args.width, args.height)
+    margins = (args.margin, args.margin, args.margin, args.margin)  # top, right, bottom, left
+
+    print(f"Setting up pagination with page size {page_size} and margins {margins}")
+
+    try:
+        paginator = DocumentPaginator(
+            document=book,
+            page_size=page_size,
+            margins=margins,
+            spacing=5,
+            halign=Alignment.LEFT
+        )
+    except Exception as e:
+        print(f"Error setting up paginator: {e}")
+        return 1
+
+    # Render pages
+    print(f"Rendering {args.pages} pages...")
+
+    try:
+        # Generate pages
+        pages = paginator.paginate(max_pages=args.pages)
+
+        if not pages:
+            print("No pages were generated. The book might be empty or there might be an issue with pagination.")
+            return 1
+
+        print(f"Generated {len(pages)} pages")
+
+        # Render each page to an image and extract text
+        rendered_count = 0
+        for page_number, page in enumerate(pages, start=1):
+            print(f"Rendering page {page_number}/{len(pages)}...")
+
+            try:
+                # Create image from page using pyWebLayout's built-in rendering
+                image = render_page_to_image(page)
+
+                # Save the image
+                output_filename = f"page_{page_number:03d}.png"
+                output_path = os.path.join(args.output_dir, output_filename)
+                image.save(output_path, 'PNG')
+
+                # Extract and save text content for verification
+                page_text = extract_text_from_page(page)
+                text_filename = f"page_{page_number:03d}.txt"
+                text_path = os.path.join(args.output_dir, text_filename)
+                with open(text_path, 'w', encoding='utf-8') as f:
+                    f.write(page_text)
+
+                print(f"Saved: {output_path} and {text_path}")
+                rendered_count += 1
+
+            except Exception as e:
+                # Keep going: one bad page should not abort the whole run
+                print(f"Error rendering page {page_number}: {e}")
+
+        # Report what was actually written, not how many pages were attempted
+        print(f"\nCompleted! Rendered {rendered_count} pages to {args.output_dir}")
+
+        # Show pagination progress
+        if hasattr(paginator, 'get_progress'):
+            progress = paginator.get_progress() * 100
+            print(f"Progress through book: {progress:.1f}%")
+
+    except Exception as e:
+        print(f"Error during pagination/rendering: {e}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/pyWebLayout/abstract/inline.py
+++ b/pyWebLayout/abstract/inline.py
@ -270,7 +270,7 @@ class FormattedSpan:
        return word


-class LineBreak:
+class LineBreak():
    """
    A line break element that forces a new line within text content.
    While this is an inline element that can occur within paragraphs,
--- a/pyWebLayout/style/init.py
+++ b/pyWebLayout/style/init.py
@ -9,7 +9,7 @@ This package contains styling-related components including:
 """

 # Import alignment options
-from pyWebLayout.style.alignment import Alignment
+from pyWebLayout.style.layout import Alignment

 # Import font-related classes
 from pyWebLayout.style.fonts import (
--- a/pyWebLayout/style/alignment.py
+++ b/pyWebLayout/style/alignment.py
@ -1,16 +0,0 @@
-"""
-Alignment options for text and elements in the pyWebLayout library.
-"""
-
-from enum import Enum
-
-class Alignment(Enum):
-    """
-    Enum for alignment options used in layout and rendering.
-    """
-    LEFT = 1
-    CENTER = 2
-    RIGHT = 3
-    TOP = 4
-    BOTTOM = 5
-    JUSTIFY = 6
--- a/pyWebLayout/style/fonts.py
+++ b/pyWebLayout/style/fonts.py
@ -77,45 +77,19 @@ class Font:
        """Load the font using PIL's ImageFont with consistent bundled font"""
        try:
            if self._font_path:
+                # Use specified font path
                self._font = ImageFont.truetype(
                    self._font_path, 
                    self._font_size
                )
            else:
-                # Try bundled font first for consistency across environments
+                # Use bundled font for consistency across environments
                bundled_font_path = self._get_bundled_font_path()
                
-                font_candidates = []
                if bundled_font_path:
-                    font_candidates.append(bundled_font_path)
-                
-                # Fallback to system fonts if bundled font is not available
-                font_candidates.extend([
-                    # Linux fonts
-                    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
-                    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
-                    "/usr/share/fonts/TTF/DejaVuSans.ttf",
-                    "/System/Library/Fonts/Helvetica.ttc",  # macOS
-                    "C:/Windows/Fonts/arial.ttf",  # Windows
-                    "C:/Windows/Fonts/calibri.ttf",  # Windows
-                    # Fallback to default
-                    None
-                ])
-                
-                self._font = None
-                for font_path in font_candidates:
-                    try:
-                        if font_path is None:
-                            # Use PIL's default font as last resort
-                            self._font = ImageFont.load_default()
-                            break
-                        else:
-                            self._font = ImageFont.truetype(font_path, self._font_size)
-                            break
-                    except (OSError, IOError):
-                        continue
-                
-                if self._font is None:
+                    self._font = ImageFont.truetype(bundled_font_path, self._font_size)
+                else:
+                    # Only fall back to PIL's default font if bundled font is not available
                    self._font = ImageFont.load_default()
                    
        except Exception as e:
--- a/pyWebLayout/style/layout.py
+++ b/pyWebLayout/style/layout.py
@ -1,11 +1,17 @@
+"""
+Layout and alignment options for the pyWebLayout library.
+"""
+
 from enum import Enum

+
 class Alignment(Enum):
+    """
+    Alignment options for layout and rendering (LEFT, CENTER, RIGHT, TOP, BOTTOM, JUSTIFY).
+    """
    LEFT = 1
    CENTER = 2
    RIGHT = 3
    TOP = 4
    BOTTOM = 5
    JUSTIFY = 6
-
-
--- a/tests/concrete/test_concrete_text.py
+++ b/tests/concrete/test_concrete_text.py
@ -244,7 +244,7 @@ class TestLine(unittest.TestCase):
            halign=Alignment.LEFT
        )
        
-        # Create a word to add
+        # Create a word to add 

        for i in range(100):
            word = Word(text="AAAAAAAA", style=self.style)
@ -254,7 +254,7 @@ class TestLine(unittest.TestCase):
            success, overflow_part = line.add_word(word)
            # If successful, the word should be added
            if overflow_part:
-                self.assertEqual(overflow_part.text , "AAAA")
+                self.assertEqual(overflow_part.text , "AA")
                return
            
        self.assertFalse(True)
--- a/update_coverage_gutters.py
+++ b/update_coverage_gutters.py
@ -74,7 +74,7 @@ def main():
    try:
        print("Running tests with coverage...")
        os.system("python -m coverage erase")  # Clear old coverage data
-        os.system("python -m coverage run --source=pyWebLayout -m unittest tests.test_abstract_inline -v")
+        os.system("python -m coverage run --source=pyWebLayout -m unittest tests -v")
        os.system("python -m coverage xml -o coverage.xml")
        os.system("python -m coverage report --include='pyWebLayout/abstract/inline.py'")
        print("✓ Fresh coverage data generated")