#!/usr/bin/env python3
"""
Debug script to test EPUB pagination step by step
"""

import traceback

from pyWebLayout.io.readers.epub_reader import EPUBReader
from pyWebLayout.concrete.page import Page
from pyWebLayout.style.fonts import Font
from pyWebLayout.abstract.document import Document, Chapter, Book
from pyWebLayout.io.readers.html_extraction import parse_html_string

# Candidate EPUB files, tried in order until one has been fully inspected.
EPUB_FILES = ['pg1342.epub', 'pg174-images-3.epub']

# Page dimensions used for both the conversion test and the fill test.
PAGE_SIZE = (700, 550)
# Content bottom edge (same units as PAGE_SIZE) past which the page is
# reported as full.
PAGE_FULL_HEIGHT = 510

# How much detail to print / how many blocks to sample in each step.
DETAILED_CHAPTERS = 2
DETAILED_BLOCKS_PER_CHAPTER = 3
PREVIEW_WORDS = 10
CONVERSION_SAMPLE = 10
FILL_SAMPLE = 20


def _describe_block(number, block):
    """Print the type of *block* and, when it has a ``words()`` method, a
    word count plus a preview of its first words.

    *number* is the 1-based position shown in the output.
    """
    print(f" Block {number}: {type(block).__name__}")
    if not (hasattr(block, 'words') and callable(block.words)):
        print(" No words method")
        return

    # words() yields pairs; only the second element (which carries .text)
    # is needed for the preview.
    words = list(block.words())
    word_count = len(words)
    if word_count > 0:
        first_words = ' '.join(word.text for _, word in words[:PREVIEW_WORDS])
        print(f" Words: {word_count} (first 10: {first_words}...)")
    else:
        print(" No words found")


def _collect_blocks(book):
    """Return the blocks of every chapter of *book* as one flat list.

    Detailed per-chapter output is printed only for the first
    DETAILED_CHAPTERS chapters, but blocks are gathered from all chapters so
    that the total reported by the caller really covers the whole book.
    """
    all_blocks = []
    # chapters may be empty/None; treat both as "no chapters".
    for index, chapter in enumerate(book.chapters or []):
        if index < DETAILED_CHAPTERS:
            print(f"\nChapter {index+1}: {chapter.title}")
            print(f" Number of blocks: {len(chapter.blocks)}")
            preview = chapter.blocks[:DETAILED_BLOCKS_PER_CHAPTER]
            for j, block in enumerate(preview):
                _describe_block(j + 1, block)
        all_blocks.extend(chapter.blocks)
    return all_blocks


def _test_block_conversion(all_blocks):
    """Try converting the first CONVERSION_SAMPLE blocks to renderables and
    print the outcome of each attempt followed by a success tally.

    A failing conversion is reported and does not stop the remaining ones.
    """
    print("\n=== Testing Block Conversion ===")
    page = Page(size=PAGE_SIZE)
    sample = all_blocks[:CONVERSION_SAMPLE]
    converted_count = 0

    for i, block in enumerate(sample):
        try:
            renderable = page._convert_block_to_renderable(block)
            if renderable:
                print(f"Block {i+1}: {type(block).__name__} -> {type(renderable).__name__}")
                if hasattr(renderable, '_size'):
                    print(f" Size: {renderable._size}")
                converted_count += 1
            else:
                print(f"Block {i+1}: {type(block).__name__} -> None")
        except Exception as e:
            # Debug boundary: report and keep going with the next block.
            print(f"Block {i+1}: {type(block).__name__} -> ERROR: {e}")

    print(f"Successfully converted {converted_count}/{len(sample)} blocks")


def _content_bottom(page):
    """Return the largest bottom edge (origin y + height) among the children
    of *page* that have both ``_origin`` and ``_size``; never less than 0.
    """
    max_bottom = 0
    for child in page._children:
        if hasattr(child, '_origin') and hasattr(child, '_size'):
            max_bottom = max(max_bottom, child._origin[1] + child._size[1])
    return max_bottom


def _test_page_filling(all_blocks):
    """Add up to FILL_SAMPLE blocks to a fresh page, re-running layout after
    each one, and stop once the content passes PAGE_FULL_HEIGHT.

    Returns the page so the caller can attempt to render it. The first error
    while adding a block is printed with its traceback and ends the filling.
    """
    print("\n=== Testing Page Filling ===")
    test_page = Page(size=PAGE_SIZE)
    blocks_added = 0

    for i, block in enumerate(all_blocks[:FILL_SAMPLE]):
        try:
            renderable = test_page._convert_block_to_renderable(block)
            if not renderable:
                continue

            test_page.add_child(renderable)
            blocks_added += 1
            print(f"Added block {i+1}: {type(block).__name__}")

            # Positions are only read after layout() has run for the
            # newly added child.
            test_page.layout()
            max_bottom = _content_bottom(test_page)
            print(f" Current page height: {max_bottom}")

            if max_bottom > PAGE_FULL_HEIGHT:
                print(f" Page full after {blocks_added} blocks")
                break
        except Exception as e:
            print(f"Error adding block {i+1}: {e}")
            traceback.print_exc()
            break

    print(f"Final page has {blocks_added} blocks")
    return test_page


def _test_page_rendering(page):
    """Render *page* and print the resulting image size, or the failure with
    its traceback."""
    print("\n=== Testing Page Rendering ===")
    try:
        rendered_image = page.render()
        print(f"Page rendered successfully: {rendered_image.size}")
    except Exception as e:
        print(f"Page rendering failed: {e}")
        traceback.print_exc()


def debug_epub_content():
    """Debug what content we're getting from EPUB.

    Tries each file in EPUB_FILES in order. For the first one that loads as
    a Book it prints the book metadata, a summary of the leading chapters
    and blocks, then exercises block conversion, page filling and page
    rendering, and stops. Files that fail to load (or whose inspection
    raises) are reported and the next candidate is tried.
    """
    for epub_file in EPUB_FILES:
        try:
            print(f"\n=== Testing {epub_file} ===")

            document = EPUBReader(epub_file).read()
            print(f"Document type: {type(document)}")
            print(f"Document title: {getattr(document, 'title', 'No title')}")

            # The remaining steps use Book's get_title/get_author/chapters,
            # so any other document type is skipped and the next file tried.
            if not isinstance(document, Book):
                continue

            print(f"Book title: {document.get_title()}")
            print(f"Book author: {document.get_author()}")
            print(f"Number of chapters: {len(document.chapters) if document.chapters else 0}")

            all_blocks = _collect_blocks(document)
            print(f"\nTotal blocks across all chapters: {len(all_blocks)}")

            _test_block_conversion(all_blocks)
            filled_page = _test_page_filling(all_blocks)
            _test_page_rendering(filled_page)

            break  # Stop after first successful file
        except Exception as e:
            # Debug boundary: a missing or broken file must not prevent the
            # next candidate from being tried.
            print(f"Error with {epub_file}: {e}")

    print("\n=== Debugging Complete ===")


if __name__ == "__main__":
    debug_epub_content()