# pyWebLayout/debug_epub_pagination.py
#
# Repository listing metadata: 135 lines, 5.8 KiB, Python

#!/usr/bin/env python3
"""
Debug script to test EPUB pagination step by step
"""
import traceback

from pyWebLayout.io.readers.epub_reader import EPUBReader
from pyWebLayout.concrete.page import Page
from pyWebLayout.style.fonts import Font
from pyWebLayout.abstract.document import Document, Chapter, Book
from pyWebLayout.io.readers.html_extraction import parse_html_string
# EPUB files tried, in order, when the caller does not supply any paths.
_DEFAULT_EPUB_FILES = ('pg1342.epub', 'pg174-images-3.epub')
# Size handed to every Page this script builds.
_PAGE_SIZE = (700, 550)
# A page whose lowest child bottom edge exceeds this value is reported as
# full (presumably the page height minus a bottom margin -- TODO confirm).
_PAGE_FULL_BOTTOM = 510


def _preview_chapters(document):
    """Print a preview of the first two chapters and return their blocks.

    For each previewed chapter the first three blocks are described: the
    block's type name and, when the block exposes a callable ``words``, the
    number of words plus the text of the first ten.

    Args:
        document: A ``Book`` whose ``chapters`` expose ``title`` and
            ``blocks``.

    Returns:
        A list with every block of the previewed chapters (not only the
        three blocks that were printed). Empty when there are no chapters.
    """
    collected = []
    if not document.chapters:
        return collected

    for chapter_no, chapter in enumerate(document.chapters[:2], start=1):
        print(f"\nChapter {chapter_no}: {chapter.title}")
        print(f" Number of blocks: {len(chapter.blocks)}")

        for block_no, block in enumerate(chapter.blocks[:3], start=1):
            print(f" Block {block_no}: {type(block).__name__}")
            if not (hasattr(block, 'words') and callable(block.words)):
                print(" No words method")
                continue
            # words() yields pairs; only the second element carries ``.text``.
            words = list(block.words())
            if words:
                first_words = ' '.join(word.text for _, word in words[:10])
                print(f" Words: {len(words)} (first 10: {first_words}...)")
            else:
                print(" No words found")

        collected.extend(chapter.blocks)
    return collected


def _report_block_conversion(blocks):
    """Convert the first ten blocks to renderables and print each outcome.

    Uses the private ``Page._convert_block_to_renderable`` hook. A failure on
    one block is printed and does not stop the remaining conversions.
    """
    print("\n=== Testing Block Conversion ===")
    page = Page(size=_PAGE_SIZE)
    sample = blocks[:10]
    converted = 0

    for number, block in enumerate(sample, start=1):
        kind = type(block).__name__
        try:
            renderable = page._convert_block_to_renderable(block)
            if renderable:
                print(f"Block {number}: {kind} -> {type(renderable).__name__}")
                if hasattr(renderable, '_size'):
                    print(f" Size: {renderable._size}")
                converted += 1
            else:
                print(f"Block {number}: {kind} -> None")
        except Exception as exc:  # best-effort probe: report and keep going
            print(f"Block {number}: {kind} -> ERROR: {exc}")

    print(f"Successfully converted {converted}/{len(sample)} blocks")


def _fill_test_page(blocks):
    """Add up to twenty blocks to a fresh page until it is reported as full.

    After every added block the page is laid out again and the lowest bottom
    edge among its children is printed. Filling stops at the first error or
    once that edge exceeds ``_PAGE_FULL_BOTTOM``.

    Returns:
        The ``Page`` that was filled, so the caller can try to render it.
    """
    print("\n=== Testing Page Filling ===")
    page = Page(size=_PAGE_SIZE)
    added = 0

    for number, block in enumerate(blocks[:20], start=1):
        try:
            renderable = page._convert_block_to_renderable(block)
            if not renderable:
                continue

            page.add_child(renderable)
            added += 1
            print(f"Added block {number}: {type(block).__name__}")

            page.layout()

            # Lowest bottom edge (y origin + height) over the children that
            # carry geometry; the leading 0 covers a page without any.
            bottoms = [
                child._origin[1] + child._size[1]
                for child in page._children
                if hasattr(child, '_origin') and hasattr(child, '_size')
            ]
            max_bottom = max([0, *bottoms])
            print(f" Current page height: {max_bottom}")

            if max_bottom > _PAGE_FULL_BOTTOM:
                print(f" Page full after {added} blocks")
                break
        except Exception as exc:
            print(f"Error adding block {number}: {exc}")
            traceback.print_exc()
            break

    print(f"Final page has {added} blocks")
    return page


def _render_test_page(page):
    """Render *page* and print the resulting image size or the failure."""
    print("\n=== Testing Page Rendering ===")
    try:
        rendered_image = page.render()
        print(f"Page rendered successfully: {rendered_image.size}")
    except Exception as exc:
        print(f"Page rendering failed: {exc}")
        traceback.print_exc()


def debug_epub_content(epub_files=None):
    """Debug what content we're getting from EPUB.

    Each candidate file is loaded with ``EPUBReader``. For the first one that
    yields a ``Book`` and whose checks finish without raising, the script
    prints the book metadata, previews the first chapters, tries block
    conversion, fills a test page and renders it, then stops. Any exception
    raised while handling a file is printed and the next file is tried.

    Args:
        epub_files: Optional iterable of EPUB paths to try in order. A single
            string is treated as one path. ``None`` (the default) uses the
            built-in sample file names.

    Returns:
        None. All results are written to standard output.
    """
    if epub_files is None:
        epub_files = _DEFAULT_EPUB_FILES
    elif isinstance(epub_files, str):
        # Guard against iterating over the characters of a lone path.
        epub_files = [epub_files]

    for epub_file in epub_files:
        try:
            print(f"\n=== Testing {epub_file} ===")

            document = EPUBReader(epub_file).read()

            print(f"Document type: {type(document)}")
            print(f"Document title: {getattr(document, 'title', 'No title')}")

            if not isinstance(document, Book):
                # Only Book documents are inspected further; try the next file.
                continue

            print(f"Book title: {document.get_title()}")
            print(f"Book author: {document.get_author()}")
            print(f"Number of chapters: {len(document.chapters) if document.chapters else 0}")

            # Only the previewed chapters contribute to this total.
            all_blocks = _preview_chapters(document)
            print(f"\nTotal blocks across all chapters: {len(all_blocks)}")

            _report_block_conversion(all_blocks)
            test_page = _fill_test_page(all_blocks)
            _render_test_page(test_page)

            break  # Stop after first successful file
        except Exception as exc:
            # Top-level boundary of a debug script: report and move on.
            print(f"Error with {epub_file}: {exc}")

    print("\n=== Debugging Complete ===")
# Run the EPUB debugging routine only when executed as a script, not on import.
if __name__ == "__main__":
    debug_epub_content()