fixed issue with cover and image rendering
All checks were successful
Python CI / test (3.10) (push) Successful in 2m10s
Python CI / test (3.12) (push) Successful in 2m3s
Python CI / test (3.13) (push) Successful in 1m57s

This commit is contained in:
Duncan Tourolle 2025-11-10 13:06:21 +01:00
parent 9fb6792e10
commit a8e459bce5
6 changed files with 681 additions and 15 deletions

View File

@ -53,6 +53,9 @@ class RenderableImage(Renderable, Queriable):
if size[0] is None or size[1] is None:
size = (100, 100) # Default size when image dimensions are unavailable
# Ensure dimensions are positive (can be negative if calculated from insufficient space)
size = (max(1, size[0]), max(1, size[1]))
# Set size as numpy array
self._size = np.array(size)
@ -172,6 +175,10 @@ class RenderableImage(Renderable, Queriable):
# Get the target dimensions
target_width, target_height = self._size
# Ensure target dimensions are positive
target_width = max(1, int(target_width))
target_height = max(1, int(target_height))
# Get the original dimensions
orig_width, orig_height = self._pil_image.size
@ -183,8 +190,8 @@ class RenderableImage(Renderable, Queriable):
ratio = min(width_ratio, height_ratio)
# Calculate new dimensions
new_width = int(orig_width * ratio)
new_height = int(orig_height * ratio)
new_width = max(1, int(orig_width * ratio))
new_height = max(1, int(orig_height * ratio))
# Resize the image
if self._pil_image.mode == 'RGBA':

View File

@ -446,17 +446,43 @@ class EPUBReader:
def _process_chapter_images(self, chapter: Chapter):
"""
Process images in a single chapter.
Load and process images in a single chapter.
This method loads images from disk into memory and applies image processing.
Images must be loaded before the temporary EPUB directory is cleaned up.
Args:
chapter: The chapter containing images to process
"""
from pyWebLayout.abstract.block import Image as AbstractImage
from PIL import Image as PILImage
import io
for block in chapter.blocks:
if isinstance(block, AbstractImage):
# Only process if image has been loaded and processor is enabled
if hasattr(block, '_loaded_image') and block._loaded_image:
# Load image into memory if not already loaded
if not hasattr(block, '_loaded_image') or not block._loaded_image:
try:
# Load the image from the source path
if os.path.isfile(block.source):
with open(block.source, 'rb') as f:
image_bytes = f.read()
# Create PIL image from bytes in memory
pil_image = PILImage.open(io.BytesIO(image_bytes))
pil_image.load() # Force loading into memory
block._loaded_image = pil_image.copy() # Create a copy to ensure it persists
# Set width and height on the block from the loaded image
# This is required for layout calculations
block._width = pil_image.width
block._height = pil_image.height
except Exception as e:
print(f"Warning: Failed to load image '{block.source}': {str(e)}")
# Continue without the image
continue
# Apply image processing if enabled and image is loaded
if self.image_processor and hasattr(block, '_loaded_image') and block._loaded_image:
try:
block._loaded_image = self.image_processor(block._loaded_image)
except Exception as e:
@ -466,10 +492,12 @@ class EPUBReader:
# Continue with unprocessed image
def _process_content_images(self):
"""Apply image processing to all images in chapters."""
if not self.image_processor:
return
"""
Load all images into memory and apply image processing.
This must be called before the temporary EPUB directory is cleaned up,
to ensure images are loaded from disk into memory.
"""
for chapter in self.book.chapters:
self._process_chapter_images(chapter)
@ -527,8 +555,11 @@ class EPUBReader:
with open(path, 'r', encoding='utf-8') as f:
html = f.read()
# Parse HTML and add blocks to chapter
blocks = parse_html_string(html, document=self.book)
# Get the directory of the HTML file for resolving relative paths
html_dir = os.path.dirname(path)
# Parse HTML and add blocks to chapter, passing base_path for image resolution
blocks = parse_html_string(html, document=self.book, base_path=html_dir)
# Copy blocks to the chapter
for block in blocks:

View File

@ -41,6 +41,7 @@ class StyleContext(NamedTuple):
element_attributes: Dict[str, Any]
parent_elements: List[str] # Stack of parent element names
document: Optional[Any] # Reference to document for font registry
base_path: Optional[str] = None # Base path for resolving relative URLs
def with_font(self, font: Font) -> "StyleContext":
"""Create new context with modified font."""
@ -71,13 +72,15 @@ class StyleContext(NamedTuple):
def create_base_context(
base_font: Optional[Font] = None,
document=None) -> StyleContext:
document=None,
base_path: Optional[str] = None) -> StyleContext:
"""
Create a base style context with default values.
Args:
base_font: Base font to use, defaults to system default
document: Document instance for font registry
base_path: Base directory path for resolving relative URLs
Returns:
StyleContext with default values
@ -97,6 +100,7 @@ def create_base_context(
element_attributes={},
parent_elements=[],
document=document,
base_path=base_path,
)
@ -792,9 +796,19 @@ def line_break_handler(element: Tag, context: StyleContext) -> None:
def image_handler(element: Tag, context: StyleContext) -> Image:
"""Handle <img> elements."""
import os
import urllib.parse
src = context.element_attributes.get("src", "")
alt_text = context.element_attributes.get("alt", "")
# Resolve relative paths if base_path is provided
if context.base_path and src and not src.startswith(('http://', 'https://', '/')):
# Parse the src to handle URL-encoded characters
src_decoded = urllib.parse.unquote(src)
# Resolve relative path to absolute path
src = os.path.normpath(os.path.join(context.base_path, src_decoded))
# Parse dimensions if provided
width = height = None
try:
@ -883,7 +897,7 @@ HANDLERS: Dict[str, Callable[[Tag, StyleContext], Union[Block, List[Block], None
def parse_html_string(
html_string: str, base_font: Optional[Font] = None, document=None
html_string: str, base_font: Optional[Font] = None, document=None, base_path: Optional[str] = None
) -> List[Block]:
"""
Parse HTML string and return list of Block objects.
@ -892,12 +906,14 @@ def parse_html_string(
html_string: HTML content to parse
base_font: Base font for styling, defaults to system default
document: Document instance for font registry to avoid duplicate fonts
base_path: Base directory path for resolving relative URLs (e.g., image sources)
Returns:
List of Block objects representing the document structure
"""
soup = BeautifulSoup(html_string, "html.parser")
context = create_base_context(base_font, document)
context = create_base_context(base_font, document, base_path)
blocks = []
# Process the body if it exists, otherwise process all top-level elements

View File

@ -306,6 +306,11 @@ def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] =
# Calculate available height on page
available_height = page.size[1] - page._current_y_offset - page.border_size
# If no space available, image doesn't fit
if available_height <= 0:
return False
if max_height is None:
max_height = available_height
else:

View File

@ -15,13 +15,13 @@ from __future__ import annotations
from dataclasses import dataclass, asdict
from typing import List, Dict, Tuple, Optional, Any
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HeadingLevel, Table, HList
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HeadingLevel, Table, HList, Image
from pyWebLayout.abstract.inline import Word
from pyWebLayout.concrete.page import Page
from pyWebLayout.concrete.text import Text
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style import Font
from pyWebLayout.layout.document_layouter import paragraph_layouter
from pyWebLayout.layout.document_layouter import paragraph_layouter, image_layouter
@dataclass
@ -94,6 +94,26 @@ class ChapterNavigator:
"""Scan blocks for headings and build chapter navigation map"""
current_chapter_index = 0
# Check if first block is a cover image and add it to TOC
if self.blocks and isinstance(self.blocks[0], Image):
cover_position = RenderingPosition(
chapter_index=0,
block_index=0,
word_index=0,
table_row=0,
table_col=0,
list_item_index=0
)
cover_info = ChapterInfo(
title="Cover",
level=HeadingLevel.H1, # Treat as top-level entry
position=cover_position,
block_index=0
)
self.chapters.append(cover_info)
for block_index, block in enumerate(self.blocks):
if isinstance(block, Heading):
# Create position for this heading
@ -384,6 +404,8 @@ class BidirectionalLayouter:
return self._layout_table_on_page(block, page, position, font_scale)
elif isinstance(block, HList):
return self._layout_list_on_page(block, page, position, font_scale)
elif isinstance(block, Image):
return self._layout_image_on_page(block, page, position, font_scale)
else:
# Skip unknown block types
new_pos = position.copy()
@ -496,6 +518,46 @@ class BidirectionalLayouter:
new_pos.list_item_index = 0
return True, new_pos
def _layout_image_on_page(
    self,
    image: Image,
    page: Page,
    position: RenderingPosition,
    font_scale: float,
) -> Tuple[bool, RenderingPosition]:
    """
    Place a single Image block on ``page`` by delegating to ``image_layouter``.

    Args:
        image: The Image block to place.
        page: The page that should receive the image.
        position: Rendering position of this image block.
        font_scale: Accepted for signature parity with the other block
            layout methods; it is not read here.

    Returns:
        ``(True, next_position)`` when ``image_layouter`` reports success;
        ``next_position`` is a copy of ``position`` with ``block_index``
        incremented and ``word_index`` reset to 0.
        ``(False, position)`` when it reports failure; the caller gets the
        original position object back unchanged.
    """
    # Passing None for both limits leaves the sizing decision to image_layouter.
    fits = image_layouter(
        image=image,
        page=page,
        max_width=None,
        max_height=None,
    )

    advanced = position.copy()
    if not fits:
        # Hand back the untouched position so the same block can be retried.
        return False, position

    # The image block is consumed as a whole: step to the start of the next block.
    advanced.block_index += 1
    advanced.word_index = 0
    return True, advanced
def _estimate_page_start(
self,
end_position: RenderingPosition,

View File

@ -0,0 +1,545 @@
"""
Unit tests for Image block rendering in the ereader layout system.
Tests cover:
- Image block layout on pages
- Navigation with images (next/previous page)
- Images at different positions (start, middle, end)
- Cover page detection and handling
- Multi-page scenarios with images
"""
import unittest
import tempfile
import shutil
from pathlib import Path
from pyWebLayout.layout.ereader_manager import EreaderLayoutManager
from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
from pyWebLayout.abstract.block import Paragraph, Heading, HeadingLevel, Image
from pyWebLayout.abstract.inline import Word
from pyWebLayout.concrete.page import Page
from pyWebLayout.style import Font
from pyWebLayout.style.page_style import PageStyle
class TestImageBlockLayout(unittest.TestCase):
    """Basic checks that Image blocks pass through BidirectionalLayouter."""

    def setUp(self):
        """Build the font, page size and page style shared by the tests."""
        self.base_font = Font(font_size=14)
        self.page_size = (400, 600)
        self.page_style = PageStyle(padding=(20, 20, 20, 20))

    def test_layout_image_block_on_page(self):
        """A document holding one sized image renders a page and moves on."""
        only_image = Image(
            source="test.jpg", alt_text="Test Image", width=200, height=300
        )
        layouter = BidirectionalLayouter([only_image], self.page_style)
        start = RenderingPosition()

        rendered, following = layouter.render_page_forward(start, font_scale=1.0)

        # A Page object must come back ...
        self.assertIsNotNone(rendered)
        self.assertIsInstance(rendered, Page)
        # ... and the returned position must point past the image block.
        self.assertEqual(following.block_index, 1)

    def test_image_block_advances_position(self):
        """Rendering from an image followed by text completes without error."""
        leading_image = Image(source="img1.jpg", alt_text="Image 1")
        trailing_text = Paragraph(self.base_font)
        document = [leading_image, trailing_text]

        # Give the paragraph some content so there is text after the image.
        trailing_text.add_word(Word("Text after image", self.base_font))

        layouter = BidirectionalLayouter(document, self.page_style)
        start = RenderingPosition(block_index=0)

        rendered, following = layouter.render_page_forward(start, font_scale=1.0)

        # The returned position may have moved to the next block or stayed on
        # the image; this test only requires that a page is produced and the
        # block index is not negative.
        self.assertIsNotNone(rendered)
        self.assertGreaterEqual(following.block_index, 0)
class TestImageNavigationScenarios(unittest.TestCase):
    """Test navigation scenarios with images in different positions."""

    def setUp(self):
        """Set up shared fixtures and a temporary bookmarks directory."""
        self.base_font = Font(font_size=14)
        self.page_size = (400, 600)
        self.page_style = PageStyle(padding=(20, 20, 20, 20))
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the temporary bookmarks directory."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _create_paragraph(self, text: str) -> Paragraph:
        """Helper to create a paragraph holding ``text`` as a single Word."""
        para = Paragraph(self.base_font)
        para.add_word(Word(text, self.base_font))
        return para

    def _make_manager(self, blocks, document_id: str):
        """Helper to build an EreaderLayoutManager from the shared fixtures."""
        return EreaderLayoutManager(
            blocks=blocks,
            page_size=self.page_size,
            document_id=document_id,
            page_style=self.page_style,
            bookmarks_dir=self.temp_dir
        )

    def test_next_page_with_image_on_second_page(self):
        """Test next_page() on a paragraph → image → paragraph document."""
        blocks = [
            self._create_paragraph("First paragraph on page 1."),
            Image(source="middle.jpg", alt_text="Middle Image"),
            self._create_paragraph("Third paragraph after image.")
        ]
        manager = self._make_manager(blocks, "test_image_nav")

        # Start at beginning
        initial_pos = manager.current_position.block_index
        self.assertEqual(initial_pos, 0)

        # Navigate to next page
        next_page = manager.next_page()
        self.assertIsNotNone(next_page)

        # Position should have advanced
        self.assertGreater(manager.current_position.block_index, initial_pos)

    def test_previous_page_with_image_on_previous_page(self):
        """Test navigating back after moving forward past an image."""
        blocks = [
            self._create_paragraph("First paragraph."),
            Image(source="image1.jpg", alt_text="Image 1"),
            self._create_paragraph("Third paragraph."),
            self._create_paragraph("Fourth paragraph.")
        ]
        manager = self._make_manager(blocks, "test_prev_image")

        # Navigate forward to get past the image
        manager.next_page()
        manager.next_page()
        current_block = manager.current_position.block_index
        self.assertGreater(current_block, 0)

        # Navigate backward
        prev_page = manager.previous_page()
        self.assertIsNotNone(prev_page)

        # Should have moved to an earlier position
        self.assertLess(manager.current_position.block_index, current_block)

    def test_multiple_images_in_sequence(self):
        """Test document with multiple consecutive images."""
        blocks = [
            self._create_paragraph("Introduction text."),
            Image(source="img1.jpg", alt_text="Image 1"),
            Image(source="img2.jpg", alt_text="Image 2"),
            Image(source="img3.jpg", alt_text="Image 3"),
            self._create_paragraph("Text after images.")
        ]
        manager = self._make_manager(blocks, "test_multi_images")

        # Navigate through pages
        pages_rendered = 0
        max_pages = 10  # Safety limit so a stuck layouter cannot hang the test
        while pages_rendered < max_pages:
            current_block = manager.current_position.block_index

            # Try to go to next page
            next_page = manager.next_page()
            if next_page is None:
                # Reached end
                break
            pages_rendered += 1

            # Position must never move backwards (it may stay on the same block)
            self.assertGreaterEqual(
                manager.current_position.block_index,
                current_block,
                f"Position should advance or stay same, page {pages_rendered}"
            )

        # Should have rendered at least 1 page
        self.assertGreaterEqual(pages_rendered, 1)

    def test_image_at_document_start(self):
        """Test that a leading image is treated as the cover and can be passed."""
        blocks = [
            Image(source="start.jpg", alt_text="Start Image"),
            self._create_paragraph("Text after image.")
        ]
        manager = self._make_manager(blocks, "test_image_start")

        # First image should be detected as cover
        self.assertTrue(manager.has_cover())
        self.assertTrue(manager.is_on_cover())

        # Navigate past cover
        manager.next_page()

        # Should now be at the text
        self.assertFalse(manager.is_on_cover())
        # Should have skipped the image block (cover)
        self.assertEqual(manager.current_position.block_index, 1)

    def test_image_at_document_end(self):
        """Test document ending with an image."""
        blocks = [
            self._create_paragraph("First paragraph."),
            self._create_paragraph("Second paragraph."),
            Image(source="end.jpg", alt_text="End Image")
        ]
        manager = self._make_manager(blocks, "test_image_end")

        # Navigate to end (bounded so the loop always terminates)
        page_count = 0
        max_pages = 10
        while page_count < max_pages:
            next_page = manager.next_page()
            if next_page is None:
                break
            page_count += 1

        # At least one next_page() call must have produced a page
        self.assertGreater(page_count, 0)

    def test_alternating_text_and_images(self):
        """Test document with alternating text and images."""
        blocks = [
            self._create_paragraph("Paragraph 1"),
            Image(source="img1.jpg", alt_text="Image 1"),
            self._create_paragraph("Paragraph 2"),
            Image(source="img2.jpg", alt_text="Image 2"),
            self._create_paragraph("Paragraph 3"),
            Image(source="img3.jpg", alt_text="Image 3"),
            self._create_paragraph("Paragraph 4")
        ]
        manager = self._make_manager(blocks, "test_alternating")

        # Track the block index seen at the start of each page
        blocks_visited = set()
        max_pages = 15
        for _ in range(max_pages):
            blocks_visited.add(manager.current_position.block_index)
            next_page = manager.next_page()
            if next_page is None:
                break

        # Should have visited multiple different blocks
        self.assertGreater(len(blocks_visited), 1)
class TestCoverPageWithImages(unittest.TestCase):
    """Cover detection and cover navigation when the document holds images."""

    def setUp(self):
        """Prepare font, page geometry and a temporary bookmarks directory."""
        self.base_font = Font(font_size=14)
        self.page_size = (400, 600)
        self.page_style = PageStyle(padding=(20, 20, 20, 20))
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Delete the temporary bookmarks directory."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _create_paragraph(self, text: str) -> Paragraph:
        """Return a Paragraph containing ``text`` as one Word."""
        paragraph = Paragraph(self.base_font)
        paragraph.add_word(Word(text, self.base_font))
        return paragraph

    def _make_manager(self, blocks, document_id):
        """Build an EreaderLayoutManager over ``blocks`` with the shared fixtures."""
        return EreaderLayoutManager(
            blocks=blocks,
            page_size=self.page_size,
            document_id=document_id,
            page_style=self.page_style,
            bookmarks_dir=self.temp_dir,
        )

    def test_cover_page_detected_from_first_image(self):
        """An image in first position is reported as the cover."""
        document = [
            Image(source="cover.jpg", alt_text="Cover"),
            self._create_paragraph("Chapter text."),
        ]
        reader = self._make_manager(document, "test_cover_detection")

        self.assertTrue(reader.has_cover())
        self.assertTrue(reader.is_on_cover())

    def test_no_cover_when_first_block_is_text(self):
        """No cover is reported when the document opens with a paragraph."""
        document = [
            self._create_paragraph("First paragraph."),
            Image(source="image.jpg", alt_text="Not a cover"),
            self._create_paragraph("Second paragraph."),
        ]
        reader = self._make_manager(document, "test_no_cover")

        self.assertFalse(reader.has_cover())
        self.assertFalse(reader.is_on_cover())

    def test_navigation_from_cover_skips_image_block(self):
        """next_page() from the cover lands on block index 1."""
        document = [
            Image(source="cover.jpg", alt_text="Cover"),
            self._create_paragraph("First content paragraph."),
            self._create_paragraph("Second content paragraph."),
        ]
        reader = self._make_manager(document, "test_cover_skip")

        # The reader opens on the cover, which is block 0.
        self.assertTrue(reader.is_on_cover())
        self.assertEqual(reader.current_position.block_index, 0)

        reader.next_page()

        # After one step the cover is left behind and block 1 is current.
        self.assertFalse(reader.is_on_cover())
        self.assertEqual(reader.current_position.block_index, 1)

    def test_previous_page_returns_to_cover(self):
        """previous_page() from the first content page goes back to the cover."""
        document = [
            Image(source="cover.jpg", alt_text="Cover"),
            self._create_paragraph("Content text."),
        ]
        reader = self._make_manager(document, "test_back_to_cover")

        # Step off the cover first.
        reader.next_page()
        self.assertFalse(reader.is_on_cover())

        # Stepping back must land on the cover again.
        reader.previous_page()
        self.assertTrue(reader.is_on_cover())

    def test_jump_to_cover_from_middle(self):
        """jump_to_cover() returns a page and puts the reader on the cover."""
        document = [
            Image(source="cover.jpg", alt_text="Cover"),
            self._create_paragraph("Paragraph 1"),
            self._create_paragraph("Paragraph 2"),
            self._create_paragraph("Paragraph 3"),
        ]
        reader = self._make_manager(document, "test_jump_cover")

        # Move two pages in so the jump starts away from the cover.
        reader.next_page()
        reader.next_page()
        self.assertFalse(reader.is_on_cover())

        cover_page = reader.jump_to_cover()

        self.assertIsNotNone(cover_page)
        self.assertTrue(reader.is_on_cover())
class TestImageBlockPositionTracking(unittest.TestCase):
    """Position info, bookmarks and progress for documents with Image blocks."""

    def setUp(self):
        """Prepare font, page geometry and a temporary bookmarks directory."""
        self.base_font = Font(font_size=14)
        self.page_size = (400, 600)
        self.page_style = PageStyle(padding=(20, 20, 20, 20))
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Delete the temporary bookmarks directory."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _create_paragraph(self, text: str) -> Paragraph:
        """Return a Paragraph containing ``text`` as one Word."""
        paragraph = Paragraph(self.base_font)
        paragraph.add_word(Word(text, self.base_font))
        return paragraph

    def _make_manager(self, blocks, document_id):
        """Build an EreaderLayoutManager over ``blocks`` with the shared fixtures."""
        return EreaderLayoutManager(
            blocks=blocks,
            page_size=self.page_size,
            document_id=document_id,
            page_style=self.page_style,
            bookmarks_dir=self.temp_dir,
        )

    def test_position_info_includes_image_blocks(self):
        """get_position_info() exposes a block index of 0 at document start."""
        document = [
            self._create_paragraph("Text 1"),
            Image(source="img.jpg", alt_text="Image"),
            self._create_paragraph("Text 2"),
        ]
        reader = self._make_manager(document, "test_pos_info")

        info = reader.get_position_info()

        self.assertIn('position', info)
        self.assertIn('block_index', info['position'])
        self.assertEqual(info['position']['block_index'], 0)

    def test_bookmark_image_position(self):
        """A bookmark set after one page turn can be jumped back to."""
        document = [
            self._create_paragraph("Before image"),
            Image(source="bookmarked.jpg", alt_text="Bookmarked Image"),
            self._create_paragraph("After image"),
        ]
        reader = self._make_manager(document, "test_bookmark_image")

        # Turn one page so the bookmark is not at the document start.
        reader.next_page()

        bookmark_name = "image_location"
        self.assertTrue(reader.add_bookmark(bookmark_name))

        # Move on, then return through the bookmark.
        reader.next_page()
        restored_page = reader.jump_to_bookmark(bookmark_name)
        self.assertIsNotNone(restored_page)

        # Only a position beyond block 0 is required; the exact block
        # depends on how much content fitted on the first page.
        self.assertGreater(reader.current_position.block_index, 0)

    def test_reading_progress_with_images(self):
        """Reading progress starts at 0.0 and grows after paging forward."""
        document = [
            self._create_paragraph("Text 1"),
            Image(source="img1.jpg", alt_text="Image 1"),
            self._create_paragraph("Text 2"),
            Image(source="img2.jpg", alt_text="Image 2"),
            self._create_paragraph("Text 3"),
        ]
        reader = self._make_manager(document, "test_progress")

        progress_start = reader.get_reading_progress()
        self.assertEqual(progress_start, 0.0)

        # Page forward at most five times, stopping early at the end.
        for _ in range(5):
            if reader.next_page() is None:
                break

        progress_end = reader.get_reading_progress()
        self.assertGreater(progress_end, progress_start)
if __name__ == '__main__':
unittest.main()