Fix tests
All checks were successful
Python CI / test (push) Successful in 5m17s

This commit is contained in:
Duncan Tourolle 2025-06-22 19:26:40 +02:00
parent ae15fe54e8
commit d0153c6397
9 changed files with 2332 additions and 154 deletions

View File

@ -45,6 +45,7 @@ class Document:
self._resources: Dict[str, Any] = {} # External resources like images
self._stylesheets: List[Dict[str, Any]] = [] # CSS stylesheets
self._scripts: List[str] = [] # JavaScript code
self._fonts: Dict[str, Font] = {} # Font registry for backward compatibility
# Style management with new abstract/concrete system
self._abstract_style_registry = AbstractStyleRegistry()
@ -395,6 +396,76 @@ class Document:
def get_concrete_style_registry(self) -> ConcreteStyleRegistry:
    """Return the concrete style registry held by this document."""
    registry = self._concrete_style_registry
    return registry
def get_or_create_font(self,
                       font_path: Optional[str] = None,
                       font_size: int = 16,
                       colour: Tuple[int, int, int] = (0, 0, 0),
                       weight: FontWeight = FontWeight.NORMAL,
                       style: FontStyle = FontStyle.NORMAL,
                       decoration: TextDecoration = TextDecoration.NONE,
                       background: Optional[Tuple[int, int, int, int]] = None,
                       language: str = "en_EN",
                       min_hyphenation_width: Optional[int] = None) -> Font:
    """
    Return the registered font for this configuration, creating it on first use.

    Requests whose normalised properties are equal share a single ``Font``
    instance stored in ``self._fonts``.

    Args:
        font_path: Path to the font file (.ttf, .otf), or None for the default font.
        font_size: Size of the font in points.
        colour: RGB colour tuple for the text.
        weight: Font weight.
        style: Font style.
        decoration: Text decoration.
        background: RGBA background colour; a falsy value is keyed as
            (255, 255, 255, 0) but forwarded to ``Font`` unchanged.
        language: Language code handed to the font.
        min_hyphenation_width: Minimum hyphenation width; None is keyed as
            ``font_size * 4`` but forwarded to ``Font`` unchanged.

    Returns:
        The cached font if one exists for this configuration, otherwise the
        newly created (and now cached) font.
    """
    # Instances that bypassed __init__ may lack the registry; create it lazily.
    if not hasattr(self, '_fonts'):
        self._fonts: Dict[str, Font] = {}

    # Normalise optional arguments so equivalent requests map to one key.
    effective_background = background or (255, 255, 255, 0)
    if min_hyphenation_width is None:
        effective_min_width = font_size * 4
    else:
        effective_min_width = min_hyphenation_width

    # Enum members are keyed by their value; anything else is keyed as given.
    weight_part = weight.value if isinstance(weight, FontWeight) else weight
    style_part = style.value if isinstance(style, FontStyle) else style
    decoration_part = (
        decoration.value if isinstance(decoration, TextDecoration) else decoration
    )

    # The registry is keyed by the string form of the property tuple.
    cache_key = str((
        font_path,
        font_size,
        colour,
        weight_part,
        style_part,
        decoration_part,
        effective_background,
        language,
        effective_min_width,
    ))

    try:
        return self._fonts[cache_key]
    except KeyError:
        pass

    # First request for this configuration: build the font and remember it.
    created = Font(
        font_path=font_path,
        font_size=font_size,
        colour=colour,
        weight=weight,
        style=style,
        decoration=decoration,
        background=background,
        language=language,
        min_hyphenation_width=min_hyphenation_width,
    )
    self._fonts[cache_key] = created
    return created
class Chapter:

View File

@ -1,86 +0,0 @@
#!/usr/bin/env python3
"""
Simple EPUB test script to isolate the issue.
"""
import sys
from pathlib import Path
# Append this file's third ancestor directory to sys.path so the pyWebLayout
# imports used by the checks below can resolve when the script is run directly
# (presumably that directory is the repository root — confirm against the
# project layout).
sys.path.append(str(Path(__file__).parent.parent.parent))
def test_epub_basic():
    """
    Smoke-check the core document classes without any HTML parsing.

    Imports the document classes, builds a book with one chapter and reports
    each step on stdout.

    Returns:
        True when every step succeeded, False if any exception was raised
        (the traceback is printed).
    """
    print("Testing basic EPUB components...")
    passed = False
    try:
        # Import inside the try block so an import failure is reported, not raised.
        from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
        print("✓ Document classes imported")

        # Build a minimal book with a single chapter.
        sample_book = Book("Test Book", "Test Author")
        sample_book.create_chapter("Test Chapter")
        print("✓ Book and chapter created")
        passed = True
    except Exception as exc:
        print(f"✗ Basic test failed: {exc}")
        import traceback
        traceback.print_exc()
    return passed
def test_epub_file():
    """
    Check that the sample EPUB can be opened as a zip archive.

    Looks for "pg174-images-3.epub" in the current working directory, lists
    its members and reports whether a container file and a package (.opf)
    file are present.

    Returns:
        True when the archive was opened and inspected, False when the file
        is missing or any exception was raised (the traceback is printed).
    """
    print("Testing EPUB file access...")
    outcome = False
    try:
        import zipfile
        import os

        epub_path = "pg174-images-3.epub"
        if os.path.exists(epub_path):
            with zipfile.ZipFile(epub_path, 'r') as archive:
                members = archive.namelist()
                print(f"✓ EPUB file opened, contains {len(members)} files")

                # Look for the key files by substring match on member names.
                container_status = 'not found'
                opf_status = 'not found'
                for member in members:
                    if 'container.xml' in member:
                        container_status = 'found'
                    if '.opf' in member:
                        opf_status = 'found'
                print(f"✓ Container file: {container_status}")
                print(f"✓ Package file: {opf_status}")
            outcome = True
        else:
            print(f"✗ EPUB file not found: {epub_path}")
    except Exception as exc:
        print(f"✗ EPUB file test failed: {exc}")
        import traceback
        traceback.print_exc()
        outcome = False
    return outcome
def main():
    """
    Run the basic checks in order, stopping at the first failure.

    Returns:
        True when both checks passed, False as soon as one of them fails.
    """
    print("Simple EPUB Test")
    print("=" * 50)

    # Each check is followed by a blank line; a failing check ends the run.
    for check in (test_epub_basic, test_epub_file):
        if not check():
            return False
        print()

    print("All basic tests passed!")
    return True
if __name__ == "__main__":
    # Exit status is 0 when every check passed and 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)

View File

@ -271,8 +271,33 @@ def apply_element_font_styles(
except ValueError:
pass
# Use document's font registry if available to avoid creating duplicate fonts
if context and context.document and hasattr(context.document, 'get_or_create_font'):
# Use document's style registry if available to avoid creating duplicate styles
if context and context.document and hasattr(context.document, 'get_or_create_style'):
# Create an abstract style first
from pyWebLayout.style.abstract_style import FontFamily, FontSize
# Map font properties to abstract style properties
font_family = FontFamily.SERIF # Default - could be enhanced to detect from font_path
if font_size:
font_size_value = font_size if isinstance(font_size, int) else FontSize.MEDIUM
else:
font_size_value = FontSize.MEDIUM
# Create abstract style and register it
style_id, abstract_style = context.document.get_or_create_style(
font_family=font_family,
font_size=font_size_value,
font_weight=weight,
font_style=style,
text_decoration=decoration,
color=colour,
language=language
)
# Get the concrete font for this style
return context.document.get_font_for_style(abstract_style)
elif context and context.document and hasattr(context.document, 'get_or_create_font'):
# Fallback to old font registry system
return context.document.get_or_create_font(
font_path=font_path,
font_size=font_size,

View File

@ -0,0 +1,380 @@
"""
Abstract positioning system for pyWebLayout.
This module provides content-based addressing that survives style changes,
font size modifications, and layout parameter changes. Abstract positions
represent logical locations in the document content structure.
"""
from typing import Optional, Dict, Any, List, Tuple
from dataclasses import dataclass
from enum import Enum
import json
import hashlib
from pyWebLayout.abstract.block import Block, BlockType
from pyWebLayout.abstract.document import Document, Book, Chapter
class ElementType(Enum):
    """Kinds of content element a position can refer to inside a block.

    Each member's value is the lowercase string stored when a position is
    serialized, so the values must stay stable.
    """

    # Flowing text
    PARAGRAPH = "paragraph"

    # Embedded media
    IMAGE = "image"

    # Structured / splittable content
    TABLE = "table"
    LIST = "list"

    # Section structure
    HEADING = "heading"
    HORIZONTAL_RULE = "horizontal_rule"

    # Pre-formatted and quoted content
    CODE_BLOCK = "code_block"
    QUOTE = "quote"
@dataclass
class AbstractPosition:
    """
    Logical, content-based location inside a document.

    The position is described purely by document structure (chapter, block,
    element, word, ...) and carries no rendering coordinates.
    """

    # --- Structural address ---
    document_id: Optional[str] = None
    chapter_index: Optional[int] = None  # Used when the document is a Book
    block_index: int = 0
    element_index: int = 0  # Index of the element inside its block
    element_type: ElementType = ElementType.PARAGRAPH

    # --- Address inside text elements ---
    word_index: Optional[int] = None
    character_index: Optional[int] = None

    # --- Address inside splittable elements (tables, lists) ---
    row_index: Optional[int] = None
    cell_index: Optional[int] = None
    list_item_index: Optional[int] = None

    # --- Quality indicators ---
    is_clean_boundary: bool = True  # False when the position is mid-hyphenation
    confidence: float = 1.0  # How much the position can be trusted

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dictionary of all fields; the element type is stored by value."""
        return dict(
            document_id=self.document_id,
            chapter_index=self.chapter_index,
            block_index=self.block_index,
            element_index=self.element_index,
            element_type=self.element_type.value,
            word_index=self.word_index,
            character_index=self.character_index,
            row_index=self.row_index,
            cell_index=self.cell_index,
            list_item_index=self.list_item_index,
            is_clean_boundary=self.is_clean_boundary,
            confidence=self.confidence,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AbstractPosition':
        """Build a position from a dictionary; missing keys take the field defaults."""
        lookup = data.get
        return cls(
            document_id=lookup('document_id'),
            chapter_index=lookup('chapter_index'),
            block_index=lookup('block_index', 0),
            element_index=lookup('element_index', 0),
            element_type=ElementType(lookup('element_type', 'paragraph')),
            word_index=lookup('word_index'),
            character_index=lookup('character_index'),
            row_index=lookup('row_index'),
            cell_index=lookup('cell_index'),
            list_item_index=lookup('list_item_index'),
            is_clean_boundary=lookup('is_clean_boundary', True),
            confidence=lookup('confidence', 1.0),
        )

    def to_bookmark(self) -> str:
        """Return the position as a JSON string suitable for storage."""
        as_dict = self.to_dict()
        return json.dumps(as_dict)

    @classmethod
    def from_bookmark(cls, bookmark: str) -> 'AbstractPosition':
        """Rebuild a position from a JSON string produced by ``to_bookmark``."""
        parsed = json.loads(bookmark)
        return cls.from_dict(parsed)

    def copy(self) -> 'AbstractPosition':
        """Return an independent copy made by a dictionary round trip."""
        snapshot = self.to_dict()
        return AbstractPosition.from_dict(snapshot)

    def get_hash(self) -> str:
        """Return an MD5 hex digest of the key-sorted JSON form (used as a cache key)."""
        canonical = json.dumps(self.to_dict(), sort_keys=True).encode()
        digest = hashlib.md5(canonical)
        return digest.hexdigest()

    def is_before(self, other: 'AbstractPosition') -> bool:
        """
        Report whether this position precedes ``other`` in document order.

        Fields are compared from coarse to fine; optional fields take part
        only when both positions define them. Returns False when the
        positions are equal or cannot be ordered.
        """
        # Coarse structure: chapter (only if both set), then block, then element.
        leading = []
        if self.chapter_index is not None and other.chapter_index is not None:
            leading.append((self.chapter_index, other.chapter_index))
        leading.append((self.block_index, other.block_index))
        leading.append((self.element_index, other.element_index))
        for mine, theirs in leading:
            if mine != theirs:
                return mine < theirs

        # Fine structure: (word, character) for text, then (row, cell) for tables.
        nested = (
            (self.word_index, other.word_index,
             self.character_index, other.character_index),
            (self.row_index, other.row_index,
             self.cell_index, other.cell_index),
        )
        for outer_mine, outer_theirs, inner_mine, inner_theirs in nested:
            if outer_mine is None or outer_theirs is None:
                continue
            if outer_mine != outer_theirs:
                return outer_mine < outer_theirs
            if inner_mine is not None and inner_theirs is not None:
                return inner_mine < inner_theirs

        # Equal, or no comparable field differed.
        return False

    def get_progress(self, document: Document) -> float:
        """
        Estimate how far through ``document`` this position lies.

        Args:
            document: The document this position refers to.

        Returns:
            A value capped at 1.0; 0.0 for empty documents or when the
            document lacks the expected attributes.
        """
        try:
            if not isinstance(document, Book):
                # Plain documents: fraction of blocks already passed.
                blocks = document.blocks
                if not blocks:
                    return 0.0
                return min(1.0, self.block_index / len(blocks))

            # Books: whole chapters passed plus the share of the current chapter.
            chapters = document.chapters
            chapter_count = len(chapters)
            if chapter_count == 0:
                return 0.0

            progress = (self.chapter_index or 0) / chapter_count
            if self.chapter_index is not None and self.chapter_index < len(chapters):
                current_blocks = chapters[self.chapter_index].blocks
                if current_blocks:
                    within_chapter = self.block_index / len(current_blocks)
                    progress += within_chapter / chapter_count
            return min(1.0, progress)
        except (IndexError, ZeroDivisionError, AttributeError):
            return 0.0
@dataclass
class ConcretePosition:
    """
    Physical rendering coordinates of a position.

    Unlike a content-based position, this one is tied to one particular
    layout: ``layout_hash`` records which layout it was computed for and
    ``is_valid`` tells whether it may still be trusted.
    """

    # --- Physical coordinates ---
    page_index: int = 0
    viewport_x: int = 0
    viewport_y: int = 0
    line_index: Optional[int] = None

    # --- Validation tracking ---
    layout_hash: Optional[str] = None  # Hash of the layout parameters in effect
    is_valid: bool = True

    # --- Quality indicators ---
    is_exact: bool = True  # False when the coordinates are only an approximation
    pixel_offset: int = 0  # Fine-grained offset within the line

    def invalidate(self):
        """Flag the position as neither valid nor exact."""
        self.is_exact = False
        self.is_valid = False

    def update_layout_hash(self, layout_hash: str):
        """Record a new layout hash and flag the position as valid again."""
        self.is_valid = True
        self.layout_hash = layout_hash

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary."""
        field_names = (
            'page_index',
            'viewport_x',
            'viewport_y',
            'line_index',
            'layout_hash',
            'is_valid',
            'is_exact',
            'pixel_offset',
        )
        return {field_name: getattr(self, field_name) for field_name in field_names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ConcretePosition':
        """Build a position from a dictionary; missing keys take the field defaults."""
        fallbacks = {
            'page_index': 0,
            'viewport_x': 0,
            'viewport_y': 0,
            'line_index': None,
            'layout_hash': None,
            'is_valid': True,
            'is_exact': True,
            'pixel_offset': 0,
        }
        return cls(**{key: data.get(key, fallback) for key, fallback in fallbacks.items()})
class PositionAnchor:
    """
    Multi-level position anchor for robust position recovery.

    Holds a primary abstract position, an ordered list of fallback positions
    and contextual progress information from which an approximate position
    can be derived when none of the stored positions fits the document.
    """

    def __init__(self, primary_position: AbstractPosition):
        """
        Initialize with primary abstract position.

        Args:
            primary_position: The main abstract position
        """
        self.primary_position = primary_position
        self.fallback_positions: List[AbstractPosition] = []
        self.context_text: Optional[str] = None  # Text snippet for fuzzy matching
        self.document_progress: float = 0.0  # Overall document progress
        self.paragraph_progress: float = 0.0  # Progress within paragraph

    def add_fallback(self, position: AbstractPosition):
        """Append a fallback position; fallbacks are tried in insertion order."""
        self.fallback_positions.append(position)

    def set_context(self, text: str, document_progress: float = 0.0,
                    paragraph_progress: float = 0.0):
        """Store the context text and progress values used for approximate recovery."""
        self.context_text = text
        self.document_progress = document_progress
        self.paragraph_progress = paragraph_progress

    def get_best_position(self, document: Document) -> AbstractPosition:
        """
        Get the best available position for the given document.

        Tries the primary position, then each fallback in order, and finally
        derives an approximate position from the stored document progress.

        Args:
            document: The document to position within

        Returns:
            The best available abstract position
        """
        for candidate in (self.primary_position, *self.fallback_positions):
            if self._is_position_valid(candidate, document):
                return candidate

        # Last resort: create approximate position from progress
        return self._create_approximate_position(document)

    def _is_position_valid(self, position: AbstractPosition, document: Document) -> bool:
        """
        Check that a position's chapter/block indexes exist in the document.

        Negative indexes are rejected: Python sequences would accept them and
        silently address content counted from the end.
        """
        try:
            if position.block_index < 0:
                return False

            if isinstance(document, Book):
                chapter_index = position.chapter_index
                if chapter_index is None:
                    # No chapter given, so there is nothing to check the block against.
                    return True
                if not 0 <= chapter_index < len(document.chapters):
                    return False
                chapter = document.chapters[chapter_index]
                return position.block_index < len(chapter.blocks)

            return position.block_index < len(document.blocks)
        except (AttributeError, IndexError, TypeError):
            # TypeError covers non-numeric indexes (e.g. null in a stored bookmark).
            return False

    def _create_approximate_position(self, document: Document) -> AbstractPosition:
        """
        Create an approximate position from the stored document progress.

        For books this inverts the formula used by
        ``AbstractPosition.get_progress``: the integer part of
        ``progress * chapter_count`` selects the chapter and the fractional
        part selects the block inside that chapter. Falls back to the start
        of the document if the document cannot be inspected.
        """
        try:
            position = AbstractPosition()
            # Keep the progress inside [0, 1] so no negative index is produced.
            progress = min(1.0, max(0.0, self.document_progress))

            if isinstance(document, Book):
                total_chapters = len(document.chapters)
                if total_chapters > 0:
                    scaled = progress * total_chapters
                    chapter_index = min(int(scaled), total_chapters - 1)
                    position.chapter_index = chapter_index

                    chapter = document.chapters[chapter_index]
                    if chapter.blocks:
                        # Remaining fraction = progress inside the chosen chapter.
                        within_chapter = min(1.0, max(0.0, scaled - chapter_index))
                        block_index = int(within_chapter * len(chapter.blocks))
                        position.block_index = min(block_index, len(chapter.blocks) - 1)
            else:
                if document.blocks:
                    block_index = int(progress * len(document.blocks))
                    position.block_index = min(block_index, len(document.blocks) - 1)

            position.confidence = 0.5  # Mark as approximate
            return position
        except (AttributeError, IndexError, ZeroDivisionError, TypeError):
            # Ultimate fallback - start of document (a fresh position, so no
            # partially filled-in indexes leak out).
            return AbstractPosition()

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            'primary_position': self.primary_position.to_dict(),
            'fallback_positions': [pos.to_dict() for pos in self.fallback_positions],
            'context_text': self.context_text,
            'document_progress': self.document_progress,
            'paragraph_progress': self.paragraph_progress
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PositionAnchor':
        """
        Create from dictionary.

        Raises:
            KeyError: If ``data`` has no 'primary_position' entry.
        """
        primary = AbstractPosition.from_dict(data['primary_position'])
        anchor = cls(primary)
        anchor.fallback_positions = [
            AbstractPosition.from_dict(pos_data)
            for pos_data in data.get('fallback_positions', [])
        ]
        anchor.context_text = data.get('context_text')
        anchor.document_progress = data.get('document_progress', 0.0)
        anchor.paragraph_progress = data.get('paragraph_progress', 0.0)
        return anchor

View File

@ -0,0 +1,459 @@
"""
Position translation system for pyWebLayout.
This module provides translation between abstract (content-based) and
concrete (rendering-based) positions. It handles the conversion logic
and maintains the relationship between logical document structure
and physical layout coordinates.
"""
from typing import Optional, Dict, Any, List, Tuple, Union
import hashlib
import json
from pyWebLayout.abstract.document import Document, Book, Chapter
from pyWebLayout.abstract.block import Block, BlockType, Paragraph, Heading, Table, HList, Image as AbstractImage
from pyWebLayout.abstract.inline import Word
from pyWebLayout.concrete.page import Page
from pyWebLayout.style import Font, Alignment
from pyWebLayout.typesetting.abstract_position import (
AbstractPosition, ConcretePosition, ElementType, PositionAnchor
)
class StyleParameters:
    """
    Bundle of layout settings that determine concrete positioning.

    The bundle can be reduced to a hash (``get_hash``) so that results
    computed for one set of settings can be told apart from another.
    """

    def __init__(
        self,
        page_size: Tuple[int, int] = (800, 600),
        margins: Tuple[int, int, int, int] = (20, 20, 20, 20),  # top, right, bottom, left
        default_font: Optional[Font] = None,
        line_spacing: int = 3,
        paragraph_spacing: int = 10,
        alignment: Alignment = Alignment.LEFT
    ):
        """
        Store the given layout settings.

        Args:
            page_size: (width, height) of pages
            margins: (top, right, bottom, left) margins
            default_font: Default font; a falsy value is replaced by ``Font()``
            line_spacing: Spacing between lines
            paragraph_spacing: Spacing between paragraphs
            alignment: Text alignment
        """
        self.page_size = page_size
        self.margins = margins
        self.line_spacing = line_spacing
        self.paragraph_spacing = paragraph_spacing
        self.alignment = alignment
        self.default_font = default_font or Font()

    def get_hash(self) -> str:
        """Return an MD5 hex digest of the key-sorted JSON form of these settings."""
        font = self.default_font
        if font:
            font_size = font.font_size
            font_path = getattr(font, 'font_path', None)
        else:
            font_size = 16
            font_path = None

        # Enum-like alignments contribute their value, anything else its str().
        if hasattr(self.alignment, 'value'):
            alignment_repr = self.alignment.value
        else:
            alignment_repr = str(self.alignment)

        fingerprint = {
            'page_size': self.page_size,
            'margins': self.margins,
            'font_size': font_size,
            'font_path': font_path,
            'line_spacing': self.line_spacing,
            'paragraph_spacing': self.paragraph_spacing,
            'alignment': alignment_repr,
        }
        encoded = json.dumps(fingerprint, sort_keys=True).encode()
        return hashlib.md5(encoded).hexdigest()

    def copy(self) -> 'StyleParameters':
        """Return a new instance with the same settings (the font object is shared)."""
        duplicate = StyleParameters(
            self.page_size,
            self.margins,
            self.default_font,
            self.line_spacing,
            self.paragraph_spacing,
            self.alignment,
        )
        return duplicate
class PositionTranslator:
    """
    Translates between abstract and concrete positions.

    The conversions implemented here are estimates based on simple
    heuristics (average block height, words per line); they do not run the
    real layout engine. Estimated results are marked via ``is_exact`` /
    ``confidence`` on the returned positions.
    """

    def __init__(self, document: Document, style_params: StyleParameters):
        """
        Initialize the position translator.

        Args:
            document: The document to work with
            style_params: Current style parameters
        """
        self.document = document
        self.style_params = style_params
        self._layout_cache: Dict[str, Any] = {}
        self._position_cache: Dict[str, ConcretePosition] = {}

    def update_style_params(self, new_params: StyleParameters):
        """
        Update style parameters and invalidate caches.

        Args:
            new_params: New style parameters
        """
        self.style_params = new_params
        self._layout_cache.clear()
        self._position_cache.clear()

    def abstract_to_concrete(self, abstract_pos: AbstractPosition) -> ConcretePosition:
        """
        Convert an abstract position to a concrete position.

        Results are cached per (position hash, style hash) pair.

        Args:
            abstract_pos: The abstract position to convert

        Returns:
            Corresponding concrete position
        """
        # Hash the style once; it is needed for the key, the check and the stamp.
        layout_hash = self.style_params.get_hash()
        cache_key = abstract_pos.get_hash() + layout_hash

        cached_pos = self._position_cache.get(cache_key)
        if cached_pos is not None and cached_pos.layout_hash == layout_hash:
            return cached_pos

        concrete_pos = self._calculate_concrete_position(abstract_pos)
        concrete_pos.update_layout_hash(layout_hash)
        self._position_cache[cache_key] = concrete_pos
        return concrete_pos

    def concrete_to_abstract(self, concrete_pos: ConcretePosition) -> AbstractPosition:
        """
        Convert a concrete position to an abstract position.

        Args:
            concrete_pos: The concrete position to convert

        Returns:
            Corresponding (estimated) abstract position
        """
        return self._calculate_abstract_position(concrete_pos)

    def find_clean_boundary(self, abstract_pos: AbstractPosition) -> AbstractPosition:
        """
        Find a clean reading boundary near the given position.

        A position inside a word is moved to the start of that word, and a
        position within the first few words of a paragraph is moved to the
        paragraph start. The input position is not modified.

        Args:
            abstract_pos: The starting position

        Returns:
            A clean boundary position (a copy)
        """
        clean_pos = abstract_pos.copy()

        # If we're in the middle of a word, move to word start
        if clean_pos.character_index is not None and clean_pos.character_index > 0:
            clean_pos.character_index = 0
            clean_pos.is_clean_boundary = True

        # Within the first few words of a paragraph, restart at its beginning
        if (clean_pos.word_index is not None and
                clean_pos.word_index <= 2 and
                clean_pos.element_type == ElementType.PARAGRAPH):
            clean_pos.word_index = 0
            clean_pos.character_index = 0

        return clean_pos

    def create_position_anchor(self, abstract_pos: AbstractPosition,
                               context_window: int = 50) -> PositionAnchor:
        """
        Create a robust position anchor with fallbacks.

        Args:
            abstract_pos: Primary abstract position
            context_window: Size (in words) of the text context to capture

        Returns:
            Position anchor with fallbacks and context information
        """
        anchor = PositionAnchor(abstract_pos)

        # Fallback 1: Start of current paragraph/element
        para_start = abstract_pos.copy()
        para_start.word_index = 0
        para_start.character_index = 0
        anchor.add_fallback(para_start)

        # Fallback 2: Start of current block
        block_start = abstract_pos.copy()
        block_start.element_index = 0
        block_start.word_index = 0
        block_start.character_index = 0
        anchor.add_fallback(block_start)

        # Add context information
        context_text = self._extract_context_text(abstract_pos, context_window)
        doc_progress = abstract_pos.get_progress(self.document)
        para_progress = self._get_paragraph_progress(abstract_pos)
        anchor.set_context(context_text, doc_progress, para_progress)

        return anchor

    def _usable_page_height(self) -> int:
        """Return the page height minus the top and bottom margins."""
        # margins is (top, right, bottom, left); [::2] selects top and bottom.
        return self.style_params.page_size[1] - sum(self.style_params.margins[::2])

    def _calculate_concrete_position(self, abstract_pos: AbstractPosition) -> ConcretePosition:
        """
        Estimate a concrete position from an abstract position.

        Returns a default (start) position when the referenced block cannot
        be found; otherwise an approximation flagged with ``is_exact=False``.
        """
        target_block = self._get_block_from_position(abstract_pos)
        if target_block is None:
            return ConcretePosition()  # Default to start

        estimated_page = self._estimate_page_for_block(abstract_pos)
        estimated_y = self._estimate_y_coordinate(abstract_pos, target_block)

        return ConcretePosition(
            page_index=estimated_page,
            viewport_x=self.style_params.margins[3],  # Left margin
            viewport_y=estimated_y,
            is_exact=False  # Mark as approximation
        )

    def _calculate_abstract_position(self, concrete_pos: ConcretePosition) -> AbstractPosition:
        """
        Estimate an abstract position from page index and y-coordinate.

        The result only sets ``block_index`` and carries a reduced confidence.
        """
        abstract_pos = AbstractPosition()

        # Estimate block based on page and position
        blocks_per_page = self._estimate_blocks_per_page()
        estimated_block = concrete_pos.page_index * blocks_per_page

        # Adjust based on y-coordinate within page; skip the adjustment when
        # margins leave no usable height (avoids dividing by zero).
        page_height = self._usable_page_height()
        if page_height > 0:
            relative_y = concrete_pos.viewport_y / page_height
            estimated_block += int(relative_y * blocks_per_page)

        abstract_pos.block_index = max(0, estimated_block)
        abstract_pos.confidence = 0.7  # Mark as estimate

        return abstract_pos

    def _get_block_from_position(self, abstract_pos: AbstractPosition) -> Optional[Block]:
        """
        Get the block referenced by an abstract position.

        Returns None when the position has no usable chapter (for books),
        when an index is negative or out of range, or when the document
        lacks the expected attributes.
        """
        try:
            block_index = abstract_pos.block_index
            # Negative indexes would address content counted from the end.
            if block_index < 0:
                return None

            if isinstance(self.document, Book):
                chapter_index = abstract_pos.chapter_index
                if chapter_index is None or chapter_index < 0:
                    return None
                chapter = self.document.chapters[chapter_index]
                return chapter.blocks[block_index]

            return self.document.blocks[block_index]
        except (IndexError, AttributeError, TypeError):
            return None

    def _estimate_page_for_block(self, abstract_pos: AbstractPosition) -> int:
        """Estimate which page a block would appear on from its index."""
        blocks_per_page = self._estimate_blocks_per_page()
        return abstract_pos.block_index // max(1, blocks_per_page)

    def _estimate_blocks_per_page(self) -> int:
        """
        Estimate how many blocks fit on a page (always at least 1).

        Uses three times the default font size as the average block height.
        """
        average_block_height = self.style_params.default_font.font_size * 3
        if average_block_height <= 0:
            # A zero/negative font size gives no usable estimate.
            return 1
        return max(1, int(self._usable_page_height() // average_block_height))

    def _estimate_y_coordinate(self, abstract_pos: AbstractPosition, block: Block) -> int:
        """
        Estimate the y-coordinate within the page for a position.

        ``block`` is accepted for interface symmetry; the estimate currently
        depends only on the indexes in ``abstract_pos`` and the style parameters.
        """
        # Start with top margin
        y = self.style_params.margins[0]

        # Add estimated height for preceding blocks on the same page
        blocks_before = abstract_pos.block_index % self._estimate_blocks_per_page()
        block_height = self.style_params.default_font.font_size * 2  # Rough estimate
        y += blocks_before * (block_height + self.style_params.paragraph_spacing)

        # Add offset within block if a word position is specified
        if abstract_pos.word_index is not None:
            line_height = self.style_params.default_font.font_size + self.style_params.line_spacing
            estimated_line = abstract_pos.word_index // 10  # Rough estimate of words per line
            y += estimated_line * line_height

        return y

    def _extract_context_text(self, abstract_pos: AbstractPosition, window: int) -> str:
        """
        Extract the words surrounding the position, joined by single spaces.

        Returns an empty string when the position does not refer to a
        paragraph or the paragraph's words cannot be read.
        """
        block = self._get_block_from_position(abstract_pos)
        if not block or not isinstance(block, Paragraph):
            return ""

        try:
            words = [word.text for _, word in block.words()]
        except Exception:
            # Best effort: context is optional, but never swallow
            # KeyboardInterrupt/SystemExit as a bare ``except`` would.
            return ""

        if not words:
            return ""

        # Get context window around current word
        word_idx = abstract_pos.word_index or 0
        start_idx = max(0, word_idx - window // 2)
        end_idx = min(len(words), word_idx + window // 2)

        return " ".join(words[start_idx:end_idx])

    def _get_paragraph_progress(self, abstract_pos: AbstractPosition) -> float:
        """
        Get progress within the current paragraph, capped at 1.0.

        Returns 0.0 when no word index is set, the position does not refer
        to a paragraph, or the paragraph's words cannot be counted.
        """
        if abstract_pos.word_index is None:
            return 0.0

        block = self._get_block_from_position(abstract_pos)
        if not block or not isinstance(block, Paragraph):
            return 0.0

        try:
            total_words = sum(1 for _ in block.words())
            if total_words == 0:
                return 0.0
            return min(1.0, abstract_pos.word_index / total_words)
        except Exception:
            # Best effort, see _extract_context_text.
            return 0.0
class PositionTracker:
    """
    High-level interface for tracking and managing positions.

    Wraps a ``PositionTranslator`` and keeps the current abstract reading
    position together with a history of anchors recorded on style changes.
    """

    def __init__(self, document: Document, style_params: StyleParameters):
        """
        Initialize position tracker.

        Args:
            document: Document to track positions in
            style_params: Current style parameters
        """
        self.document = document
        self.translator = PositionTranslator(document, style_params)
        self.current_position: Optional[AbstractPosition] = None
        self.reading_history: List[PositionAnchor] = []

    def set_current_position(self, position: AbstractPosition):
        """Set the current reading position."""
        self.current_position = position

    def get_current_position(self) -> Optional[AbstractPosition]:
        """Get the current reading position (None if none has been set)."""
        return self.current_position

    def save_bookmark(self) -> str:
        """
        Save current position as bookmark string.

        Returns:
            JSON form of a position anchor for the current position, or an
            empty string when there is no current position.
        """
        if self.current_position is None:
            return ""

        anchor = self.translator.create_position_anchor(self.current_position)
        return json.dumps(anchor.to_dict())

    def load_bookmark(self, bookmark_str: str) -> bool:
        """
        Load position from bookmark string.

        The current position is only replaced when loading succeeds.

        Args:
            bookmark_str: Bookmark string to load

        Returns:
            True if successful, False otherwise
        """
        try:
            anchor_data = json.loads(bookmark_str)
            anchor = PositionAnchor.from_dict(anchor_data)
            best_position = anchor.get_best_position(self.document)
            restored = self.translator.find_clean_boundary(best_position)
        except (ValueError, KeyError, TypeError, AttributeError):
            # ValueError covers json.JSONDecodeError and unknown element types;
            # TypeError/AttributeError cover valid JSON of the wrong shape
            # (e.g. "null" or "[]") and non-string input.
            return False

        self.current_position = restored
        return True

    def handle_style_change(self, new_style_params: StyleParameters):
        """
        Handle style parameter changes.

        Records an anchor for the current position in the reading history,
        switches the translator to the new parameters and re-snaps the
        current position to a clean boundary.

        Args:
            new_style_params: New style parameters
        """
        had_position = self.current_position is not None

        # Save current position before style change
        if had_position:
            anchor = self.translator.create_position_anchor(self.current_position)
            self.reading_history.append(anchor)

        # Update translator with new style
        self.translator.update_style_params(new_style_params)

        # The abstract position is still valid, but make sure it is a clean
        # boundary for the new style
        if had_position:
            self.current_position = self.translator.find_clean_boundary(self.current_position)

    def get_concrete_position(self) -> Optional[ConcretePosition]:
        """Get current position as concrete coordinates (None if no position is set)."""
        if self.current_position is None:
            return None

        return self.translator.abstract_to_concrete(self.current_position)

    def set_position_from_concrete(self, concrete_pos: ConcretePosition):
        """Set the current position from concrete coordinates, snapped to a clean boundary."""
        abstract_pos = self.translator.concrete_to_abstract(concrete_pos)
        self.current_position = self.translator.find_clean_boundary(abstract_pos)

    def get_reading_progress(self) -> float:
        """Get reading progress as a fraction (0.0 when no position is set)."""
        if self.current_position is None:
            return 0.0

        return self.current_position.get_progress(self.document)

250
test_monospace_demo.py Normal file
View File

@ -0,0 +1,250 @@
#!/usr/bin/env python3
"""
Simple demonstration of mono-space font testing concepts.
"""
import os

from pyWebLayout.concrete.text import Text, Line
from pyWebLayout.style.fonts import Font
from pyWebLayout.style.layout import Alignment
# Candidate mono-space font files (Linux, macOS and Windows locations).
_MONO_FONT_CANDIDATES = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf",
    "/System/Library/Fonts/Monaco.ttf",
    "C:/Windows/Fonts/consola.ttf",
)
# Characters measured to show how much glyph width varies within one font.
_WIDTH_PROBE_CHARS = "iIlLmMwW"
# Per-character width (px) assumed when no mono-space font can be loaded.
_THEORETICAL_MONO_WIDTH = 8
# Longest string shown verbatim in the capacity table before it is shortened.
_PREVIEW_LENGTH = 10


def _measure_widths(chars, font):
    """Return a dict mapping each character of *chars* to its Text width in *font*."""
    return {char: Text(char, font).width for char in chars}


def _load_mono_font(font_size):
    """Return a Font for the first loadable candidate path, or None if none load."""
    for path in _MONO_FONT_CANDIDATES:
        if not os.path.exists(path):
            continue
        try:
            mono_font = Font(font_path=path, font_size=font_size)
        except Exception:
            # Best effort: skip a candidate that exists but cannot be loaded.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
            continue
        print(f" Using actual mono-space font: {os.path.basename(path)}")
        return mono_font
    return None


def _demo_mono_consistency(mono_font, line_width, regular_variance):
    """Print width statistics for *mono_font* and return its per-character width."""
    mono_widths = _measure_widths(_WIDTH_PROBE_CHARS, mono_font)
    mono_min = min(mono_widths.values())
    mono_max = max(mono_widths.values())
    mono_variance = mono_max - mono_min

    print(" Mono-space character widths:")
    for char, width in mono_widths.items():
        print(f" '{char}': {width}px")
    print(f" Range: {mono_min}-{mono_max}px (variance: {mono_variance}px)")

    # max(1, ...) avoids dividing by zero when the font is perfectly mono-space.
    improvement = regular_variance / max(1, mono_variance)
    print(f" Improvement: {improvement:.1f}x more consistent!")

    mono_char_width = mono_widths['M']
    # Guard against a zero-width measurement so the capacity maths cannot crash.
    capacity = line_width // max(1, mono_char_width)
    print("\n Actual mono-space line capacity:")
    print(f" Each character: {mono_char_width}px")
    print(f" Line capacity: {capacity} characters")

    # Strings of equal length built from different characters should all
    # render at (nearly) the same width in a mono-space font.
    print(f" Testing {capacity}-character strings:")
    first_width = None
    all_same_width = True
    for char in "iWMl":
        sample_width = Text(char * capacity, mono_font).width
        if first_width is None:
            first_width = sample_width
        elif abs(sample_width - first_width) > 2:  # Allow 2px tolerance
            all_same_width = False
        print(f" '{char}' × {capacity}: {sample_width}px")

    if all_same_width:
        print(f" ✓ ALL {capacity}-character strings have the same width!")
    else:
        print(" ⚠ Some variance detected (font may not be perfectly mono-space)")
    return mono_char_width


def _report_mono_fit(word, char_width, line_width, label):
    """Print whether *word* fits in *line_width* at a fixed *char_width* per character."""
    word_width = len(word) * char_width
    print(f"\n With {label} mono-space ({char_width}px/char):")
    print(f" Word would be: {len(word)} × {char_width} = {word_width}px")
    if word_width <= line_width:
        print(" → Would fit completely")
    else:
        # Reaching this branch with a non-negative line width implies
        # char_width > 0, so the division is safe.
        chars_that_fit = line_width // char_width
        print(f" → Would need breaking after {chars_that_fit} characters")


def _save_visual_demo(font, output_dir):
    """Render a small sample page with *font* and save it as a PNG in *output_dir*."""
    # Imported lazily: only needed when the optional visual output is produced.
    from pyWebLayout.concrete.page import Page, Container

    print("\nCreating visual test output...")
    page = Page(size=(400, 200))
    container = Container(
        origin=(0, 0),
        size=(380, 180),
        direction='vertical',
        spacing=5,
        padding=(10, 10, 10, 10)
    )
    container.add_child(Text("Character Width Variance Demo", font))
    for char_type, char in [("Narrow", "i"), ("Wide", "W"), ("Average", "n")]:
        container.add_child(Text(f"{char_type}: {char * 10}", font))
    page.add_child(container)

    image = page.render()
    output_path = os.path.join(output_dir, "monospace_demo.png")
    image.save(output_path)
    print(f"Visual demo saved to: {output_path}")


def main():
    """
    Run the mono-space font demonstration and print the results.

    The demo measures per-character widths in the default font, shows how
    equal-length strings differ in pixel width, compares that against a
    mono-space font when one of the known font files can be loaded (falling
    back to a theoretical width otherwise), and tries fitting a word onto a
    Line. If a ``test_output`` directory exists, a small rendered page is
    saved there as well.
    """
    print("=== Mono-space Font Testing Demo ===\n")
    font = Font(font_size=12)

    print("1. Character Width Variance Analysis:")
    print("-" * 40)
    widths = _measure_widths(_WIDTH_PROBE_CHARS, font)
    for char, width in widths.items():
        print(f" '{char}': {width:3d}px")
    min_w = min(widths.values())
    max_w = max(widths.values())
    variance = max_w - min_w
    print(f"\n Range: {min_w}-{max_w}px (variance: {variance}px)")
    if min_w > 0:
        print(f" Ratio: {max_w/min_w:.1f}x difference")
    else:
        # A zero-width glyph would otherwise raise ZeroDivisionError.
        print(" Ratio: n/a (narrowest character has zero width)")

    print("\n2. Why This Matters for Testing:")
    print("-" * 40)
    # Same character count, very different rendered width.
    word1 = "ill"  # narrow
    word2 = "WWW"  # wide
    text1 = Text(word1, font)
    text2 = Text(word2, font)
    print(f" '{word1}' ({len(word1)} chars): {text1.width}px")
    print(f" '{word2}' ({len(word2)} chars): {text2.width}px")
    print(f" Same length, {abs(text1.width - text2.width)}px difference!")

    print("\n3. Line Capacity Prediction:")
    print("-" * 40)
    line_width = 100
    print(f" Line width: {line_width}px")
    test_cases = [
        ("narrow chars", "i" * 20),
        ("wide chars", "W" * 10),
        ("mixed text", "Hello World"),
    ]
    for name, text_str in test_cases:
        text_obj = Text(text_str, font)
        fits = "YES" if text_obj.width <= line_width else "NO"
        # Only mark the preview with an ellipsis when it was actually shortened.
        if len(text_str) > _PREVIEW_LENGTH:
            preview = f"{text_str[:_PREVIEW_LENGTH]}..."
        else:
            preview = text_str
        print(f" {name:12}: '{preview}' ({len(text_str)} chars, {text_obj.width}px) → {fits}")

    print("\n4. With Mono-space Fonts:")
    print("-" * 40)
    mono_font = _load_mono_font(font_size=12)
    if mono_font:
        mono_char_width = _demo_mono_consistency(mono_font, line_width, variance)
        mono_label = "actual"
    else:
        print(" No mono-space font found - showing theoretical values:")
        mono_char_width = _THEORETICAL_MONO_WIDTH  # Typical mono-space width
        mono_label = "theoretical"
        capacity = line_width // mono_char_width
        print(f" Each character: {mono_char_width}px (theoretical)")
        print(f" Line capacity: {capacity} characters")
        print(f" ANY {capacity}-character string would fit!")
        print(" Layout calculations become simple math")

    print("\n5. Line Fitting Test:")
    print("-" * 40)
    line = Line(
        spacing=(2, 4),
        origin=(0, 0),
        size=(line_width, 20),
        font=font,
        halign=Alignment.LEFT
    )
    test_word = "development"  # 11 characters
    word_obj = Text(test_word, font)
    print(f" Test word: '{test_word}' ({len(test_word)} chars, {word_obj.width}px)")
    print(f" Line width: {line_width}px")
    result = line.add_word(test_word, font)
    if result is None:
        print(" Result: Word fits completely")
    elif line.text_objects:
        added = line.text_objects[0].text
        print(f" Result: Added '{added}', remaining '{result}'")
    else:
        print(" Result: Word rejected completely")

    # Same word, but with the predictable per-character arithmetic.
    _report_mono_fit(test_word, mono_char_width, line_width, mono_label)

    print("\n=== Conclusion ===")
    print("Mono-space fonts make testing predictable because:")
    print("- Character width is constant")
    print("- Line capacity is calculable")
    print("- Word fitting is based on character count")
    print("- Layout behavior is deterministic")

    # Optional visual output: only when the target is an existing directory
    # (a plain file named "test_output" would make the save fail).
    if os.path.isdir("test_output"):
        _save_visual_demo(font, "test_output")


if __name__ == "__main__":
    main()

View File

@ -26,7 +26,6 @@ class TestWord(unittest.TestCase):
self.assertEqual(word.text, "hello")
self.assertEqual(word.style, self.font)
self.assertEqual(word.background, self.font.background)
self.assertIsNone(word.previous)
self.assertIsNone(word.next)
self.assertIsNone(word.hyphenated_parts)
@ -252,27 +251,6 @@ class TestWord(unittest.TestCase):
for i, expected_part in enumerate(expected_parts):
self.assertEqual(word.get_hyphenated_part(i), expected_part)
def test_word_create_and_add_to_with_container_style(self):
    """Word.create_and_add_to picks up the style of a container exposing `style`."""
    # Stand-in container that only offers a style and an add_word hook.
    container = Mock()
    container.add_word = Mock()
    container.style = self.font
    # Delete these attributes so the mock does not auto-create them:
    # `_words` must not interfere, and without `background` the word
    # inherits its background from the font.
    del container.background
    del container._words

    created = Word.create_and_add_to("hello", container)

    # The new word carries the expected text and the container's style.
    self.assertIsInstance(created, Word)
    self.assertEqual(created.text, "hello")
    self.assertEqual(created.style, self.font)
    self.assertEqual(created.background, self.font.background)

    # The container was handed exactly this word, exactly once.
    container.add_word.assert_called_once_with(created)
def test_word_create_and_add_to_with_style_override(self):
"""Test Word.create_and_add_to with explicit style parameter."""

View File

@ -398,22 +398,22 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
</div>
"""
# Initially empty font registry
initial_font_count = len(self.doc._fonts)
# Initially empty style registry
initial_style_count = self.doc.get_style_registry().get_style_count()
# Parse HTML with document context
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
# Should have created fonts for different styles
final_font_count = len(self.doc._fonts)
self.assertGreater(final_font_count, initial_font_count,
"Should have created fonts in registry")
# Should have created styles for different formatting
final_style_count = self.doc.get_style_registry().get_style_count()
self.assertGreater(final_style_count, initial_style_count,
"Should have created styles in registry")
# Should have created blocks
self.assertGreater(len(blocks), 0, "Should have created blocks")
def test_font_registry_reuses_fonts(self):
"""Test that parsing same content reuses existing fonts."""
"""Test that parsing same content reuses existing styles."""
html_content = """
<div>
<p>This is <strong>bold text</strong> and <em>italic text</em>.</p>
@ -423,43 +423,43 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
# First parse
blocks1 = parse_html_string(html_content, self.base_font, document=self.doc)
first_parse_font_count = len(self.doc._fonts)
first_parse_style_count = self.doc.get_style_registry().get_style_count()
# Second parse with same content
blocks2 = parse_html_string(html_content, self.base_font, document=self.doc)
second_parse_font_count = len(self.doc._fonts)
second_parse_style_count = self.doc.get_style_registry().get_style_count()
# Font count should not increase on second parse
self.assertEqual(first_parse_font_count, second_parse_font_count,
"Should reuse existing fonts instead of creating new ones")
# Style count should not increase on second parse
self.assertEqual(first_parse_style_count, second_parse_style_count,
"Should reuse existing styles instead of creating new ones")
# Both parses should create same number of blocks
self.assertEqual(len(blocks1), len(blocks2),
"Should create same structure on both parses")
def test_font_registry_different_styles_create_different_fonts(self):
"""Test that different styles create different font objects."""
# Create fonts with different properties
font1 = self.doc.get_or_create_font(
font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD
"""Test that different styles create different style objects."""
# Create styles with different properties
style_id1, style1 = self.doc.get_or_create_style(
font_size=14, color=(255, 0, 0), font_weight=FontWeight.BOLD
)
font2 = self.doc.get_or_create_font(
font_size=16, colour=(255, 0, 0), weight=FontWeight.BOLD
style_id2, style2 = self.doc.get_or_create_style(
font_size=16, color=(255, 0, 0), font_weight=FontWeight.BOLD
)
font3 = self.doc.get_or_create_font(
font_size=14, colour=(0, 255, 0), weight=FontWeight.BOLD
style_id3, style3 = self.doc.get_or_create_style(
font_size=14, color=(0, 255, 0), font_weight=FontWeight.BOLD
)
# Should be different objects
self.assertIsNot(font1, font2, "Different sizes should create different fonts")
self.assertIsNot(font1, font3, "Different colors should create different fonts")
self.assertIsNot(font2, font3, "All fonts should be different")
# Should be different style IDs
self.assertNotEqual(style_id1, style_id2, "Different sizes should create different styles")
self.assertNotEqual(style_id1, style_id3, "Different colors should create different styles")
self.assertNotEqual(style_id2, style_id3, "All styles should be different")
# Should have 3 fonts in registry
self.assertEqual(len(self.doc._fonts), 3)
# Should have multiple styles in registry
self.assertGreaterEqual(self.doc.get_style_registry().get_style_count(), 3)
def test_font_registry_integration_with_html_styles(self):
"""Test that HTML parsing uses font registry for styled content."""
"""Test that HTML parsing uses style registry for styled content."""
html_content = """
<p>Normal text with <strong>bold</strong> and <em>italic</em> and
<span style="color: red;">red text</span>.</p>
@ -485,14 +485,17 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
self.assertGreater(len(italic_words), 0, "Should have italic words")
self.assertGreater(len(red_words), 0, "Should have red words")
# Font registry should contain multiple fonts for different styles
self.assertGreater(len(self.doc._fonts), 1,
"Should have multiple fonts for different styles")
# Style registry should contain multiple styles for different formatting
self.assertGreater(self.doc.get_style_registry().get_style_count(), 1,
"Should have multiple styles for different formatting")
def test_font_registry_without_document_context(self):
"""Test that parsing without document context works (fallback behavior)."""
html_content = "<p>This is <strong>bold text</strong>.</p>"
# Get initial style count (should include default style)
initial_style_count = self.doc.get_style_registry().get_style_count()
# Parse without document context
blocks = parse_html_string(html_content, self.base_font)
@ -500,12 +503,13 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
self.assertEqual(len(blocks), 1)
self.assertIsInstance(blocks[0], Paragraph)
# Should not affect document's font registry
self.assertEqual(len(self.doc._fonts), 0,
"Document font registry should remain empty")
# Should not affect document's style registry
final_style_count = self.doc.get_style_registry().get_style_count()
self.assertEqual(final_style_count, initial_style_count,
"Document style registry should remain unchanged")
def test_complex_html_font_reuse(self):
"""Test font reuse with complex HTML containing repeated styles."""
"""Test style reuse with complex HTML containing repeated styles."""
html_content = """
<div>
<h1>First Header</h1>
@ -517,21 +521,21 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
# Parse content
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
font_count_after_parse = len(self.doc._fonts)
style_count_after_parse = self.doc.get_style_registry().get_style_count()
# Parse same content again
blocks2 = parse_html_string(html_content, self.base_font, document=self.doc)
font_count_after_second_parse = len(self.doc._fonts)
style_count_after_second_parse = self.doc.get_style_registry().get_style_count()
# Font count should not increase on second parse
self.assertEqual(font_count_after_parse, font_count_after_second_parse,
"Fonts should be reused for repeated styles")
# Style count should not increase on second parse
self.assertEqual(style_count_after_parse, style_count_after_second_parse,
"Styles should be reused for repeated formatting")
# Both should create same structure
self.assertEqual(len(blocks), len(blocks2))
def test_font_registry_with_nested_styles(self):
"""Test font registry with nested HTML styles."""
"""Test style registry with nested HTML styles."""
html_content = """
<p>Text with <strong>bold and <em>bold italic</em> nested</strong> styles.</p>
"""
@ -539,7 +543,7 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
# Parse content
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
# Should create fonts for different style combinations
# Should create styles for different style combinations
paragraph = blocks[0]
words = list(paragraph.words())
@ -551,9 +555,9 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
self.assertGreater(len(bold_italic_words), 0,
"Should have words with combined bold+italic style")
# Should have multiple fonts in registry for different combinations
self.assertGreater(len(self.doc._fonts), 1,
"Should create separate fonts for style combinations")
# Should have multiple styles in registry for different combinations
self.assertGreater(self.doc.get_style_registry().get_style_count(), 1,
"Should create separate styles for style combinations")
if __name__ == '__main__':

File diff suppressed because it is too large Load Diff