This commit is contained in:
parent
ae15fe54e8
commit
d0153c6397
@ -45,6 +45,7 @@ class Document:
|
|||||||
self._resources: Dict[str, Any] = {} # External resources like images
|
self._resources: Dict[str, Any] = {} # External resources like images
|
||||||
self._stylesheets: List[Dict[str, Any]] = [] # CSS stylesheets
|
self._stylesheets: List[Dict[str, Any]] = [] # CSS stylesheets
|
||||||
self._scripts: List[str] = [] # JavaScript code
|
self._scripts: List[str] = [] # JavaScript code
|
||||||
|
self._fonts: Dict[str, Font] = {} # Font registry for backward compatibility
|
||||||
|
|
||||||
# Style management with new abstract/concrete system
|
# Style management with new abstract/concrete system
|
||||||
self._abstract_style_registry = AbstractStyleRegistry()
|
self._abstract_style_registry = AbstractStyleRegistry()
|
||||||
@ -396,6 +397,76 @@ class Document:
|
|||||||
"""Get the concrete style registry for this document."""
|
"""Get the concrete style registry for this document."""
|
||||||
return self._concrete_style_registry
|
return self._concrete_style_registry
|
||||||
|
|
||||||
|
def get_or_create_font(self,
                       font_path: Optional[str] = None,
                       font_size: int = 16,
                       colour: Tuple[int, int, int] = (0, 0, 0),
                       weight: FontWeight = FontWeight.NORMAL,
                       style: FontStyle = FontStyle.NORMAL,
                       decoration: TextDecoration = TextDecoration.NONE,
                       background: Optional[Tuple[int, int, int, int]] = None,
                       language: str = "en_EN",
                       min_hyphenation_width: Optional[int] = None) -> Font:
    """
    Return the registered font for these properties, creating it on first use.

    Fonts are memoised in ``self._fonts`` under the string form of a tuple
    built from every argument, so two calls with the same properties yield
    the same ``Font`` object.

    Args:
        font_path: Path to the font file (.ttf, .otf). If None, uses default font.
        font_size: Size of the font in points.
        colour: RGB color tuple for the text.
        weight: Font weight (normal or bold).
        style: Font style (normal or italic).
        decoration: Text decoration (none, underline, or strikethrough).
        background: RGBA background color for the text. If None, transparent background.
        language: Language code for hyphenation and text processing.
        min_hyphenation_width: Minimum width in pixels required for hyphenation.

    Returns:
        Font object (either existing or newly created)
    """
    # Create the registry lazily in case the instance was built without it.
    if not hasattr(self, '_fonts'):
        self._fonts = {}
    registry = self._fonts

    def _plain(value, enum_type):
        # Enum members contribute their underlying value to the key;
        # anything else is used unchanged.
        return value.value if isinstance(value, enum_type) else value

    # Normalised values are used for the lookup key only; the Font itself
    # receives the caller's original ``background`` and
    # ``min_hyphenation_width``.
    key_background = background if background else (255, 255, 255, 0)
    if min_hyphenation_width is None:
        key_min_width = font_size * 4
    else:
        key_min_width = min_hyphenation_width

    cache_key = str((
        font_path,
        font_size,
        colour,
        _plain(weight, FontWeight),
        _plain(style, FontStyle),
        _plain(decoration, TextDecoration),
        key_background,
        language,
        key_min_width
    ))

    if cache_key not in registry:
        registry[cache_key] = Font(
            font_path=font_path,
            font_size=font_size,
            colour=colour,
            weight=weight,
            style=style,
            decoration=decoration,
            background=background,
            language=language,
            min_hyphenation_width=min_hyphenation_width
        )
    return registry[cache_key]
|
||||||
|
|
||||||
|
|
||||||
class Chapter:
|
class Chapter:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -1,86 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Simple EPUB test script to isolate the issue.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
# Add the parent directory to the path to import pyWebLayout
|
|
||||||
sys.path.append(str(Path(__file__).parent.parent.parent))
|
|
||||||
|
|
||||||
def test_epub_basic():
    """
    Smoke-check the core document classes without any HTML parsing.

    Imports the document classes, builds a book and adds one chapter to it.

    Returns:
        True when every step succeeded, False when any step raised
        (the traceback is printed).
    """
    import traceback

    print("Testing basic EPUB components...")

    try:
        # Importing all four names doubles as a check that they exist.
        from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
        print("✓ Document classes imported")

        # Build the smallest possible book.
        Book("Test Book", "Test Author").create_chapter("Test Chapter")
        print("✓ Book and chapter created")
    except Exception as exc:
        print(f"✗ Basic test failed: {exc}")
        traceback.print_exc()
        return False

    return True
|
|
||||||
|
|
||||||
def test_epub_file():
    """
    Check that the sample EPUB can be opened as a zip archive.

    Looks for ``pg174-images-3.epub`` relative to the current working
    directory, lists its members and reports whether a container file and
    a package (.opf) file are present.

    Returns:
        False when the file is missing or an exception occurred, True
        otherwise (regardless of whether the key files were found).
    """
    import os
    import traceback
    import zipfile

    print("Testing EPUB file access...")

    try:
        epub_path = "pg174-images-3.epub"
        if not os.path.exists(epub_path):
            print(f"✗ EPUB file not found: {epub_path}")
            return False

        with zipfile.ZipFile(epub_path, 'r') as archive:
            members = archive.namelist()
            print(f"✓ EPUB file opened, contains {len(members)} files")

            # Substring matches against the member names.
            container_state = 'found' if any('container.xml' in m for m in members) else 'not found'
            package_state = 'found' if any('.opf' in m for m in members) else 'not found'

            print(f"✓ Container file: {container_state}")
            print(f"✓ Package file: {package_state}")
    except Exception as exc:
        print(f"✗ EPUB file test failed: {exc}")
        traceback.print_exc()
        return False

    return True
|
|
||||||
|
|
||||||
def main():
    """
    Run the basic checks in order, stopping at the first failure.

    Returns:
        True when every check passed, False as soon as one fails.
    """
    print("Simple EPUB Test")
    print("=" * 50)

    # Basic functionality first, then EPUB file access; a blank line is
    # printed after each check that passes.
    for check in (test_epub_basic, test_epub_file):
        if not check():
            return False
        print()

    print("All basic tests passed!")
    return True
|
|
||||||
|
|
||||||
# Script entry point: exit status 0 when main() reports success, 1 otherwise.
if __name__ == "__main__":
    sys.exit(0 if main() else 1)
|
|
||||||
@ -271,8 +271,33 @@ def apply_element_font_styles(
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Use document's font registry if available to avoid creating duplicate fonts
|
# Use document's style registry if available to avoid creating duplicate styles
|
||||||
if context and context.document and hasattr(context.document, 'get_or_create_font'):
|
if context and context.document and hasattr(context.document, 'get_or_create_style'):
|
||||||
|
# Create an abstract style first
|
||||||
|
from pyWebLayout.style.abstract_style import FontFamily, FontSize
|
||||||
|
|
||||||
|
# Map font properties to abstract style properties
|
||||||
|
font_family = FontFamily.SERIF # Default - could be enhanced to detect from font_path
|
||||||
|
if font_size:
|
||||||
|
font_size_value = font_size if isinstance(font_size, int) else FontSize.MEDIUM
|
||||||
|
else:
|
||||||
|
font_size_value = FontSize.MEDIUM
|
||||||
|
|
||||||
|
# Create abstract style and register it
|
||||||
|
style_id, abstract_style = context.document.get_or_create_style(
|
||||||
|
font_family=font_family,
|
||||||
|
font_size=font_size_value,
|
||||||
|
font_weight=weight,
|
||||||
|
font_style=style,
|
||||||
|
text_decoration=decoration,
|
||||||
|
color=colour,
|
||||||
|
language=language
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get the concrete font for this style
|
||||||
|
return context.document.get_font_for_style(abstract_style)
|
||||||
|
elif context and context.document and hasattr(context.document, 'get_or_create_font'):
|
||||||
|
# Fallback to old font registry system
|
||||||
return context.document.get_or_create_font(
|
return context.document.get_or_create_font(
|
||||||
font_path=font_path,
|
font_path=font_path,
|
||||||
font_size=font_size,
|
font_size=font_size,
|
||||||
|
|||||||
380
pyWebLayout/typesetting/abstract_position.py
Normal file
380
pyWebLayout/typesetting/abstract_position.py
Normal file
@ -0,0 +1,380 @@
|
|||||||
|
"""
|
||||||
|
Abstract positioning system for pyWebLayout.
|
||||||
|
|
||||||
|
This module provides content-based addressing that survives style changes,
|
||||||
|
font size modifications, and layout parameter changes. Abstract positions
|
||||||
|
represent logical locations in the document content structure.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional, Dict, Any, List, Tuple
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
import json
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
from pyWebLayout.abstract.block import Block, BlockType
|
||||||
|
from pyWebLayout.abstract.document import Document, Book, Chapter
|
||||||
|
|
||||||
|
|
||||||
|
class ElementType(Enum):
    """Kinds of element that a position can address inside a block.

    Each member's value is a plain string; ``AbstractPosition.to_dict``
    stores that string and ``from_dict`` looks the member up from it.
    """

    # Text-bearing elements
    PARAGRAPH = "paragraph"

    # Non-text and structured elements
    IMAGE = "image"
    TABLE = "table"
    LIST = "list"

    # Remaining block-level elements
    HEADING = "heading"
    HORIZONTAL_RULE = "horizontal_rule"
    CODE_BLOCK = "code_block"
    QUOTE = "quote"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AbstractPosition:
    """
    Abstract position that represents a logical location in document content.

    This position survives style changes, font size modifications, and layout
    parameter changes because it addresses content structure rather than
    physical rendering coordinates.
    """

    # Document structure addressing
    document_id: Optional[str] = None
    chapter_index: Optional[int] = None  # For Book objects
    block_index: int = 0
    element_index: int = 0  # Index within block (paragraph, image, etc.)
    element_type: ElementType = ElementType.PARAGRAPH

    # Text content addressing (for text elements)
    word_index: Optional[int] = None
    character_index: Optional[int] = None

    # Splittable content addressing (tables, lists)
    row_index: Optional[int] = None
    cell_index: Optional[int] = None
    list_item_index: Optional[int] = None

    # Position quality indicators
    is_clean_boundary: bool = True  # Not mid-hyphenation
    confidence: float = 1.0  # How confident we are in this position

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serialisable dictionary (the enum is stored by value)."""
        return {
            'document_id': self.document_id,
            'chapter_index': self.chapter_index,
            'block_index': self.block_index,
            'element_index': self.element_index,
            'element_type': self.element_type.value,
            'word_index': self.word_index,
            'character_index': self.character_index,
            'row_index': self.row_index,
            'cell_index': self.cell_index,
            'list_item_index': self.list_item_index,
            'is_clean_boundary': self.is_clean_boundary,
            'confidence': self.confidence
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AbstractPosition':
        """
        Create a position from a dictionary produced by ``to_dict``.

        Missing keys fall back to the field defaults.

        Raises:
            ValueError: If ``element_type`` is not a known element type value.
        """
        # ``or`` rather than a .get() default: a stored null element type
        # would otherwise reach ElementType(None) and raise.
        element_type = data.get('element_type') or ElementType.PARAGRAPH.value
        return cls(
            document_id=data.get('document_id'),
            chapter_index=data.get('chapter_index'),
            block_index=data.get('block_index', 0),
            element_index=data.get('element_index', 0),
            element_type=ElementType(element_type),
            word_index=data.get('word_index'),
            character_index=data.get('character_index'),
            row_index=data.get('row_index'),
            cell_index=data.get('cell_index'),
            list_item_index=data.get('list_item_index'),
            is_clean_boundary=data.get('is_clean_boundary', True),
            confidence=data.get('confidence', 1.0)
        )

    def to_bookmark(self) -> str:
        """Serialize to a JSON bookmark string for storage."""
        return json.dumps(self.to_dict())

    @classmethod
    def from_bookmark(cls, bookmark: str) -> 'AbstractPosition':
        """Create a position from a string produced by ``to_bookmark``."""
        return cls.from_dict(json.loads(bookmark))

    def copy(self) -> 'AbstractPosition':
        """Create an independent copy of this position (same class as the original)."""
        # type(self) keeps subclasses intact instead of downgrading them.
        return type(self).from_dict(self.to_dict())

    def get_hash(self) -> str:
        """Get a hash representing this position (for caching)."""
        # sort_keys makes the serialised form, and so the digest, stable.
        data_str = json.dumps(self.to_dict(), sort_keys=True)
        return hashlib.md5(data_str.encode()).hexdigest()

    @staticmethod
    def _compare_optional(mine: Optional[int], theirs: Optional[int]) -> Optional[int]:
        """Return -1, 0 or 1 for two indices, or None when either one is unset."""
        if mine is None or theirs is None:
            return None
        return (mine > theirs) - (mine < theirs)

    def is_before(self, other: 'AbstractPosition') -> bool:
        """
        Check if this position comes strictly before another in document order.

        Fields are compared from the outermost container inwards: chapter,
        block, element, then table row/cell and list item, and only then
        word/character, because rows, cells and list items contain the
        words. A field is skipped when it is unset on either side; the
        first comparable field that differs decides.

        Args:
            other: The position to compare against.

        Returns:
            True if this position precedes ``other``; False when the
            positions are equal or cannot be compared.
        """
        compare = self._compare_optional

        # Structural addressing shared by every element type.
        for mine, theirs in (
            (self.chapter_index, other.chapter_index),
            (self.block_index, other.block_index),
            (self.element_index, other.element_index),
        ):
            order = compare(mine, theirs)
            if order:
                return order < 0

        # (outer, inner) pairs: the inner index is only meaningful when the
        # outer one is set on both sides and equal.
        for outer, inner in (
            ('row_index', 'cell_index'),
            ('list_item_index', None),
            ('word_index', 'character_index'),
        ):
            order = compare(getattr(self, outer), getattr(other, outer))
            if order is None:
                continue
            if order:
                return order < 0
            if inner is not None:
                order = compare(getattr(self, inner), getattr(other, inner))
                if order:
                    return order < 0

        # Positions are equal or comparison not possible
        return False

    def get_progress(self, document: Document) -> float:
        """
        Get approximate progress through document (0.0 to 1.0).

        For a Book, progress is the chapter fraction plus the block fraction
        within that chapter; for any other document it is the block
        fraction. Only block granularity is considered.

        Args:
            document: The document this position refers to

        Returns:
            Progress value from 0.0 (start) to 1.0 (end); 0.0 for an empty
            document or when the document lacks the expected attributes.
        """
        try:
            if isinstance(document, Book):
                total_chapters = len(document.chapters)
                if total_chapters == 0:
                    return 0.0

                chapter_index = self.chapter_index
                progress = (chapter_index or 0) / total_chapters

                # Add progress within the current chapter. The lower bound
                # stops a negative index from silently addressing a chapter
                # from the end of the list.
                if chapter_index is not None and 0 <= chapter_index < total_chapters:
                    chapter = document.chapters[chapter_index]
                    if chapter.blocks:
                        block_progress = self.block_index / len(chapter.blocks)
                        progress += block_progress / total_chapters
            else:
                if not document.blocks:
                    return 0.0
                progress = self.block_index / len(document.blocks)
        except (IndexError, ZeroDivisionError, AttributeError):
            return 0.0

        # Clamp on both sides so out-of-range indices never leave [0.0, 1.0].
        return max(0.0, min(1.0, progress))
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ConcretePosition:
    """
    Physical rendering coordinates of a location in the laid-out document.

    A concrete position is ephemeral: it is only meaningful for the layout
    it was computed under and gets invalidated whenever layout parameters
    change (font size, page size, margins, etc.).
    """

    # Physical coordinates
    page_index: int = 0
    viewport_x: int = 0
    viewport_y: int = 0
    line_index: Optional[int] = None

    # Validation tracking
    layout_hash: Optional[str] = None  # Hash of the layout parameters in force
    is_valid: bool = True

    # Quality indicators
    is_exact: bool = True  # Exact position vs. approximation
    pixel_offset: int = 0  # Fine-grained positioning within line

    def invalidate(self):
        """Flag this position as neither valid nor exact."""
        self.is_valid = False
        self.is_exact = False

    def update_layout_hash(self, layout_hash: str):
        """Record a new layout hash and flag the position as valid again.

        ``is_exact`` is left untouched.
        """
        self.layout_hash = layout_hash
        self.is_valid = True

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dictionary, in declaration order."""
        return dict(
            page_index=self.page_index,
            viewport_x=self.viewport_x,
            viewport_y=self.viewport_y,
            line_index=self.line_index,
            layout_hash=self.layout_hash,
            is_valid=self.is_valid,
            is_exact=self.is_exact,
            pixel_offset=self.pixel_offset,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ConcretePosition':
        """Build a position from a dictionary; missing keys use the field defaults."""
        read = data.get
        return cls(
            page_index=read('page_index', 0),
            viewport_x=read('viewport_x', 0),
            viewport_y=read('viewport_y', 0),
            line_index=read('line_index'),
            layout_hash=read('layout_hash'),
            is_valid=read('is_valid', True),
            is_exact=read('is_exact', True),
            pixel_offset=read('pixel_offset', 0),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class PositionAnchor:
    """
    Multi-level position anchor for robust position recovery.

    Provides primary abstract position with fallback strategies
    for when exact positioning fails.
    """

    def __init__(self, primary_position: AbstractPosition):
        """
        Initialize with primary abstract position.

        Args:
            primary_position: The main abstract position
        """
        self.primary_position = primary_position
        self.fallback_positions: List[AbstractPosition] = []
        self.context_text: Optional[str] = None  # Text snippet for fuzzy matching
        self.document_progress: float = 0.0  # Overall document progress
        self.paragraph_progress: float = 0.0  # Progress within paragraph

    def add_fallback(self, position: AbstractPosition):
        """Append a fallback position; fallbacks are tried in insertion order."""
        self.fallback_positions.append(position)

    def set_context(self, text: str, document_progress: float = 0.0,
                    paragraph_progress: float = 0.0):
        """
        Set contextual information for fuzzy recovery.

        Args:
            text: Text snippet near the position.
            document_progress: Overall progress through the document.
            paragraph_progress: Progress within the current paragraph.
        """
        self.context_text = text
        self.document_progress = document_progress
        self.paragraph_progress = paragraph_progress

    def get_best_position(self, document: Document) -> AbstractPosition:
        """
        Get the best available position for the given document.

        Args:
            document: The document to position within

        Returns:
            The primary position if it is valid for ``document``, otherwise
            the first valid fallback, otherwise an approximate position
            derived from the stored document progress.
        """
        for candidate in [self.primary_position, *self.fallback_positions]:
            if self._is_position_valid(candidate, document):
                return candidate

        # Last resort: create approximate position from progress
        return self._create_approximate_position(document)

    def _is_position_valid(self, position: AbstractPosition, document: Document) -> bool:
        """
        Check if a position's chapter/block indices exist in the document.

        Negative indices are rejected: Python would accept them as "from the
        end", which is never what a stored position means. For a Book, a
        position without a chapter index has no chapter to check its block
        against, so only the sign of the block index is checked.
        """
        try:
            if position.block_index < 0:
                return False

            if isinstance(document, Book):
                chapter_index = position.chapter_index
                if chapter_index is None:
                    return True
                if not 0 <= chapter_index < len(document.chapters):
                    return False
                chapter = document.chapters[chapter_index]
                return position.block_index < len(chapter.blocks)

            return position.block_index < len(document.blocks)

        except (AttributeError, IndexError, TypeError):
            # Missing attributes or non-integer indices: not a usable position.
            return False

    @staticmethod
    def _index_from_fraction(fraction: float, count: int) -> int:
        """Map a 0..1 fraction onto an index in ``range(count)`` (count > 0)."""
        # The small epsilon keeps an exact boundary such as 1/3 * 3 from
        # truncating to the previous index through float rounding.
        return min(count - 1, max(0, int(fraction * count + 1e-9)))

    def _create_approximate_position(self, document: Document) -> AbstractPosition:
        """
        Create an approximate position based on document progress.

        This inverts ``AbstractPosition.get_progress``: for a Book the
        integer part of ``document_progress * chapter_count`` selects the
        chapter and the fractional remainder selects the block inside it.
        ``paragraph_progress`` is deliberately not used here, because it
        describes progress inside a paragraph, not the block within a chapter.

        Returns:
            A position with confidence 0.5, or the default start-of-document
            position if the document lacks the expected attributes.
        """
        position = AbstractPosition()

        try:
            progress = min(1.0, max(0.0, self.document_progress))

            if isinstance(document, Book):
                total_chapters = len(document.chapters)
                if total_chapters > 0:
                    scaled = progress * total_chapters
                    chapter_index = min(int(scaled + 1e-9), total_chapters - 1)

                    position.chapter_index = chapter_index
                    chapter = document.chapters[chapter_index]

                    if chapter.blocks:
                        # A remainder of 1.0 or more only happens at
                        # progress == 1.0 and is clamped to the last block.
                        position.block_index = self._index_from_fraction(
                            scaled - chapter_index, len(chapter.blocks)
                        )
            else:
                # Estimate block from progress
                if document.blocks:
                    position.block_index = self._index_from_fraction(
                        progress, len(document.blocks)
                    )

            position.confidence = 0.5  # Mark as approximate

        except (AttributeError, IndexError, ZeroDivisionError):
            # Ultimate fallback - start of document
            pass

        return position

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            'primary_position': self.primary_position.to_dict(),
            'fallback_positions': [pos.to_dict() for pos in self.fallback_positions],
            'context_text': self.context_text,
            'document_progress': self.document_progress,
            'paragraph_progress': self.paragraph_progress
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PositionAnchor':
        """
        Create an anchor from a dictionary produced by ``to_dict``.

        Raises:
            KeyError: If ``primary_position`` is missing.
        """
        anchor = cls(AbstractPosition.from_dict(data['primary_position']))

        anchor.fallback_positions = [
            AbstractPosition.from_dict(pos_data)
            for pos_data in data.get('fallback_positions', [])
        ]
        anchor.context_text = data.get('context_text')
        anchor.document_progress = data.get('document_progress', 0.0)
        anchor.paragraph_progress = data.get('paragraph_progress', 0.0)

        return anchor
|
||||||
459
pyWebLayout/typesetting/position_translator.py
Normal file
459
pyWebLayout/typesetting/position_translator.py
Normal file
@ -0,0 +1,459 @@
|
|||||||
|
"""
|
||||||
|
Position translation system for pyWebLayout.
|
||||||
|
|
||||||
|
This module provides translation between abstract (content-based) and
|
||||||
|
concrete (rendering-based) positions. It handles the conversion logic
|
||||||
|
and maintains the relationship between logical document structure
|
||||||
|
and physical layout coordinates.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional, Dict, Any, List, Tuple, Union
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
|
||||||
|
from pyWebLayout.abstract.document import Document, Book, Chapter
|
||||||
|
from pyWebLayout.abstract.block import Block, BlockType, Paragraph, Heading, Table, HList, Image as AbstractImage
|
||||||
|
from pyWebLayout.abstract.inline import Word
|
||||||
|
from pyWebLayout.concrete.page import Page
|
||||||
|
from pyWebLayout.style import Font, Alignment
|
||||||
|
from pyWebLayout.typesetting.abstract_position import (
|
||||||
|
AbstractPosition, ConcretePosition, ElementType, PositionAnchor
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StyleParameters:
    """
    Bundle of the layout settings that determine concrete positions.

    When these parameters change, all concrete positions become invalid
    and must be recalculated from abstract positions.
    """

    def __init__(
        self,
        page_size: Tuple[int, int] = (800, 600),
        margins: Tuple[int, int, int, int] = (20, 20, 20, 20),  # top, right, bottom, left
        default_font: Optional[Font] = None,
        line_spacing: int = 3,
        paragraph_spacing: int = 10,
        alignment: Alignment = Alignment.LEFT
    ):
        """
        Store the given layout settings.

        Args:
            page_size: (width, height) of pages
            margins: (top, right, bottom, left) margins
            default_font: Default font to use; a fresh ``Font()`` when omitted
            line_spacing: Spacing between lines
            paragraph_spacing: Spacing between paragraphs
            alignment: Text alignment
        """
        self.page_size = page_size
        self.margins = margins
        self.default_font = default_font or Font()
        self.line_spacing = line_spacing
        self.paragraph_spacing = paragraph_spacing
        self.alignment = alignment

    def get_hash(self) -> str:
        """
        Return an MD5 hex digest identifying these style parameters.

        Of the default font, only its size and path contribute to the digest.
        """
        font = self.default_font
        if font:
            font_size = font.font_size
            font_path = getattr(font, 'font_path', None)
        else:
            font_size, font_path = 16, None

        alignment = self.alignment
        alignment_token = alignment.value if hasattr(alignment, 'value') else str(alignment)

        # sort_keys gives a stable serialised form, hence a stable digest.
        payload = json.dumps(
            {
                'page_size': self.page_size,
                'margins': self.margins,
                'font_size': font_size,
                'font_path': font_path,
                'line_spacing': self.line_spacing,
                'paragraph_spacing': self.paragraph_spacing,
                'alignment': alignment_token,
            },
            sort_keys=True,
        )
        return hashlib.md5(payload.encode()).hexdigest()

    def copy(self) -> 'StyleParameters':
        """Return a new StyleParameters with the same settings (the font object is shared)."""
        return StyleParameters(
            self.page_size,
            self.margins,
            self.default_font,
            self.line_spacing,
            self.paragraph_spacing,
            self.alignment,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class PositionTranslator:
|
||||||
|
"""
|
||||||
|
Translates between abstract and concrete positions.
|
||||||
|
|
||||||
|
This class handles the complex logic of converting content-based
|
||||||
|
positions to physical rendering coordinates and vice versa.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, document: Document, style_params: StyleParameters):
    """
    Bind the translator to a document and its current style parameters.

    Args:
        document: The document to work with
        style_params: Current style parameters
    """
    # Both caches start empty; update_style_params() clears them again.
    self._position_cache: Dict[str, ConcretePosition] = {}
    self._layout_cache: Dict[str, Any] = {}

    self.style_params = style_params
    self.document = document
|
||||||
|
|
||||||
|
def update_style_params(self, new_params: StyleParameters):
    """
    Switch to new style parameters and drop everything cached for the old ones.

    Args:
        new_params: New style parameters
    """
    # Cached layouts and positions were computed under the previous parameters.
    for cache in (self._layout_cache, self._position_cache):
        cache.clear()
    self.style_params = new_params
|
||||||
|
|
||||||
|
def abstract_to_concrete(self, abstract_pos: AbstractPosition) -> ConcretePosition:
    """
    Convert an abstract position to a concrete position.

    Results are cached per (position hash, style hash). A cached entry is
    reused only while it is still flagged valid and was stamped with the
    current style hash; otherwise the position is recalculated and the
    cache entry replaced.

    Args:
        abstract_pos: The abstract position to convert

    Returns:
        Corresponding concrete position
    """
    # The style hash serialises and digests the parameters, so compute it
    # once per call rather than at every use.
    layout_hash = self.style_params.get_hash()
    cache_key = abstract_pos.get_hash() + layout_hash

    cached_pos = self._position_cache.get(cache_key)
    if (cached_pos is not None and
            cached_pos.is_valid and
            cached_pos.layout_hash == layout_hash):
        return cached_pos

    # Calculate concrete position and stamp it with the current layout.
    concrete_pos = self._calculate_concrete_position(abstract_pos)
    concrete_pos.update_layout_hash(layout_hash)

    # Cache the result
    self._position_cache[cache_key] = concrete_pos

    return concrete_pos
|
||||||
|
|
||||||
|
def concrete_to_abstract(self, concrete_pos: ConcretePosition) -> AbstractPosition:
    """
    Convert a concrete position to an abstract position.

    The work of figuring out what content sits at the given physical
    coordinates is delegated to ``_calculate_abstract_position``.

    Args:
        concrete_pos: The concrete position to convert

    Returns:
        Corresponding abstract position
    """
    abstract_pos = self._calculate_abstract_position(concrete_pos)
    return abstract_pos
|
||||||
|
|
||||||
|
def find_clean_boundary(self, abstract_pos: AbstractPosition) -> AbstractPosition:
    """
    Find a clean reading boundary near the given position.

    This ensures the user doesn't restart reading mid-hyphenation
    or in the middle of a word. The input position is not modified.

    Args:
        abstract_pos: The starting position

    Returns:
        A copy of the position moved back to a word start (or to the
        paragraph start when within its first few words), always flagged
        as a clean boundary.
    """
    clean_pos = abstract_pos.copy()

    # If we're in the middle of a word, move to word start
    if clean_pos.character_index is not None and clean_pos.character_index > 0:
        clean_pos.character_index = 0

    # For better user experience, move to the paragraph start if we're
    # within the first few words of the paragraph
    if (clean_pos.word_index is not None and
            clean_pos.word_index <= 2 and
            clean_pos.element_type == ElementType.PARAGRAPH):
        clean_pos.word_index = 0
        clean_pos.character_index = 0

    # Every path above leaves the position without an offset inside a
    # word, so the result is a clean boundary even when the input was
    # flagged otherwise and no index had to be moved.
    clean_pos.is_clean_boundary = True

    return clean_pos
|
||||||
|
|
||||||
|
def create_position_anchor(self, abstract_pos: AbstractPosition,
                           context_window: int = 50) -> PositionAnchor:
    """
    Build a position anchor that carries fallbacks and context.

    Args:
        abstract_pos: Primary abstract position
        context_window: Size of text context to capture

    Returns:
        Position anchor with fallbacks
    """
    anchor = PositionAnchor(abstract_pos)

    # Fallbacks are registered from most to least specific. Each one is an
    # independent copy of the primary position with the listed indices
    # rewound to zero.
    fallback_resets = (
        # Fallback 1: start of the current paragraph/element
        ("word_index", "character_index"),
        # Fallback 2: start of the current block
        ("element_index", "word_index", "character_index"),
    )
    for reset_fields in fallback_resets:
        fallback = abstract_pos.copy()
        for field_name in reset_fields:
            setattr(fallback, field_name, 0)
        anchor.add_fallback(fallback)

    # Context: surrounding text, progress through the document and
    # progress through the current paragraph.
    anchor.set_context(
        self._extract_context_text(abstract_pos, context_window),
        abstract_pos.get_progress(self.document),
        self._get_paragraph_progress(abstract_pos),
    )

    return anchor
|
||||||
|
|
||||||
|
def _calculate_concrete_position(self, abstract_pos: AbstractPosition) -> ConcretePosition:
    """
    Approximate where an abstract position lands on the rendered pages.

    This is a simplified implementation: instead of laying out the
    document it relies on the estimation helpers, so the result is
    flagged with ``is_exact=False``.

    Args:
        abstract_pos: The logical position to locate

    Returns:
        An estimated concrete position, or a default-constructed one when
        the position does not resolve to a block
    """
    block = self._get_block_from_position(abstract_pos)
    if block is None:
        # Nothing to anchor to: default to the start.
        return ConcretePosition()

    # Rough approximations; a real implementation would consult the
    # actual pagination system.
    page = self._estimate_page_for_block(abstract_pos)
    y_offset = self._estimate_y_coordinate(abstract_pos, block)
    left_margin = self.style_params.margins[3]

    return ConcretePosition(
        page_index=page,
        viewport_x=left_margin,
        viewport_y=y_offset,
        is_exact=False,  # Mark as approximation
    )
|
||||||
|
|
||||||
|
def _calculate_abstract_position(self, concrete_pos: ConcretePosition) -> AbstractPosition:
    """
    Estimate the abstract position for a set of concrete coordinates.

    A full implementation would analyse the rendered layout. This one
    estimates the block from the page index and the vertical offset
    within the page, and marks the result with a confidence of 0.7.

    Args:
        concrete_pos: Concrete position providing ``page_index`` and
            ``viewport_y``

    Returns:
        An abstract position whose ``block_index`` is a non-negative
        estimate
    """
    abstract_pos = AbstractPosition()

    # First block of the page, assuming every page holds the same number
    # of blocks.
    blocks_per_page = self._estimate_blocks_per_page()
    estimated_block = concrete_pos.page_index * blocks_per_page

    # Usable page height: total height minus margins[0] and margins[2]
    # (presumably top and bottom).
    page_height = self.style_params.page_size[1] - sum(self.style_params.margins[::2])

    # Fine-tune by the relative vertical offset. A page whose margins
    # consume its whole height has no usable area; skip the adjustment
    # rather than divide by zero.
    if page_height > 0:
        relative_y = concrete_pos.viewport_y / page_height
        estimated_block += int(relative_y * blocks_per_page)

    abstract_pos.block_index = max(0, estimated_block)
    abstract_pos.confidence = 0.7  # Mark as estimate

    return abstract_pos
|
||||||
|
|
||||||
|
def _get_block_from_position(self, abstract_pos: AbstractPosition) -> Optional[Block]:
    """
    Resolve the block an abstract position points at.

    Args:
        abstract_pos: Position carrying ``block_index`` and, for books,
            ``chapter_index``

    Returns:
        The referenced block, or None when the position is a book position
        without a chapter index or the lookup fails
    """
    try:
        document = self.document

        # Plain documents are indexed directly by block.
        if not isinstance(document, Book):
            return document.blocks[abstract_pos.block_index]

        # Books need a chapter to locate the block in.
        if abstract_pos.chapter_index is None:
            return None

        chapter = document.chapters[abstract_pos.chapter_index]
        return chapter.blocks[abstract_pos.block_index]
    except (IndexError, AttributeError):
        # Out-of-range index or an object missing the expected attributes.
        return None
|
||||||
|
|
||||||
|
def _estimate_page_for_block(self, abstract_pos: AbstractPosition) -> int:
|
||||||
|
"""Estimate which page a block would appear on."""
|
||||||
|
# Rough estimation based on block index and average blocks per page
|
||||||
|
blocks_per_page = self._estimate_blocks_per_page()
|
||||||
|
return abstract_pos.block_index // max(1, blocks_per_page)
|
||||||
|
|
||||||
|
def _estimate_blocks_per_page(self) -> int:
|
||||||
|
"""Estimate how many blocks fit on a page."""
|
||||||
|
# Simple heuristic based on page size and average block height
|
||||||
|
page_height = self.style_params.page_size[1] - sum(self.style_params.margins[::2])
|
||||||
|
average_block_height = self.style_params.default_font.font_size * 3 # Rough estimate
|
||||||
|
return max(1, page_height // average_block_height)
|
||||||
|
|
||||||
|
def _estimate_y_coordinate(self, abstract_pos: AbstractPosition, block: Block) -> int:
|
||||||
|
"""Estimate y-coordinate within page for a position."""
|
||||||
|
# Start with top margin
|
||||||
|
y = self.style_params.margins[0]
|
||||||
|
|
||||||
|
# Add estimated height for preceding elements
|
||||||
|
blocks_before = abstract_pos.block_index % self._estimate_blocks_per_page()
|
||||||
|
block_height = self.style_params.default_font.font_size * 2 # Rough estimate
|
||||||
|
|
||||||
|
y += blocks_before * (block_height + self.style_params.paragraph_spacing)
|
||||||
|
|
||||||
|
# Add offset within block if word/character position is specified
|
||||||
|
if abstract_pos.word_index is not None:
|
||||||
|
line_height = self.style_params.default_font.font_size + self.style_params.line_spacing
|
||||||
|
estimated_line = abstract_pos.word_index // 10 # Rough estimate of words per line
|
||||||
|
y += estimated_line * line_height
|
||||||
|
|
||||||
|
return y
|
||||||
|
|
||||||
|
def _extract_context_text(self, abstract_pos: AbstractPosition, window: int) -> str:
|
||||||
|
"""Extract text context around the position."""
|
||||||
|
block = self._get_block_from_position(abstract_pos)
|
||||||
|
if not block or not isinstance(block, Paragraph):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Extract words from the paragraph
|
||||||
|
words = []
|
||||||
|
try:
|
||||||
|
for _, word in block.words():
|
||||||
|
words.append(word.text)
|
||||||
|
except:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
if not words:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Get context window around current word
|
||||||
|
word_idx = abstract_pos.word_index or 0
|
||||||
|
start_idx = max(0, word_idx - window // 2)
|
||||||
|
end_idx = min(len(words), word_idx + window // 2)
|
||||||
|
|
||||||
|
return " ".join(words[start_idx:end_idx])
|
||||||
|
|
||||||
|
def _get_paragraph_progress(self, abstract_pos: AbstractPosition) -> float:
|
||||||
|
"""Get progress within current paragraph."""
|
||||||
|
if abstract_pos.word_index is None:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
block = self._get_block_from_position(abstract_pos)
|
||||||
|
if not block or not isinstance(block, Paragraph):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
try:
|
||||||
|
total_words = sum(1 for _ in block.words())
|
||||||
|
if total_words == 0:
|
||||||
|
return 0.0
|
||||||
|
return min(1.0, abstract_pos.word_index / total_words)
|
||||||
|
except:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
class PositionTracker:
    """
    High-level interface for tracking and managing positions.

    This class provides the main API for position management in
    an e-reader or document viewer application.
    """

    def __init__(self, document: Document, style_params: StyleParameters):
        """
        Initialize position tracker.

        Args:
            document: Document to track positions in
            style_params: Current style parameters
        """
        self.document = document
        self.translator = PositionTranslator(document, style_params)
        self.current_position: Optional[AbstractPosition] = None
        # Anchors recorded by handle_style_change before each style switch.
        self.reading_history: List[PositionAnchor] = []

    def set_current_position(self, position: AbstractPosition):
        """Set the current reading position."""
        self.current_position = position

    def get_current_position(self) -> Optional[AbstractPosition]:
        """Get the current reading position."""
        return self.current_position

    def save_bookmark(self) -> str:
        """
        Save current position as bookmark string.

        Returns:
            JSON encoding of a position anchor for the current position,
            or an empty string when no position is set
        """
        if self.current_position is None:
            return ""

        anchor = self.translator.create_position_anchor(self.current_position)
        return json.dumps(anchor.to_dict())

    def load_bookmark(self, bookmark_str: str) -> bool:
        """
        Load position from bookmark string.

        The current position is only replaced when the whole load
        succeeds; on failure it is left as it was.

        Args:
            bookmark_str: Bookmark string to load

        Returns:
            True if successful, False otherwise
        """
        try:
            anchor_data = json.loads(bookmark_str)
            anchor = PositionAnchor.from_dict(anchor_data)
            best_position = anchor.get_best_position(self.document)
            self.current_position = self.translator.find_clean_boundary(best_position)
            return True
        except (json.JSONDecodeError, KeyError, ValueError, TypeError, AttributeError):
            # TypeError/AttributeError cover a non-string bookmark and JSON
            # that parses but does not have the expected anchor shape, so a
            # malformed bookmark returns False instead of raising.
            return False

    def handle_style_change(self, new_style_params: StyleParameters):
        """
        Handle style parameter changes.

        This preserves the current reading position across style changes.

        Args:
            new_style_params: New style parameters
        """
        # Record the current position before the style change.
        if self.current_position is not None:
            anchor = self.translator.create_position_anchor(self.current_position)
            self.reading_history.append(anchor)

        # Update translator with new style
        self.translator.update_style_params(new_style_params)

        # The abstract position is still valid, but re-snap it so it is a
        # clean boundary under the new style.
        if self.current_position is not None:
            self.current_position = self.translator.find_clean_boundary(self.current_position)

    def get_concrete_position(self) -> Optional[ConcretePosition]:
        """Get current position as concrete coordinates, or None if unset."""
        if self.current_position is None:
            return None

        return self.translator.abstract_to_concrete(self.current_position)

    def set_position_from_concrete(self, concrete_pos: ConcretePosition):
        """Set position from concrete coordinates, snapped to a clean boundary."""
        abstract_pos = self.translator.concrete_to_abstract(concrete_pos)
        self.current_position = self.translator.find_clean_boundary(abstract_pos)

    def get_reading_progress(self) -> float:
        """Get reading progress as a fraction (0.0 to 1.0); 0.0 when unset."""
        if self.current_position is None:
            return 0.0

        return self.current_position.get_progress(self.document)
|
||||||
250
test_monospace_demo.py
Normal file
250
test_monospace_demo.py
Normal file
@ -0,0 +1,250 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple demonstration of mono-space font testing concepts.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pyWebLayout.concrete.text import Text, Line
|
||||||
|
from pyWebLayout.style.fonts import Font
|
||||||
|
from pyWebLayout.style.layout import Alignment
|
||||||
|
|
||||||
|
def main():
    """
    Print a walkthrough of why mono-space fonts make layout tests predictable.

    The demo measures character widths in the default font, compares them
    with a mono-space font when one of a few well-known system font files
    exists, exercises ``Line.add_word``, and, if a ``test_output``
    directory exists, renders a small page to
    ``test_output/monospace_demo.png``.
    """
    # Imported here because the module's top-level imports do not include it.
    import os

    print("=== Mono-space Font Testing Demo ===\n")

    # Create a regular font
    font = Font(font_size=12)

    print("1. Character Width Variance Analysis:")
    print("-" * 40)

    # Test different characters to show width variance
    test_chars = "iIlLmMwW"
    widths = {}

    for char in test_chars:
        text = Text(char, font)
        widths[char] = text.width
        print(f" '{char}': {text.width:3d}px")

    min_w = min(widths.values())
    max_w = max(widths.values())
    variance = max_w - min_w

    print(f"\n Range: {min_w}-{max_w}px (variance: {variance}px)")
    # A zero-width glyph would make the ratio undefined.
    if min_w > 0:
        print(f" Ratio: {max_w/min_w:.1f}x difference")
    else:
        print(" Ratio: n/a (narrowest character has zero width)")

    print("\n2. Why This Matters for Testing:")
    print("-" * 40)

    # Show how same-length strings have different widths
    word1 = "ill"  # narrow
    word2 = "WWW"  # wide

    text1 = Text(word1, font)
    text2 = Text(word2, font)

    print(f" '{word1}' (3 chars): {text1.width}px")
    print(f" '{word2}' (3 chars): {text2.width}px")
    print(f" Same length, {abs(text1.width - text2.width)}px difference!")

    print("\n3. Line Capacity Prediction:")
    print("-" * 40)

    line_width = 100
    print(f" Line width: {line_width}px")

    # Test how many characters fit
    test_cases = [
        ("narrow chars", "i" * 20),
        ("wide chars", "W" * 10),
        ("mixed text", "Hello World"),
    ]

    for name, text_str in test_cases:
        text_obj = Text(text_str, font)
        fits = "YES" if text_obj.width <= line_width else "NO"
        print(f" {name:12}: '{text_str[:10]}...' ({len(text_str)} chars, {text_obj.width}px) → {fits}")

    print("\n4. With Mono-space Fonts:")
    print("-" * 40)

    # Try to use an actual mono-space font
    mono_font = None
    mono_paths = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf",
        "/System/Library/Fonts/Monaco.ttf",
        "C:/Windows/Fonts/consola.ttf",
    ]

    for path in mono_paths:
        if not os.path.exists(path):
            continue
        try:
            mono_font = Font(font_path=path, font_size=12)
        except Exception:
            # Unloadable font file: try the next candidate. Exception (not
            # a bare except) lets Ctrl-C still interrupt the demo.
            continue
        print(f" Using actual mono-space font: {os.path.basename(path)}")
        break

    mono_widths = {}
    if mono_font:
        # Test actual mono-space character consistency
        for char in "iIlLmMwW":
            mono_widths[char] = Text(char, mono_font).width

        mono_min = min(mono_widths.values())
        mono_max = max(mono_widths.values())
        mono_variance = mono_max - mono_min

        print(" Mono-space character widths:")
        for char, width in mono_widths.items():
            print(f" '{char}': {width}px")
        print(f" Range: {mono_min}-{mono_max}px (variance: {mono_variance}px)")

        # Compare to regular font variance
        improvement = variance / max(1, mono_variance)
        print(f" Improvement: {improvement:.1f}x more consistent!")

        # Test line capacity with actual mono-space
        mono_char_width = mono_widths['M']  # Use actual width
        # Guard against a zero-width measurement.
        capacity = line_width // mono_char_width if mono_char_width > 0 else 0

        print("\n Actual mono-space line capacity:")
        print(f" Each character: {mono_char_width}px")
        print(f" Line capacity: {capacity} characters")

        # Prove consistency with different character combinations
        print(f" Testing {capacity}-character strings:")
        all_same_width = True
        first_width = None

        # Iterate over the characters rather than indexing the built
        # string, which would fail on an empty string when capacity is 0.
        for char in "iWMl":
            text_obj = Text(char * capacity, mono_font)
            if first_width is None:
                first_width = text_obj.width
            elif abs(text_obj.width - first_width) > 2:  # Allow 2px tolerance
                all_same_width = False

            print(f" '{char}' × {capacity}: {text_obj.width}px")

        if all_same_width:
            print(f" ✓ ALL {capacity}-character strings have the same width!")
        else:
            print(" ⚠ Some variance detected (font may not be perfectly mono-space)")
    else:
        print(" No mono-space font found - showing theoretical values:")
        mono_char_width = 8  # Typical mono-space width
        capacity = line_width // mono_char_width

        print(f" Each character: {mono_char_width}px (theoretical)")
        print(f" Line capacity: {capacity} characters")
        print(f" ANY {capacity}-character string would fit!")
        print(" Layout calculations become simple math")

    print("\n5. Line Fitting Test:")
    print("-" * 40)

    # Test actual line fitting
    line = Line(
        spacing=(2, 4),
        origin=(0, 0),
        size=(line_width, 20),
        font=font,
        halign=Alignment.LEFT,
    )

    test_word = "development"  # 11 characters
    word_obj = Text(test_word, font)

    print(f" Test word: '{test_word}' ({len(test_word)} chars, {word_obj.width}px)")
    print(f" Line width: {line_width}px")

    result = line.add_word(test_word, font)

    if result is None:
        print(" Result: Word fits completely")
    elif line.text_objects:
        added = line.text_objects[0].text
        print(f" Result: Added '{added}', remaining '{result}'")
    else:
        print(" Result: Word rejected completely")

    # Use actual mono font width if available, otherwise theoretical.
    # Both cases print the same report, differing only in label and width.
    if mono_font:
        width_label, per_char_width = "actual", mono_widths['M']
    else:
        width_label, per_char_width = "theoretical", 8

    word_width = len(test_word) * per_char_width
    print(f"\n With {width_label} mono-space ({per_char_width}px/char):")
    print(f" Word would be: {len(test_word)} × {per_char_width} = {word_width}px")

    if word_width <= line_width:
        print(" → Would fit completely")
    else:
        chars_that_fit = line_width // per_char_width
        print(f" → Would need breaking after {chars_that_fit} characters")

    print("\n=== Conclusion ===")
    print("Mono-space fonts make testing predictable because:")
    print("- Character width is constant")
    print("- Line capacity is calculable")
    print("- Word fitting is based on character count")
    print("- Layout behavior is deterministic")

    # Check if test_output directory exists, if so save a simple visual
    if os.path.exists("test_output"):
        print("\nCreating visual test output...")

        # Create a simple line rendering test
        from pyWebLayout.concrete.page import Page, Container

        page = Page(size=(400, 200))

        container = Container(
            origin=(0, 0),
            size=(380, 180),
            direction='vertical',
            spacing=5,
            padding=(10, 10, 10, 10),
        )

        # Add title
        title = Text("Character Width Variance Demo", font)
        container.add_child(title)

        # Add test lines showing different characters
        for char_type, char in [("Narrow", "i"), ("Wide", "W"), ("Average", "n")]:
            line_text = f"{char_type}: {char * 10}"
            container.add_child(Text(line_text, font))

        page.add_child(container)
        image = page.render()

        output_path = os.path.join("test_output", "monospace_demo.png")
        image.save(output_path)
        print(f"Visual demo saved to: {output_path}")


if __name__ == "__main__":
    main()
|
||||||
@ -26,7 +26,6 @@ class TestWord(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(word.text, "hello")
|
self.assertEqual(word.text, "hello")
|
||||||
self.assertEqual(word.style, self.font)
|
self.assertEqual(word.style, self.font)
|
||||||
self.assertEqual(word.background, self.font.background)
|
|
||||||
self.assertIsNone(word.previous)
|
self.assertIsNone(word.previous)
|
||||||
self.assertIsNone(word.next)
|
self.assertIsNone(word.next)
|
||||||
self.assertIsNone(word.hyphenated_parts)
|
self.assertIsNone(word.hyphenated_parts)
|
||||||
@ -252,27 +251,6 @@ class TestWord(unittest.TestCase):
|
|||||||
for i, expected_part in enumerate(expected_parts):
|
for i, expected_part in enumerate(expected_parts):
|
||||||
self.assertEqual(word.get_hyphenated_part(i), expected_part)
|
self.assertEqual(word.get_hyphenated_part(i), expected_part)
|
||||||
|
|
||||||
def test_word_create_and_add_to_with_container_style(self):
|
|
||||||
"""Test Word.create_and_add_to with container that has style property."""
|
|
||||||
# Create mock container with style and add_word method
|
|
||||||
mock_container = Mock()
|
|
||||||
mock_container.style = self.font
|
|
||||||
mock_container.add_word = Mock()
|
|
||||||
# Ensure _words and background don't interfere
|
|
||||||
del mock_container._words
|
|
||||||
del mock_container.background # Remove background so it inherits from font
|
|
||||||
|
|
||||||
# Create and add word
|
|
||||||
word = Word.create_and_add_to("hello", mock_container)
|
|
||||||
|
|
||||||
# Test that word was created with correct properties
|
|
||||||
self.assertIsInstance(word, Word)
|
|
||||||
self.assertEqual(word.text, "hello")
|
|
||||||
self.assertEqual(word.style, self.font)
|
|
||||||
self.assertEqual(word.background, self.font.background)
|
|
||||||
|
|
||||||
# Test that add_word was called
|
|
||||||
mock_container.add_word.assert_called_once_with(word)
|
|
||||||
|
|
||||||
def test_word_create_and_add_to_with_style_override(self):
|
def test_word_create_and_add_to_with_style_override(self):
|
||||||
"""Test Word.create_and_add_to with explicit style parameter."""
|
"""Test Word.create_and_add_to with explicit style parameter."""
|
||||||
|
|||||||
@ -398,22 +398,22 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
|||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Initially empty font registry
|
# Initially empty style registry
|
||||||
initial_font_count = len(self.doc._fonts)
|
initial_style_count = self.doc.get_style_registry().get_style_count()
|
||||||
|
|
||||||
# Parse HTML with document context
|
# Parse HTML with document context
|
||||||
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
|
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
|
||||||
|
|
||||||
# Should have created fonts for different styles
|
# Should have created styles for different formatting
|
||||||
final_font_count = len(self.doc._fonts)
|
final_style_count = self.doc.get_style_registry().get_style_count()
|
||||||
self.assertGreater(final_font_count, initial_font_count,
|
self.assertGreater(final_style_count, initial_style_count,
|
||||||
"Should have created fonts in registry")
|
"Should have created styles in registry")
|
||||||
|
|
||||||
# Should have created blocks
|
# Should have created blocks
|
||||||
self.assertGreater(len(blocks), 0, "Should have created blocks")
|
self.assertGreater(len(blocks), 0, "Should have created blocks")
|
||||||
|
|
||||||
def test_font_registry_reuses_fonts(self):
|
def test_font_registry_reuses_fonts(self):
|
||||||
"""Test that parsing same content reuses existing fonts."""
|
"""Test that parsing same content reuses existing styles."""
|
||||||
html_content = """
|
html_content = """
|
||||||
<div>
|
<div>
|
||||||
<p>This is <strong>bold text</strong> and <em>italic text</em>.</p>
|
<p>This is <strong>bold text</strong> and <em>italic text</em>.</p>
|
||||||
@ -423,43 +423,43 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
|||||||
|
|
||||||
# First parse
|
# First parse
|
||||||
blocks1 = parse_html_string(html_content, self.base_font, document=self.doc)
|
blocks1 = parse_html_string(html_content, self.base_font, document=self.doc)
|
||||||
first_parse_font_count = len(self.doc._fonts)
|
first_parse_style_count = self.doc.get_style_registry().get_style_count()
|
||||||
|
|
||||||
# Second parse with same content
|
# Second parse with same content
|
||||||
blocks2 = parse_html_string(html_content, self.base_font, document=self.doc)
|
blocks2 = parse_html_string(html_content, self.base_font, document=self.doc)
|
||||||
second_parse_font_count = len(self.doc._fonts)
|
second_parse_style_count = self.doc.get_style_registry().get_style_count()
|
||||||
|
|
||||||
# Font count should not increase on second parse
|
# Style count should not increase on second parse
|
||||||
self.assertEqual(first_parse_font_count, second_parse_font_count,
|
self.assertEqual(first_parse_style_count, second_parse_style_count,
|
||||||
"Should reuse existing fonts instead of creating new ones")
|
"Should reuse existing styles instead of creating new ones")
|
||||||
|
|
||||||
# Both parses should create same number of blocks
|
# Both parses should create same number of blocks
|
||||||
self.assertEqual(len(blocks1), len(blocks2),
|
self.assertEqual(len(blocks1), len(blocks2),
|
||||||
"Should create same structure on both parses")
|
"Should create same structure on both parses")
|
||||||
|
|
||||||
def test_font_registry_different_styles_create_different_fonts(self):
|
def test_font_registry_different_styles_create_different_fonts(self):
|
||||||
"""Test that different styles create different font objects."""
|
"""Test that different styles create different style objects."""
|
||||||
# Create fonts with different properties
|
# Create styles with different properties
|
||||||
font1 = self.doc.get_or_create_font(
|
style_id1, style1 = self.doc.get_or_create_style(
|
||||||
font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD
|
font_size=14, color=(255, 0, 0), font_weight=FontWeight.BOLD
|
||||||
)
|
)
|
||||||
font2 = self.doc.get_or_create_font(
|
style_id2, style2 = self.doc.get_or_create_style(
|
||||||
font_size=16, colour=(255, 0, 0), weight=FontWeight.BOLD
|
font_size=16, color=(255, 0, 0), font_weight=FontWeight.BOLD
|
||||||
)
|
)
|
||||||
font3 = self.doc.get_or_create_font(
|
style_id3, style3 = self.doc.get_or_create_style(
|
||||||
font_size=14, colour=(0, 255, 0), weight=FontWeight.BOLD
|
font_size=14, color=(0, 255, 0), font_weight=FontWeight.BOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
# Should be different objects
|
# Should be different style IDs
|
||||||
self.assertIsNot(font1, font2, "Different sizes should create different fonts")
|
self.assertNotEqual(style_id1, style_id2, "Different sizes should create different styles")
|
||||||
self.assertIsNot(font1, font3, "Different colors should create different fonts")
|
self.assertNotEqual(style_id1, style_id3, "Different colors should create different styles")
|
||||||
self.assertIsNot(font2, font3, "All fonts should be different")
|
self.assertNotEqual(style_id2, style_id3, "All styles should be different")
|
||||||
|
|
||||||
# Should have 3 fonts in registry
|
# Should have multiple styles in registry
|
||||||
self.assertEqual(len(self.doc._fonts), 3)
|
self.assertGreaterEqual(self.doc.get_style_registry().get_style_count(), 3)
|
||||||
|
|
||||||
def test_font_registry_integration_with_html_styles(self):
|
def test_font_registry_integration_with_html_styles(self):
|
||||||
"""Test that HTML parsing uses font registry for styled content."""
|
"""Test that HTML parsing uses style registry for styled content."""
|
||||||
html_content = """
|
html_content = """
|
||||||
<p>Normal text with <strong>bold</strong> and <em>italic</em> and
|
<p>Normal text with <strong>bold</strong> and <em>italic</em> and
|
||||||
<span style="color: red;">red text</span>.</p>
|
<span style="color: red;">red text</span>.</p>
|
||||||
@ -485,14 +485,17 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
|||||||
self.assertGreater(len(italic_words), 0, "Should have italic words")
|
self.assertGreater(len(italic_words), 0, "Should have italic words")
|
||||||
self.assertGreater(len(red_words), 0, "Should have red words")
|
self.assertGreater(len(red_words), 0, "Should have red words")
|
||||||
|
|
||||||
# Font registry should contain multiple fonts for different styles
|
# Style registry should contain multiple styles for different formatting
|
||||||
self.assertGreater(len(self.doc._fonts), 1,
|
self.assertGreater(self.doc.get_style_registry().get_style_count(), 1,
|
||||||
"Should have multiple fonts for different styles")
|
"Should have multiple styles for different formatting")
|
||||||
|
|
||||||
def test_font_registry_without_document_context(self):
|
def test_font_registry_without_document_context(self):
|
||||||
"""Test that parsing without document context works (fallback behavior)."""
|
"""Test that parsing without document context works (fallback behavior)."""
|
||||||
html_content = "<p>This is <strong>bold text</strong>.</p>"
|
html_content = "<p>This is <strong>bold text</strong>.</p>"
|
||||||
|
|
||||||
|
# Get initial style count (should include default style)
|
||||||
|
initial_style_count = self.doc.get_style_registry().get_style_count()
|
||||||
|
|
||||||
# Parse without document context
|
# Parse without document context
|
||||||
blocks = parse_html_string(html_content, self.base_font)
|
blocks = parse_html_string(html_content, self.base_font)
|
||||||
|
|
||||||
@ -500,12 +503,13 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
|||||||
self.assertEqual(len(blocks), 1)
|
self.assertEqual(len(blocks), 1)
|
||||||
self.assertIsInstance(blocks[0], Paragraph)
|
self.assertIsInstance(blocks[0], Paragraph)
|
||||||
|
|
||||||
# Should not affect document's font registry
|
# Should not affect document's style registry
|
||||||
self.assertEqual(len(self.doc._fonts), 0,
|
final_style_count = self.doc.get_style_registry().get_style_count()
|
||||||
"Document font registry should remain empty")
|
self.assertEqual(final_style_count, initial_style_count,
|
||||||
|
"Document style registry should remain unchanged")
|
||||||
|
|
||||||
def test_complex_html_font_reuse(self):
|
def test_complex_html_font_reuse(self):
|
||||||
"""Test font reuse with complex HTML containing repeated styles."""
|
"""Test style reuse with complex HTML containing repeated styles."""
|
||||||
html_content = """
|
html_content = """
|
||||||
<div>
|
<div>
|
||||||
<h1>First Header</h1>
|
<h1>First Header</h1>
|
||||||
@ -517,21 +521,21 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
|||||||
|
|
||||||
# Parse content
|
# Parse content
|
||||||
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
|
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
|
||||||
font_count_after_parse = len(self.doc._fonts)
|
style_count_after_parse = self.doc.get_style_registry().get_style_count()
|
||||||
|
|
||||||
# Parse same content again
|
# Parse same content again
|
||||||
blocks2 = parse_html_string(html_content, self.base_font, document=self.doc)
|
blocks2 = parse_html_string(html_content, self.base_font, document=self.doc)
|
||||||
font_count_after_second_parse = len(self.doc._fonts)
|
style_count_after_second_parse = self.doc.get_style_registry().get_style_count()
|
||||||
|
|
||||||
# Font count should not increase on second parse
|
# Style count should not increase on second parse
|
||||||
self.assertEqual(font_count_after_parse, font_count_after_second_parse,
|
self.assertEqual(style_count_after_parse, style_count_after_second_parse,
|
||||||
"Fonts should be reused for repeated styles")
|
"Styles should be reused for repeated formatting")
|
||||||
|
|
||||||
# Both should create same structure
|
# Both should create same structure
|
||||||
self.assertEqual(len(blocks), len(blocks2))
|
self.assertEqual(len(blocks), len(blocks2))
|
||||||
|
|
||||||
def test_font_registry_with_nested_styles(self):
|
def test_font_registry_with_nested_styles(self):
|
||||||
"""Test font registry with nested HTML styles."""
|
"""Test style registry with nested HTML styles."""
|
||||||
html_content = """
|
html_content = """
|
||||||
<p>Text with <strong>bold and <em>bold italic</em> nested</strong> styles.</p>
|
<p>Text with <strong>bold and <em>bold italic</em> nested</strong> styles.</p>
|
||||||
"""
|
"""
|
||||||
@ -539,7 +543,7 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
|||||||
# Parse content
|
# Parse content
|
||||||
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
|
blocks = parse_html_string(html_content, self.base_font, document=self.doc)
|
||||||
|
|
||||||
# Should create fonts for different style combinations
|
# Should create styles for different style combinations
|
||||||
paragraph = blocks[0]
|
paragraph = blocks[0]
|
||||||
words = list(paragraph.words())
|
words = list(paragraph.words())
|
||||||
|
|
||||||
@ -551,9 +555,9 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
|||||||
self.assertGreater(len(bold_italic_words), 0,
|
self.assertGreater(len(bold_italic_words), 0,
|
||||||
"Should have words with combined bold+italic style")
|
"Should have words with combined bold+italic style")
|
||||||
|
|
||||||
# Should have multiple fonts in registry for different combinations
|
# Should have multiple styles in registry for different combinations
|
||||||
self.assertGreater(len(self.doc._fonts), 1,
|
self.assertGreater(self.doc.get_style_registry().get_style_count(), 1,
|
||||||
"Should create separate fonts for style combinations")
|
"Should create separate styles for style combinations")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
1097
tests/test_position_system.py
Normal file
1097
tests/test_position_system.py
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user