def get_or_create_font(self,
                       font_path: Optional[str] = None,
                       font_size: int = 16,
                       colour: Tuple[int, int, int] = (0, 0, 0),
                       weight: FontWeight = FontWeight.NORMAL,
                       style: FontStyle = FontStyle.NORMAL,
                       decoration: TextDecoration = TextDecoration.NONE,
                       background: Optional[Tuple[int, int, int, int]] = None,
                       language: str = "en_EN",
                       min_hyphenation_width: Optional[int] = None) -> Font:
    """
    Return the registered font matching the given properties, creating it on first use.

    Fonts are cached on the document in ``self._fonts`` under a string key
    derived from every argument, so two calls with the same configuration
    return the same ``Font`` object.

    Args:
        font_path: Path to the font file (.ttf, .otf). ``None`` is passed through to ``Font``.
        font_size: Size of the font.
        colour: RGB colour tuple for the text.
        weight: Font weight.
        style: Font style.
        decoration: Text decoration.
        background: RGBA background colour. A falsy value is keyed as
            ``(255, 255, 255, 0)`` but passed to ``Font`` unchanged.
        language: Language code handed to ``Font``.
        min_hyphenation_width: Minimum width required for hyphenation. ``None``
            is keyed as ``font_size * 4`` but passed to ``Font`` unchanged.

    Returns:
        The cached ``Font`` for this configuration, or a newly created one.
    """
    # The registry is created lazily so instances that never ran the
    # current __init__ still work.
    try:
        registry = self._fonts
    except AttributeError:
        registry = self._fonts = {}

    def _plain(value, enum_cls):
        # Enum members are keyed by their value; anything else is keyed as-is.
        return value.value if isinstance(value, enum_cls) else value

    # The string form of this tuple is the registry key; element order matters.
    cache_key = str((
        font_path,
        font_size,
        colour,
        _plain(weight, FontWeight),
        _plain(style, FontStyle),
        _plain(decoration, TextDecoration),
        background or (255, 255, 255, 0),
        language,
        font_size * 4 if min_hyphenation_width is None else min_hyphenation_width,
    ))

    if cache_key not in registry:
        registry[cache_key] = Font(
            font_path=font_path,
            font_size=font_size,
            colour=colour,
            weight=weight,
            style=style,
            decoration=decoration,
            background=background,
            language=language,
            min_hyphenation_width=min_hyphenation_width,
        )

    return registry[cache_key]
-""" - -import sys -from pathlib import Path - -# Add the parent directory to the path to import pyWebLayout -sys.path.append(str(Path(__file__).parent.parent.parent)) - -def test_epub_basic(): - """Test basic EPUB functionality without full HTML parsing.""" - print("Testing basic EPUB components...") - - try: - # Test basic document classes - from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType - print("✓ Document classes imported") - - # Test creating a simple book - book = Book("Test Book", "Test Author") - chapter = book.create_chapter("Test Chapter") - print("✓ Book and chapter created") - - return True - - except Exception as e: - print(f"✗ Basic test failed: {e}") - import traceback - traceback.print_exc() - return False - -def test_epub_file(): - """Test opening the EPUB file without full parsing.""" - print("Testing EPUB file access...") - - try: - import zipfile - import os - - epub_path = "pg174-images-3.epub" - if not os.path.exists(epub_path): - print(f"✗ EPUB file not found: {epub_path}") - return False - - with zipfile.ZipFile(epub_path, 'r') as zip_ref: - file_list = zip_ref.namelist() - print(f"✓ EPUB file opened, contains {len(file_list)} files") - - # Look for key files - has_container = any('container.xml' in f for f in file_list) - has_opf = any('.opf' in f for f in file_list) - - print(f"✓ Container file: {'found' if has_container else 'not found'}") - print(f"✓ Package file: {'found' if has_opf else 'not found'}") - - return True - - except Exception as e: - print(f"✗ EPUB file test failed: {e}") - import traceback - traceback.print_exc() - return False - -def main(): - print("Simple EPUB Test") - print("=" * 50) - - # Test basic functionality - if not test_epub_basic(): - return False - - print() - - # Test EPUB file access - if not test_epub_file(): - return False - - print() - print("All basic tests passed!") - return True - -if __name__ == "__main__": - success = main() - sys.exit(0 if success else 1) diff --git 
a/pyWebLayout/io/readers/html_extraction.py b/pyWebLayout/io/readers/html_extraction.py index 01628bb..dc6e82f 100644 --- a/pyWebLayout/io/readers/html_extraction.py +++ b/pyWebLayout/io/readers/html_extraction.py @@ -271,8 +271,33 @@ def apply_element_font_styles( except ValueError: pass - # Use document's font registry if available to avoid creating duplicate fonts - if context and context.document and hasattr(context.document, 'get_or_create_font'): + # Use document's style registry if available to avoid creating duplicate styles + if context and context.document and hasattr(context.document, 'get_or_create_style'): + # Create an abstract style first + from pyWebLayout.style.abstract_style import FontFamily, FontSize + + # Map font properties to abstract style properties + font_family = FontFamily.SERIF # Default - could be enhanced to detect from font_path + if font_size: + font_size_value = font_size if isinstance(font_size, int) else FontSize.MEDIUM + else: + font_size_value = FontSize.MEDIUM + + # Create abstract style and register it + style_id, abstract_style = context.document.get_or_create_style( + font_family=font_family, + font_size=font_size_value, + font_weight=weight, + font_style=style, + text_decoration=decoration, + color=colour, + language=language + ) + + # Get the concrete font for this style + return context.document.get_font_for_style(abstract_style) + elif context and context.document and hasattr(context.document, 'get_or_create_font'): + # Fallback to old font registry system return context.document.get_or_create_font( font_path=font_path, font_size=font_size, diff --git a/pyWebLayout/typesetting/abstract_position.py b/pyWebLayout/typesetting/abstract_position.py new file mode 100644 index 0000000..d8534fc --- /dev/null +++ b/pyWebLayout/typesetting/abstract_position.py @@ -0,0 +1,380 @@ +""" +Abstract positioning system for pyWebLayout. 
class ElementType(Enum):
    """Types of elements that can be positioned within blocks."""
    PARAGRAPH = "paragraph"
    IMAGE = "image"
    TABLE = "table"
    LIST = "list"
    HEADING = "heading"
    HORIZONTAL_RULE = "horizontal_rule"
    CODE_BLOCK = "code_block"
    QUOTE = "quote"


@dataclass
class AbstractPosition:
    """
    Logical location inside a document's content structure.

    The position addresses content (chapter, block, element, word, ...)
    rather than rendering coordinates, so it does not depend on font size,
    page size or other layout parameters.
    """

    # Document structure addressing
    document_id: Optional[str] = None
    chapter_index: Optional[int] = None  # For Book objects
    block_index: int = 0
    element_index: int = 0  # Index within block (paragraph, image, etc.)
    element_type: ElementType = ElementType.PARAGRAPH

    # Text content addressing (for text elements)
    word_index: Optional[int] = None
    character_index: Optional[int] = None

    # Splittable content addressing (tables, lists)
    row_index: Optional[int] = None
    cell_index: Optional[int] = None
    list_item_index: Optional[int] = None

    # Position quality indicators
    is_clean_boundary: bool = True  # Not mid-hyphenation
    confidence: float = 1.0  # How confident we are in this position

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable dict; ``element_type`` is stored by value."""
        return {
            'document_id': self.document_id,
            'chapter_index': self.chapter_index,
            'block_index': self.block_index,
            'element_index': self.element_index,
            'element_type': self.element_type.value,
            'word_index': self.word_index,
            'character_index': self.character_index,
            'row_index': self.row_index,
            'cell_index': self.cell_index,
            'list_item_index': self.list_item_index,
            'is_clean_boundary': self.is_clean_boundary,
            'confidence': self.confidence
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AbstractPosition':
        """
        Build a position from a dict produced by :meth:`to_dict`.

        Missing keys fall back to the dataclass defaults.

        Raises:
            ValueError: If ``element_type`` is not a known ``ElementType`` value.
        """
        return cls(
            document_id=data.get('document_id'),
            chapter_index=data.get('chapter_index'),
            block_index=data.get('block_index', 0),
            element_index=data.get('element_index', 0),
            element_type=ElementType(data.get('element_type', 'paragraph')),
            word_index=data.get('word_index'),
            character_index=data.get('character_index'),
            row_index=data.get('row_index'),
            cell_index=data.get('cell_index'),
            list_item_index=data.get('list_item_index'),
            is_clean_boundary=data.get('is_clean_boundary', True),
            confidence=data.get('confidence', 1.0)
        )

    def to_bookmark(self) -> str:
        """Serialize to a JSON bookmark string for storage."""
        return json.dumps(self.to_dict())

    @classmethod
    def from_bookmark(cls, bookmark: str) -> 'AbstractPosition':
        """Create a position from a string produced by :meth:`to_bookmark`."""
        return cls.from_dict(json.loads(bookmark))

    def copy(self) -> 'AbstractPosition':
        """Return an independent copy of this position."""
        return AbstractPosition.from_dict(self.to_dict())

    def get_hash(self) -> str:
        """Return a stable hex digest of the position data (used as a cache key)."""
        # sort_keys makes the digest independent of dict ordering.
        data_str = json.dumps(self.to_dict(), sort_keys=True)
        return hashlib.md5(data_str.encode()).hexdigest()

    def is_before(self, other: 'AbstractPosition') -> bool:
        """
        Check whether this position comes strictly before ``other`` in document order.

        Fields are compared from coarse to fine (chapter, block, element,
        word/character, row/cell). Optional fields are only compared when set
        on both sides. Returns False when the positions are equal or cannot
        be ordered.
        """
        # Compare chapter first (if applicable)
        if self.chapter_index is not None and other.chapter_index is not None:
            if self.chapter_index != other.chapter_index:
                return self.chapter_index < other.chapter_index

        # Compare block index
        if self.block_index != other.block_index:
            return self.block_index < other.block_index

        # Compare element index within block
        if self.element_index != other.element_index:
            return self.element_index < other.element_index

        # For text elements, compare word and then character
        if self.word_index is not None and other.word_index is not None:
            if self.word_index != other.word_index:
                return self.word_index < other.word_index

            if self.character_index is not None and other.character_index is not None:
                return self.character_index < other.character_index

        # For table elements, compare row and then cell
        if self.row_index is not None and other.row_index is not None:
            if self.row_index != other.row_index:
                return self.row_index < other.row_index

            if self.cell_index is not None and other.cell_index is not None:
                return self.cell_index < other.cell_index

        # Positions are equal or comparison not possible
        return False

    def get_progress(self, document: "Document") -> float:
        """
        Get approximate progress through the document.

        For a ``Book`` the value is the chapter fraction plus the block
        fraction inside that chapter; for other documents it is the block
        fraction.

        Args:
            document: The document this position refers to.

        Returns:
            Progress clamped to the range 0.0 (start) to 1.0 (end); 0.0 when
            the document is empty or the position cannot be evaluated.
        """
        try:
            if isinstance(document, Book):
                total_chapters = len(document.chapters)
                if total_chapters == 0:
                    return 0.0

                progress = (self.chapter_index or 0) / total_chapters

                # Add progress within the current chapter
                if (self.chapter_index is not None and
                        self.chapter_index < total_chapters):
                    chapter = document.chapters[self.chapter_index]
                    if chapter.blocks:
                        block_progress = self.block_index / len(chapter.blocks)
                        progress += block_progress / total_chapters
            else:
                if not document.blocks:
                    return 0.0
                progress = self.block_index / len(document.blocks)

            # Clamp on both sides so negative indices cannot yield negative progress.
            return max(0.0, min(1.0, progress))

        except (IndexError, ZeroDivisionError, AttributeError, TypeError):
            return 0.0


@dataclass
class ConcretePosition:
    """
    Physical rendering coordinates of a position.

    A concrete position is tied to one set of layout parameters (tracked via
    ``layout_hash``) and is meant to be invalidated when they change.
    """

    # Physical coordinates
    page_index: int = 0
    viewport_x: int = 0
    viewport_y: int = 0
    line_index: Optional[int] = None

    # Validation tracking
    layout_hash: Optional[str] = None  # Hash of current layout parameters
    is_valid: bool = True

    # Quality indicators
    is_exact: bool = True  # Exact position vs. approximation
    pixel_offset: int = 0  # Fine-grained positioning within line

    def invalidate(self):
        """Mark this concrete position as invalid (and no longer exact)."""
        self.is_valid = False
        self.is_exact = False

    def update_layout_hash(self, layout_hash: str):
        """Store the layout hash and mark the position as valid."""
        self.layout_hash = layout_hash
        self.is_valid = True

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serialisable dictionary."""
        return {
            'page_index': self.page_index,
            'viewport_x': self.viewport_x,
            'viewport_y': self.viewport_y,
            'line_index': self.line_index,
            'layout_hash': self.layout_hash,
            'is_valid': self.is_valid,
            'is_exact': self.is_exact,
            'pixel_offset': self.pixel_offset
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ConcretePosition':
        """Create from a dictionary; missing keys use the dataclass defaults."""
        return cls(
            page_index=data.get('page_index', 0),
            viewport_x=data.get('viewport_x', 0),
            viewport_y=data.get('viewport_y', 0),
            line_index=data.get('line_index'),
            layout_hash=data.get('layout_hash'),
            is_valid=data.get('is_valid', True),
            is_exact=data.get('is_exact', True),
            pixel_offset=data.get('pixel_offset', 0)
        )


class PositionAnchor:
    """
    Multi-level position anchor for robust position recovery.

    Holds a primary abstract position, an ordered list of fallback
    positions, and progress information used to build an approximate
    position when none of the stored ones fits the document.
    """

    def __init__(self, primary_position: AbstractPosition):
        """
        Initialize with the primary abstract position.

        Args:
            primary_position: The main abstract position.
        """
        self.primary_position = primary_position
        self.fallback_positions: List[AbstractPosition] = []
        self.context_text: Optional[str] = None  # Text snippet for fuzzy matching
        self.document_progress: float = 0.0  # Overall document progress
        self.paragraph_progress: float = 0.0  # Progress within paragraph

    def add_fallback(self, position: AbstractPosition):
        """Append a fallback position; fallbacks are tried in insertion order."""
        self.fallback_positions.append(position)

    def set_context(self, text: str, document_progress: float = 0.0,
                    paragraph_progress: float = 0.0):
        """Set contextual information for fuzzy recovery."""
        self.context_text = text
        self.document_progress = document_progress
        self.paragraph_progress = paragraph_progress

    def get_best_position(self, document: "Document") -> AbstractPosition:
        """
        Get the best available position for the given document.

        Tries the primary position, then each fallback in order, and finally
        an approximate position derived from ``document_progress``.

        Args:
            document: The document to position within.

        Returns:
            The best available abstract position.
        """
        for candidate in (self.primary_position, *self.fallback_positions):
            if self._is_position_valid(candidate, document):
                return candidate

        # Last resort: create approximate position from progress
        return self._create_approximate_position(document)

    def _is_position_valid(self, position: AbstractPosition, document: "Document") -> bool:
        """
        Check that the position's chapter/block indices exist in the document.

        Negative indices are rejected: Python indexing would silently wrap
        them to the end of the sequence. A ``Book`` position without a
        chapter index cannot be checked against a chapter and is accepted
        as before.
        """
        try:
            if position.block_index < 0:
                return False

            if isinstance(document, Book):
                chapter_index = position.chapter_index
                if chapter_index is None:
                    return True
                if not 0 <= chapter_index < len(document.chapters):
                    return False
                chapter = document.chapters[chapter_index]
                return position.block_index < len(chapter.blocks)

            return position.block_index < len(document.blocks)

        except (AttributeError, IndexError, TypeError):
            # TypeError covers null/non-integer indices coming from stored data.
            return False

    def _create_approximate_position(self, document: "Document") -> AbstractPosition:
        """
        Create an approximate position (confidence 0.5) from ``document_progress``.

        This inverts ``AbstractPosition.get_progress``: for a ``Book`` the
        integer part of ``progress * chapter_count`` selects the chapter and
        the fractional part selects the block inside it. If the estimate
        cannot be computed, a default start-of-document position is returned.
        """
        position = AbstractPosition()

        try:
            # Stored progress may come from external data; keep it in range.
            progress = min(1.0, max(0.0, float(self.document_progress)))

            if isinstance(document, Book):
                total_chapters = len(document.chapters)
                if total_chapters > 0:
                    scaled = progress * total_chapters
                    chapter_index = min(int(scaled), total_chapters - 1)
                    position.chapter_index = chapter_index

                    blocks = document.chapters[chapter_index].blocks
                    if blocks:
                        # Fraction of the way through the selected chapter.
                        within_chapter = scaled - chapter_index
                        position.block_index = min(int(within_chapter * len(blocks)),
                                                   len(blocks) - 1)
            elif document.blocks:
                block_index = int(progress * len(document.blocks))
                position.block_index = min(block_index, len(document.blocks) - 1)

            position.confidence = 0.5  # Mark as approximate

        except (AttributeError, IndexError, ZeroDivisionError, TypeError, ValueError):
            # Ultimate fallback - start of document
            return AbstractPosition()

        return position

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serialisable dictionary."""
        return {
            'primary_position': self.primary_position.to_dict(),
            'fallback_positions': [pos.to_dict() for pos in self.fallback_positions],
            'context_text': self.context_text,
            'document_progress': self.document_progress,
            'paragraph_progress': self.paragraph_progress
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PositionAnchor':
        """
        Create an anchor from a dict produced by :meth:`to_dict`.

        Raises:
            KeyError: If ``primary_position`` is missing.
        """
        primary = AbstractPosition.from_dict(data['primary_position'])
        anchor = cls(primary)

        anchor.fallback_positions = [
            AbstractPosition.from_dict(pos_data)
            for pos_data in data.get('fallback_positions', [])
        ]
        anchor.context_text = data.get('context_text')
        anchor.document_progress = data.get('document_progress', 0.0)
        anchor.paragraph_progress = data.get('paragraph_progress', 0.0)

        return anchor
class StyleParameters:
    """
    Container for layout style parameters that affect concrete positioning.

    ``get_hash`` fingerprints these values; the translator uses that
    fingerprint to decide whether cached concrete positions still apply.
    """

    def __init__(
        self,
        page_size: Tuple[int, int] = (800, 600),
        margins: Tuple[int, int, int, int] = (20, 20, 20, 20),  # top, right, bottom, left
        default_font: Optional[Font] = None,
        line_spacing: int = 3,
        paragraph_spacing: int = 10,
        alignment: Alignment = Alignment.LEFT
    ):
        """
        Initialize style parameters.

        Args:
            page_size: (width, height) of pages.
            margins: (top, right, bottom, left) margins.
            default_font: Default font to use; a new ``Font()`` when omitted.
            line_spacing: Spacing between lines.
            paragraph_spacing: Spacing between paragraphs.
            alignment: Text alignment.
        """
        self.page_size = page_size
        self.margins = margins
        self.default_font = default_font or Font()
        self.line_spacing = line_spacing
        self.paragraph_spacing = paragraph_spacing
        self.alignment = alignment

    def get_hash(self) -> str:
        """Return a stable hex digest of these style parameters."""
        font = self.default_font
        data = {
            'page_size': self.page_size,
            'margins': self.margins,
            'font_size': font.font_size if font else 16,
            'font_path': getattr(font, 'font_path', None) if font else None,
            'line_spacing': self.line_spacing,
            'paragraph_spacing': self.paragraph_spacing,
            'alignment': self.alignment.value if hasattr(self.alignment, 'value') else str(self.alignment)
        }

        # sort_keys keeps the digest independent of dict ordering.
        data_str = json.dumps(data, sort_keys=True)
        return hashlib.md5(data_str.encode()).hexdigest()

    def copy(self) -> 'StyleParameters':
        """Return a new ``StyleParameters`` with the same values (the font object is shared)."""
        return StyleParameters(
            page_size=self.page_size,
            margins=self.margins,
            default_font=self.default_font,
            line_spacing=self.line_spacing,
            paragraph_spacing=self.paragraph_spacing,
            alignment=self.alignment
        )


class PositionTranslator:
    """
    Translates between abstract (content-based) and concrete (layout-based) positions.

    The current implementation estimates coordinates from block indices and
    font metrics rather than performing a real layout pass.
    """

    def __init__(self, document: Document, style_params: StyleParameters):
        """
        Initialize the position translator.

        Args:
            document: The document to work with.
            style_params: Current style parameters.
        """
        self.document = document
        self.style_params = style_params
        self._layout_cache: Dict[str, Any] = {}
        self._position_cache: Dict[str, ConcretePosition] = {}

    def update_style_params(self, new_params: StyleParameters):
        """
        Replace the style parameters and drop all cached results.

        Args:
            new_params: New style parameters.
        """
        self.style_params = new_params
        self._layout_cache.clear()
        self._position_cache.clear()

    def abstract_to_concrete(self, abstract_pos: AbstractPosition) -> ConcretePosition:
        """
        Convert an abstract position to a concrete position.

        Results are cached per (position, style) pair. A cached entry is
        reused only while it is still valid and matches the current layout
        hash, so a position that a caller invalidated is recomputed.

        Args:
            abstract_pos: The abstract position to convert.

        Returns:
            Corresponding concrete position.
        """
        layout_hash = self.style_params.get_hash()  # computed once per call
        cache_key = abstract_pos.get_hash() + layout_hash

        cached_pos = self._position_cache.get(cache_key)
        if (cached_pos is not None and cached_pos.is_valid and
                cached_pos.layout_hash == layout_hash):
            return cached_pos

        concrete_pos = self._calculate_concrete_position(abstract_pos)
        concrete_pos.update_layout_hash(layout_hash)
        self._position_cache[cache_key] = concrete_pos

        return concrete_pos

    def concrete_to_abstract(self, concrete_pos: ConcretePosition) -> AbstractPosition:
        """
        Convert a concrete position to an abstract position.

        Args:
            concrete_pos: The concrete position to convert.

        Returns:
            Corresponding (estimated) abstract position.
        """
        return self._calculate_abstract_position(concrete_pos)

    def find_clean_boundary(self, abstract_pos: AbstractPosition) -> AbstractPosition:
        """
        Find a clean reading boundary near the given position.

        A position inside a word is moved to the start of that word, and a
        paragraph position within its first three words is moved to the
        start of the paragraph. The input is not modified.

        Args:
            abstract_pos: The starting position.

        Returns:
            A new, clean boundary position.
        """
        clean_pos = abstract_pos.copy()

        # If we're in the middle of a word, move to word start
        if clean_pos.character_index is not None and clean_pos.character_index > 0:
            clean_pos.character_index = 0
            clean_pos.is_clean_boundary = True

        # Close to the paragraph start: snap to its first word
        if (clean_pos.word_index is not None and
                clean_pos.word_index <= 2 and
                clean_pos.element_type == ElementType.PARAGRAPH):
            clean_pos.word_index = 0
            clean_pos.character_index = 0

        return clean_pos

    def create_position_anchor(self, abstract_pos: AbstractPosition,
                               context_window: int = 50) -> PositionAnchor:
        """
        Create a position anchor with fallbacks and context.

        Args:
            abstract_pos: Primary abstract position.
            context_window: Number of words of surrounding text to capture.

        Returns:
            Anchor with two fallbacks (start of element, start of block),
            context text and progress values.
        """
        anchor = PositionAnchor(abstract_pos)

        # Fallback 1: Start of current paragraph/element
        para_start = abstract_pos.copy()
        para_start.word_index = 0
        para_start.character_index = 0
        anchor.add_fallback(para_start)

        # Fallback 2: Start of current block
        block_start = abstract_pos.copy()
        block_start.element_index = 0
        block_start.word_index = 0
        block_start.character_index = 0
        anchor.add_fallback(block_start)

        anchor.set_context(
            self._extract_context_text(abstract_pos, context_window),
            abstract_pos.get_progress(self.document),
            self._get_paragraph_progress(abstract_pos),
        )

        return anchor

    def _calculate_concrete_position(self, abstract_pos: AbstractPosition) -> ConcretePosition:
        """
        Estimate the concrete position for an abstract position.

        This is a heuristic, not a layout pass: the result is flagged with
        ``is_exact=False``. An unresolvable block yields a default position.
        """
        target_block = self._get_block_from_position(abstract_pos)
        if target_block is None:
            return ConcretePosition()  # Default to start

        return ConcretePosition(
            page_index=self._estimate_page_for_block(abstract_pos),
            viewport_x=self.style_params.margins[3],  # Left margin
            viewport_y=self._estimate_y_coordinate(abstract_pos, target_block),
            is_exact=False  # Mark as approximation
        )

    def _calculate_abstract_position(self, concrete_pos: ConcretePosition) -> AbstractPosition:
        """
        Estimate the abstract position for a concrete position.

        The block index is derived from the page index and the relative
        y-coordinate; the result carries ``confidence=0.7``.
        """
        abstract_pos = AbstractPosition()

        blocks_per_page = self._estimate_blocks_per_page()
        estimated_block = concrete_pos.page_index * blocks_per_page

        # margins[::2] selects the top and bottom margins; the floor of 1
        # avoids a division by zero when margins consume the whole page.
        page_height = max(1, self.style_params.page_size[1] - sum(self.style_params.margins[::2]))
        relative_y = concrete_pos.viewport_y / page_height

        estimated_block += int(relative_y * blocks_per_page)

        abstract_pos.block_index = max(0, estimated_block)
        abstract_pos.confidence = 0.7  # Mark as estimate

        return abstract_pos

    def _get_block_from_position(self, abstract_pos: AbstractPosition) -> Optional[Block]:
        """
        Return the block referenced by an abstract position, or None.

        None is returned for out-of-range or negative indices (which Python
        would otherwise wrap to the end) and for ``Book`` positions without
        a chapter index.
        """
        try:
            if abstract_pos.block_index < 0:
                return None

            if isinstance(self.document, Book):
                chapter_index = abstract_pos.chapter_index
                if chapter_index is None or chapter_index < 0:
                    return None
                chapter = self.document.chapters[chapter_index]
                return chapter.blocks[abstract_pos.block_index]

            return self.document.blocks[abstract_pos.block_index]
        except (IndexError, AttributeError, TypeError):
            return None

    def _estimate_page_for_block(self, abstract_pos: AbstractPosition) -> int:
        """Estimate which page a block would appear on from its index."""
        blocks_per_page = self._estimate_blocks_per_page()
        return abstract_pos.block_index // max(1, blocks_per_page)

    def _estimate_blocks_per_page(self) -> int:
        """Estimate how many blocks fit on a page (always at least 1)."""
        page_height = self.style_params.page_size[1] - sum(self.style_params.margins[::2])
        # Rough estimate: three font heights per block; the floor of 1 guards
        # against a zero font size.
        average_block_height = max(1, self.style_params.default_font.font_size * 3)
        return max(1, page_height // average_block_height)

    def _estimate_y_coordinate(self, abstract_pos: AbstractPosition, block: Block) -> int:
        """
        Estimate the y-coordinate within the page for a position.

        ``block`` is accepted for interface compatibility and is not used by
        the current heuristic.
        """
        params = self.style_params
        font_size = params.default_font.font_size

        # Start with top margin
        y = params.margins[0]

        # Add estimated height for preceding blocks on the same page
        blocks_before = abstract_pos.block_index % self._estimate_blocks_per_page()
        block_height = font_size * 2  # Rough estimate
        y += blocks_before * (block_height + params.paragraph_spacing)

        # Add offset within block if a word position is specified
        if abstract_pos.word_index is not None:
            line_height = font_size + params.line_spacing
            estimated_line = abstract_pos.word_index // 10  # Rough estimate of words per line
            y += estimated_line * line_height

        return y

    def _extract_context_text(self, abstract_pos: AbstractPosition, window: int) -> str:
        """
        Return up to ``window`` words of paragraph text around the position.

        Returns an empty string when the block is not a paragraph, has no
        words, or its words cannot be read.
        """
        block = self._get_block_from_position(abstract_pos)
        if not block or not isinstance(block, Paragraph):
            return ""

        try:
            words = [word.text for _, word in block.words()]
        except Exception:
            # Best effort: context is optional. Unlike a bare ``except``,
            # this lets KeyboardInterrupt/SystemExit propagate.
            return ""

        if not words:
            return ""

        word_idx = abstract_pos.word_index or 0
        half_window = window // 2
        start_idx = max(0, word_idx - half_window)
        end_idx = min(len(words), word_idx + half_window)

        return " ".join(words[start_idx:end_idx])

    def _get_paragraph_progress(self, abstract_pos: AbstractPosition) -> float:
        """Return the word-based progress (0.0 to 1.0) within the current paragraph."""
        if abstract_pos.word_index is None:
            return 0.0

        block = self._get_block_from_position(abstract_pos)
        if not block or not isinstance(block, Paragraph):
            return 0.0

        try:
            total_words = sum(1 for _ in block.words())
            if total_words == 0:
                return 0.0
            return min(1.0, abstract_pos.word_index / total_words)
        except Exception:
            # Best effort, but do not swallow KeyboardInterrupt/SystemExit.
            return 0.0


class PositionTracker:
    """
    High-level interface for tracking and managing reading positions.

    Wraps a ``PositionTranslator`` and keeps the current abstract position
    plus a history of anchors recorded on style changes.
    """

    def __init__(self, document: Document, style_params: StyleParameters):
        """
        Initialize position tracker.

        Args:
            document: Document to track positions in.
            style_params: Current style parameters.
        """
        self.document = document
        self.translator = PositionTranslator(document, style_params)
        self.current_position: Optional[AbstractPosition] = None
        self.reading_history: List[PositionAnchor] = []

    def set_current_position(self, position: AbstractPosition):
        """Set the current reading position."""
        self.current_position = position

    def get_current_position(self) -> Optional[AbstractPosition]:
        """Get the current reading position."""
        return self.current_position

    def save_bookmark(self) -> str:
        """Return the current position as a JSON anchor string, or "" when unset."""
        if self.current_position is None:
            return ""

        anchor = self.translator.create_position_anchor(self.current_position)
        return json.dumps(anchor.to_dict())

    def load_bookmark(self, bookmark_str: str) -> bool:
        """
        Load the current position from a bookmark string.

        Args:
            bookmark_str: String produced by :meth:`save_bookmark`.

        Returns:
            True if the bookmark was loaded, False if it is empty, is not
            valid JSON, or does not describe an anchor.
        """
        try:
            anchor = PositionAnchor.from_dict(json.loads(bookmark_str))
            best_position = anchor.get_best_position(self.document)
            self.current_position = self.translator.find_clean_boundary(best_position)
            return True
        except (json.JSONDecodeError, KeyError, ValueError, TypeError, AttributeError):
            # TypeError/AttributeError: valid JSON of the wrong shape
            # (e.g. "null" or a list) must not crash the caller.
            return False

    def handle_style_change(self, new_style_params: StyleParameters):
        """
        Apply new style parameters while keeping the reading position.

        The current position (if any) is recorded in ``reading_history``
        before the translator is updated, then snapped to a clean boundary.

        Args:
            new_style_params: New style parameters.
        """
        if self.current_position is not None:
            anchor = self.translator.create_position_anchor(self.current_position)
            self.reading_history.append(anchor)

        self.translator.update_style_params(new_style_params)

        if self.current_position is not None:
            # The abstract position itself is unaffected by the style change.
            self.current_position = self.translator.find_clean_boundary(self.current_position)

    def get_concrete_position(self) -> Optional[ConcretePosition]:
        """Get the current position as concrete coordinates, or None when unset."""
        if self.current_position is None:
            return None

        return self.translator.abstract_to_concrete(self.current_position)

    def set_position_from_concrete(self, concrete_pos: ConcretePosition):
        """Set the current position from concrete coordinates."""
        abstract_pos = self.translator.concrete_to_abstract(concrete_pos)
        self.current_position = self.translator.find_clean_boundary(abstract_pos)

    def get_reading_progress(self) -> float:
        """Get reading progress as a fraction (0.0 to 1.0); 0.0 when no position is set."""
        if self.current_position is None:
            return 0.0

        return self.current_position.get_progress(self.document)
def _measure_char_widths(chars, font):
    """Return a dict mapping each character of *chars* to its ``Text`` width in *font*."""
    return {char: Text(char, font).width for char in chars}


def _load_first_available_font(paths, font_size):
    """Return ``(font, path)`` for the first existing, loadable font in *paths*, else ``(None, None)``."""
    import os

    for path in paths:
        if not os.path.exists(path):
            continue
        try:
            return Font(font_path=path, font_size=font_size), path
        except Exception:
            # Best-effort probe: an unreadable font file just means we try
            # the next candidate.  KeyboardInterrupt still propagates.
            continue
    return None, None


def _report_mono_fit(word, char_width, line_width, label):
    """Print whether *word* fits in *line_width* when every character is *char_width* px wide."""
    word_width = len(word) * char_width
    print(f"\n With {label} mono-space ({char_width}px/char):")
    print(f" Word would be: {len(word)} × {char_width} = {word_width}px")

    if word_width <= line_width:
        print(" → Would fit completely")
    else:
        # word_width > line_width >= 0 implies char_width > 0 here.
        chars_that_fit = line_width // char_width
        print(f" → Would need breaking after {chars_that_fit} characters")


def main():
    """
    Run the mono-space font demonstration and print its report to stdout.

    The report has five sections: width variance of a proportional font,
    why that matters, line-capacity prediction, the same measurements with
    a real mono-space font (or theoretical values when none of the known
    font paths exists), and a line-fitting test.  When a ``test_output``
    directory exists in the working directory, a small PNG is rendered
    into it as well.
    """
    import os

    print("=== Mono-space Font Testing Demo ===\n")

    # Create a regular font
    font = Font(font_size=12)

    print("1. Character Width Variance Analysis:")
    print("-" * 40)

    # Test different characters to show width variance
    widths = _measure_char_widths("iIlLmMwW", font)
    for char, width in widths.items():
        print(f" '{char}': {width:3d}px")

    min_w = min(widths.values())
    max_w = max(widths.values())
    variance = max_w - min_w

    print(f"\n Range: {min_w}-{max_w}px (variance: {variance}px)")
    if min_w > 0:
        print(f" Ratio: {max_w/min_w:.1f}x difference")
    else:
        # A glyph measured as 0px would otherwise raise ZeroDivisionError.
        print(" Ratio: n/a (narrowest glyph measured 0px)")

    print("\n2. Why This Matters for Testing:")
    print("-" * 40)

    # Show how same-length strings have different widths
    word1 = "ill"  # narrow
    word2 = "WWW"  # wide

    text1 = Text(word1, font)
    text2 = Text(word2, font)

    print(f" '{word1}' (3 chars): {text1.width}px")
    print(f" '{word2}' (3 chars): {text2.width}px")
    print(f" Same length, {abs(text1.width - text2.width)}px difference!")

    print("\n3. Line Capacity Prediction:")
    print("-" * 40)

    line_width = 100
    print(f" Line width: {line_width}px")

    # Test how many characters fit
    test_cases = [
        ("narrow chars", "i" * 20),
        ("wide chars", "W" * 10),
        ("mixed text", "Hello World"),
    ]

    for name, text_str in test_cases:
        text_obj = Text(text_str, font)
        fits = "YES" if text_obj.width <= line_width else "NO"
        print(f" {name:12}: '{text_str[:10]}...' ({len(text_str)} chars, {text_obj.width}px) → {fits}")

    print("\n4. With Mono-space Fonts:")
    print("-" * 40)

    # Try to use an actual mono-space font
    mono_paths = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf",
        "/System/Library/Fonts/Monaco.ttf",
        "C:/Windows/Fonts/consola.ttf",
    ]
    mono_font, mono_path = _load_first_available_font(mono_paths, 12)
    mono_widths = {}

    if mono_font:
        print(f" Using actual mono-space font: {os.path.basename(mono_path)}")

        # Test actual mono-space character consistency
        mono_widths = _measure_char_widths("iIlLmMwW", mono_font)
        mono_min = min(mono_widths.values())
        mono_max = max(mono_widths.values())
        mono_variance = mono_max - mono_min

        print(" Mono-space character widths:")
        for char, width in mono_widths.items():
            print(f" '{char}': {width}px")
        print(f" Range: {mono_min}-{mono_max}px (variance: {mono_variance}px)")

        # Compare to regular font variance
        improvement = variance / max(1, mono_variance)
        print(f" Improvement: {improvement:.1f}x more consistent!")

        # Test line capacity with actual mono-space
        mono_char_width = mono_widths['M']  # Use actual width
        # Guard the division: a 0px glyph means nothing meaningful fits.
        capacity = line_width // mono_char_width if mono_char_width > 0 else 0

        print("\n Actual mono-space line capacity:")
        print(f" Each character: {mono_char_width}px")
        print(f" Line capacity: {capacity} characters")

        # Prove consistency with different character combinations
        print(f" Testing {capacity}-character strings:")
        all_same_width = True
        first_width = None

        for letter in ("i", "W", "M", "l"):
            test_str = letter * capacity
            text_obj = Text(test_str, mono_font)
            if first_width is None:
                first_width = text_obj.width
            elif abs(text_obj.width - first_width) > 2:  # Allow 2px tolerance
                all_same_width = False

            # Print the letter itself: test_str[0] fails when capacity is 0.
            print(f" '{letter}' × {len(test_str)}: {text_obj.width}px")

        if all_same_width:
            print(f" ✓ ALL {capacity}-character strings have the same width!")
        else:
            print(" ⚠ Some variance detected (font may not be perfectly mono-space)")

    else:
        print(" No mono-space font found - showing theoretical values:")
        mono_char_width = 8  # Typical mono-space width
        capacity = line_width // mono_char_width

        print(f" Each character: {mono_char_width}px (theoretical)")
        print(f" Line capacity: {capacity} characters")
        print(f" ANY {capacity}-character string would fit!")
        print(" Layout calculations become simple math")

    print("\n5. Line Fitting Test:")
    print("-" * 40)

    # Test actual line fitting
    line = Line(
        spacing=(2, 4),
        origin=(0, 0),
        size=(line_width, 20),
        font=font,
        halign=Alignment.LEFT
    )

    test_word = "development"  # 11 characters
    word_obj = Text(test_word, font)

    print(f" Test word: '{test_word}' ({len(test_word)} chars, {word_obj.width}px)")
    print(f" Line width: {line_width}px")

    result = line.add_word(test_word, font)

    if result is None:
        print(" Result: Word fits completely")
    elif line.text_objects:
        added = line.text_objects[0].text
        print(f" Result: Added '{added}', remaining '{result}'")
    else:
        print(" Result: Word rejected completely")

    # Use actual mono font width if available, otherwise theoretical
    if mono_font:
        _report_mono_fit(test_word, mono_widths['M'], line_width, "actual")
    else:
        _report_mono_fit(test_word, 8, line_width, "theoretical")

    print("\n=== Conclusion ===")
    print("Mono-space fonts make testing predictable because:")
    print("- Character width is constant")
    print("- Line capacity is calculable")
    print("- Word fitting is based on character count")
    print("- Layout behavior is deterministic")

    # Only render when an output *directory* is present; a plain file named
    # "test_output" cannot receive the image.
    if os.path.isdir("test_output"):
        print("\nCreating visual test output...")

        # Create a simple line rendering test
        from pyWebLayout.concrete.page import Page, Container

        page = Page(size=(400, 200))

        container = Container(
            origin=(0, 0),
            size=(380, 180),
            direction='vertical',
            spacing=5,
            padding=(10, 10, 10, 10)
        )

        # Add title
        title = Text("Character Width Variance Demo", font)
        container.add_child(title)

        # Add test lines showing different characters
        for char_type, char in [("Narrow", "i"), ("Wide", "W"), ("Average", "n")]:
            line_text = f"{char_type}: {char * 10}"
            container.add_child(Text(line_text, font))

        page.add_child(container)
        image = page.render()

        output_path = os.path.join("test_output", "monospace_demo.png")
        image.save(output_path)
        print(f"Visual demo saved to: {output_path}")


if __name__ == "__main__":
    main()
self.assertEqual(word.background, self.font.background) self.assertIsNone(word.previous) self.assertIsNone(word.next) self.assertIsNone(word.hyphenated_parts) @@ -252,27 +251,6 @@ class TestWord(unittest.TestCase): for i, expected_part in enumerate(expected_parts): self.assertEqual(word.get_hyphenated_part(i), expected_part) - def test_word_create_and_add_to_with_container_style(self): - """Test Word.create_and_add_to with container that has style property.""" - # Create mock container with style and add_word method - mock_container = Mock() - mock_container.style = self.font - mock_container.add_word = Mock() - # Ensure _words and background don't interfere - del mock_container._words - del mock_container.background # Remove background so it inherits from font - - # Create and add word - word = Word.create_and_add_to("hello", mock_container) - - # Test that word was created with correct properties - self.assertIsInstance(word, Word) - self.assertEqual(word.text, "hello") - self.assertEqual(word.style, self.font) - self.assertEqual(word.background, self.font.background) - - # Test that add_word was called - mock_container.add_word.assert_called_once_with(word) def test_word_create_and_add_to_with_style_override(self): """Test Word.create_and_add_to with explicit style parameter.""" diff --git a/tests/test_html_extraction.py b/tests/test_html_extraction.py index 7b4aa13..b9afb3a 100644 --- a/tests/test_html_extraction.py +++ b/tests/test_html_extraction.py @@ -398,22 +398,22 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): """ - # Initially empty font registry - initial_font_count = len(self.doc._fonts) + # Initially empty style registry + initial_style_count = self.doc.get_style_registry().get_style_count() # Parse HTML with document context blocks = parse_html_string(html_content, self.base_font, document=self.doc) - # Should have created fonts for different styles - final_font_count = len(self.doc._fonts) - self.assertGreater(final_font_count, 
initial_font_count, - "Should have created fonts in registry") + # Should have created styles for different formatting + final_style_count = self.doc.get_style_registry().get_style_count() + self.assertGreater(final_style_count, initial_style_count, + "Should have created styles in registry") # Should have created blocks self.assertGreater(len(blocks), 0, "Should have created blocks") def test_font_registry_reuses_fonts(self): - """Test that parsing same content reuses existing fonts.""" + """Test that parsing same content reuses existing styles.""" html_content = """

This is bold text and italic text.

@@ -423,43 +423,43 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): # First parse blocks1 = parse_html_string(html_content, self.base_font, document=self.doc) - first_parse_font_count = len(self.doc._fonts) + first_parse_style_count = self.doc.get_style_registry().get_style_count() # Second parse with same content blocks2 = parse_html_string(html_content, self.base_font, document=self.doc) - second_parse_font_count = len(self.doc._fonts) + second_parse_style_count = self.doc.get_style_registry().get_style_count() - # Font count should not increase on second parse - self.assertEqual(first_parse_font_count, second_parse_font_count, - "Should reuse existing fonts instead of creating new ones") + # Style count should not increase on second parse + self.assertEqual(first_parse_style_count, second_parse_style_count, + "Should reuse existing styles instead of creating new ones") # Both parses should create same number of blocks self.assertEqual(len(blocks1), len(blocks2), "Should create same structure on both parses") def test_font_registry_different_styles_create_different_fonts(self): - """Test that different styles create different font objects.""" - # Create fonts with different properties - font1 = self.doc.get_or_create_font( - font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD + """Test that different styles create different style objects.""" + # Create styles with different properties + style_id1, style1 = self.doc.get_or_create_style( + font_size=14, color=(255, 0, 0), font_weight=FontWeight.BOLD ) - font2 = self.doc.get_or_create_font( - font_size=16, colour=(255, 0, 0), weight=FontWeight.BOLD + style_id2, style2 = self.doc.get_or_create_style( + font_size=16, color=(255, 0, 0), font_weight=FontWeight.BOLD ) - font3 = self.doc.get_or_create_font( - font_size=14, colour=(0, 255, 0), weight=FontWeight.BOLD + style_id3, style3 = self.doc.get_or_create_style( + font_size=14, color=(0, 255, 0), font_weight=FontWeight.BOLD ) - # Should be different 
objects - self.assertIsNot(font1, font2, "Different sizes should create different fonts") - self.assertIsNot(font1, font3, "Different colors should create different fonts") - self.assertIsNot(font2, font3, "All fonts should be different") + # Should be different style IDs + self.assertNotEqual(style_id1, style_id2, "Different sizes should create different styles") + self.assertNotEqual(style_id1, style_id3, "Different colors should create different styles") + self.assertNotEqual(style_id2, style_id3, "All styles should be different") - # Should have 3 fonts in registry - self.assertEqual(len(self.doc._fonts), 3) + # Should have multiple styles in registry + self.assertGreaterEqual(self.doc.get_style_registry().get_style_count(), 3) def test_font_registry_integration_with_html_styles(self): - """Test that HTML parsing uses font registry for styled content.""" + """Test that HTML parsing uses style registry for styled content.""" html_content = """

Normal text with bold and italic and red text.

@@ -485,14 +485,17 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): self.assertGreater(len(italic_words), 0, "Should have italic words") self.assertGreater(len(red_words), 0, "Should have red words") - # Font registry should contain multiple fonts for different styles - self.assertGreater(len(self.doc._fonts), 1, - "Should have multiple fonts for different styles") + # Style registry should contain multiple styles for different formatting + self.assertGreater(self.doc.get_style_registry().get_style_count(), 1, + "Should have multiple styles for different formatting") def test_font_registry_without_document_context(self): """Test that parsing without document context works (fallback behavior).""" html_content = "

This is bold text.

" + # Get initial style count (should include default style) + initial_style_count = self.doc.get_style_registry().get_style_count() + # Parse without document context blocks = parse_html_string(html_content, self.base_font) @@ -500,12 +503,13 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): self.assertEqual(len(blocks), 1) self.assertIsInstance(blocks[0], Paragraph) - # Should not affect document's font registry - self.assertEqual(len(self.doc._fonts), 0, - "Document font registry should remain empty") + # Should not affect document's style registry + final_style_count = self.doc.get_style_registry().get_style_count() + self.assertEqual(final_style_count, initial_style_count, + "Document style registry should remain unchanged") def test_complex_html_font_reuse(self): - """Test font reuse with complex HTML containing repeated styles.""" + """Test style reuse with complex HTML containing repeated styles.""" html_content = """

First Header

@@ -517,21 +521,21 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): # Parse content blocks = parse_html_string(html_content, self.base_font, document=self.doc) - font_count_after_parse = len(self.doc._fonts) + style_count_after_parse = self.doc.get_style_registry().get_style_count() # Parse same content again blocks2 = parse_html_string(html_content, self.base_font, document=self.doc) - font_count_after_second_parse = len(self.doc._fonts) + style_count_after_second_parse = self.doc.get_style_registry().get_style_count() - # Font count should not increase on second parse - self.assertEqual(font_count_after_parse, font_count_after_second_parse, - "Fonts should be reused for repeated styles") + # Style count should not increase on second parse + self.assertEqual(style_count_after_parse, style_count_after_second_parse, + "Styles should be reused for repeated formatting") # Both should create same structure self.assertEqual(len(blocks), len(blocks2)) def test_font_registry_with_nested_styles(self): - """Test font registry with nested HTML styles.""" + """Test style registry with nested HTML styles.""" html_content = """

Text with bold and bold italic nested styles.

""" @@ -539,7 +543,7 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): # Parse content blocks = parse_html_string(html_content, self.base_font, document=self.doc) - # Should create fonts for different style combinations + # Should create styles for different style combinations paragraph = blocks[0] words = list(paragraph.words()) @@ -551,9 +555,9 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): self.assertGreater(len(bold_italic_words), 0, "Should have words with combined bold+italic style") - # Should have multiple fonts in registry for different combinations - self.assertGreater(len(self.doc._fonts), 1, - "Should create separate fonts for style combinations") + # Should have multiple styles in registry for different combinations + self.assertGreater(self.doc.get_style_registry().get_style_count(), 1, + "Should create separate styles for style combinations") if __name__ == '__main__': diff --git a/tests/test_position_system.py b/tests/test_position_system.py new file mode 100644 index 0000000..80faac9 --- /dev/null +++ b/tests/test_position_system.py @@ -0,0 +1,1097 @@ +""" +Comprehensive unit tests for the dual-location system architecture. + +Tests the abstract/concrete position system, position translation, +and position tracking components. 
+""" + +import unittest +import json +from unittest.mock import Mock, patch + +from pyWebLayout.abstract.document import Document, Book, Chapter +from pyWebLayout.abstract.block import Paragraph, Heading, BlockType +from pyWebLayout.abstract.inline import Word +from pyWebLayout.style import Font, Alignment +from pyWebLayout.style.abstract_style import AbstractStyle +from pyWebLayout.typesetting.abstract_position import ( + AbstractPosition, ConcretePosition, ElementType, PositionAnchor +) +from pyWebLayout.typesetting.position_translator import ( + PositionTranslator, StyleParameters, PositionTracker +) + + +class TestAbstractPosition(unittest.TestCase): + """Test cases for AbstractPosition class.""" + + def setUp(self): + """Set up test fixtures.""" + self.position = AbstractPosition( + document_id="test_doc", + chapter_index=1, + block_index=5, + element_index=2, + element_type=ElementType.PARAGRAPH, + word_index=10, + character_index=3 + ) + + def test_initialization(self): + """Test AbstractPosition initialization.""" + self.assertEqual(self.position.document_id, "test_doc") + self.assertEqual(self.position.chapter_index, 1) + self.assertEqual(self.position.block_index, 5) + self.assertEqual(self.position.element_index, 2) + self.assertEqual(self.position.element_type, ElementType.PARAGRAPH) + self.assertEqual(self.position.word_index, 10) + self.assertEqual(self.position.character_index, 3) + self.assertTrue(self.position.is_clean_boundary) + self.assertEqual(self.position.confidence, 1.0) + + def test_default_initialization(self): + """Test AbstractPosition with default values.""" + pos = AbstractPosition() + self.assertIsNone(pos.document_id) + self.assertIsNone(pos.chapter_index) + self.assertEqual(pos.block_index, 0) + self.assertEqual(pos.element_index, 0) + self.assertEqual(pos.element_type, ElementType.PARAGRAPH) + self.assertIsNone(pos.word_index) + self.assertIsNone(pos.character_index) + self.assertTrue(pos.is_clean_boundary) + 
class TestAbstractPosition(unittest.TestCase):
    """Checks AbstractPosition construction, (de)serialization, ordering and progress."""

    @staticmethod
    def _document_with(texts):
        """Return a Document holding one Paragraph per entry of *texts*."""
        doc = Document()
        for content in texts:
            para = Paragraph()
            para.text = content
            doc.add_block(para)
        return doc

    def setUp(self):
        """Create the fully populated paragraph position shared by the tests."""
        fields = dict(
            document_id="test_doc",
            chapter_index=1,
            block_index=5,
            element_index=2,
            element_type=ElementType.PARAGRAPH,
            word_index=10,
            character_index=3,
        )
        self.position = AbstractPosition(**fields)

    def test_initialization(self):
        """Every constructor argument is stored; flags take their defaults."""
        stored = [
            ("document_id", "test_doc"),
            ("chapter_index", 1),
            ("block_index", 5),
            ("element_index", 2),
            ("element_type", ElementType.PARAGRAPH),
            ("word_index", 10),
            ("character_index", 3),
        ]
        for field, want in stored:
            self.assertEqual(getattr(self.position, field), want)
        self.assertTrue(self.position.is_clean_boundary)
        self.assertEqual(self.position.confidence, 1.0)

    def test_default_initialization(self):
        """A position built without arguments has the documented defaults."""
        blank = AbstractPosition()
        for field in ("document_id", "chapter_index"):
            self.assertIsNone(getattr(blank, field))
        for field, want in (
            ("block_index", 0),
            ("element_index", 0),
            ("element_type", ElementType.PARAGRAPH),
        ):
            self.assertEqual(getattr(blank, field), want)
        for field in ("word_index", "character_index"):
            self.assertIsNone(getattr(blank, field))
        self.assertTrue(blank.is_clean_boundary)
        self.assertEqual(blank.confidence, 1.0)

    def test_to_dict(self):
        """to_dict() emits every field, including the unset table/list ones."""
        want = {
            'document_id': 'test_doc',
            'chapter_index': 1,
            'block_index': 5,
            'element_index': 2,
            'element_type': 'paragraph',
            'word_index': 10,
            'character_index': 3,
            'row_index': None,
            'cell_index': None,
            'list_item_index': None,
            'is_clean_boundary': True,
            'confidence': 1.0
        }
        self.assertEqual(self.position.to_dict(), want)

    def test_from_dict(self):
        """from_dict() restores each field from a plain dictionary."""
        payload = {
            'document_id': 'test_doc',
            'chapter_index': 2,
            'block_index': 3,
            'element_index': 1,
            'element_type': 'heading',
            'word_index': 5,
            'character_index': 2,
            'is_clean_boundary': False,
            'confidence': 0.8
        }
        rebuilt = AbstractPosition.from_dict(payload)
        for field, want in (
            ("document_id", 'test_doc'),
            ("chapter_index", 2),
            ("block_index", 3),
            ("element_index", 1),
            ("element_type", ElementType.HEADING),
            ("word_index", 5),
            ("character_index", 2),
        ):
            self.assertEqual(getattr(rebuilt, field), want)
        self.assertFalse(rebuilt.is_clean_boundary)
        self.assertEqual(rebuilt.confidence, 0.8)

    def test_bookmark_serialization(self):
        """A bookmark is a JSON object string that round-trips the position."""
        bookmark = self.position.to_bookmark()
        self.assertIsInstance(bookmark, str)

        # The bookmark must decode as a JSON object.
        self.assertIsInstance(json.loads(bookmark), dict)

        # Decoding the bookmark yields the same location.
        restored = AbstractPosition.from_bookmark(bookmark)
        for field in ("document_id", "chapter_index", "block_index", "word_index"):
            self.assertEqual(getattr(restored, field), getattr(self.position, field))

    def test_copy(self):
        """copy() duplicates the fields into an independent object."""
        duplicate = self.position.copy()
        for field in ("document_id", "chapter_index", "block_index", "word_index"):
            self.assertEqual(getattr(duplicate, field), getattr(self.position, field))

        # Mutating the duplicate must leave the source untouched.
        duplicate.block_index = 999
        self.assertNotEqual(duplicate.block_index, self.position.block_index)

    def test_get_hash(self):
        """get_hash() is stable for one position and differs between positions."""
        first = self.position.get_hash()
        second = self.position.get_hash()
        self.assertEqual(first, second)

        moved = self.position.copy()
        moved.block_index = 999
        self.assertNotEqual(first, moved.get_hash())

    def test_is_before(self):
        """is_before() orders by word, then block, then chapter."""
        base = AbstractPosition(chapter_index=1, block_index=5, element_index=2, word_index=10)
        later_word = AbstractPosition(chapter_index=1, block_index=5, element_index=2, word_index=15)
        later_block = AbstractPosition(chapter_index=1, block_index=6, element_index=0, word_index=0)
        later_chapter = AbstractPosition(chapter_index=2, block_index=0, element_index=0, word_index=0)

        for successor in (later_word, later_block, later_chapter):
            self.assertTrue(base.is_before(successor))
            self.assertFalse(successor.is_before(base))

        # A position never precedes itself.
        self.assertFalse(base.is_before(base))

    def test_is_before_with_characters(self):
        """Character index breaks ties inside the same word."""
        early = AbstractPosition(block_index=1, word_index=5, character_index=2)
        late = AbstractPosition(block_index=1, word_index=5, character_index=8)

        self.assertTrue(early.is_before(late))
        self.assertFalse(late.is_before(early))

    def test_is_before_with_tables(self):
        """Table positions are ordered by row, then by cell."""
        cell_a = AbstractPosition(block_index=1, element_type=ElementType.TABLE, row_index=2, cell_index=1)
        cell_b = AbstractPosition(block_index=1, element_type=ElementType.TABLE, row_index=2, cell_index=3)
        next_row = AbstractPosition(block_index=1, element_type=ElementType.TABLE, row_index=3, cell_index=0)

        for successor in (cell_b, next_row):
            self.assertTrue(cell_a.is_before(successor))
            self.assertFalse(successor.is_before(cell_a))

    def test_get_progress_simple_document(self):
        """Progress in a flat ten-block document is block_index / 10."""
        doc = self._document_with([f"Paragraph {i}" for i in range(10)])

        for block_index, want in ((0, 0.0), (5, 0.5), (9, 0.9)):
            position = AbstractPosition(block_index=block_index)
            self.assertAlmostEqual(position.get_progress(doc), want, places=2)

    def test_get_progress_book(self):
        """Progress through a book grows with the chapter and stays <= 1.0."""
        book = Book()
        for chapter_no in range(3):
            chapter = Chapter(f"Chapter {chapter_no}")
            for para_no in range(5):
                para = Paragraph()
                para.text = f"Chapter {chapter_no}, Paragraph {para_no}"
                chapter.add_block(para)
            book.add_chapter(chapter)

        at_start = AbstractPosition(chapter_index=0, block_index=0).get_progress(book)
        at_middle = AbstractPosition(chapter_index=1, block_index=2).get_progress(book)
        at_end = AbstractPosition(chapter_index=2, block_index=4).get_progress(book)

        self.assertGreater(at_middle, at_start)
        self.assertGreater(at_end, at_middle)
        self.assertLessEqual(at_end, 1.0)

    def test_get_progress_empty_document(self):
        """An empty document always reports zero progress."""
        empty = Document()
        self.assertEqual(AbstractPosition(block_index=0).get_progress(empty), 0.0)

    def test_get_progress_invalid_position(self):
        """A block index past the end still yields progress within [0, 1]."""
        doc = self._document_with(["Test paragraph"])

        beyond_end = AbstractPosition(block_index=999)
        progress = beyond_end.get_progress(doc)
        self.assertGreaterEqual(progress, 0.0)
        self.assertLessEqual(progress, 1.0)
class TestConcretePosition(unittest.TestCase):
    """Checks ConcretePosition construction, invalidation and (de)serialization."""

    def setUp(self):
        """Create a concrete position with every coordinate populated."""
        coords = dict(
            page_index=2,
            viewport_x=100,
            viewport_y=200,
            line_index=5,
            layout_hash="abc123",
            pixel_offset=10,
        )
        self.position = ConcretePosition(**coords)

    def test_initialization(self):
        """Constructor arguments are stored and the position starts valid and exact."""
        for field, want in (
            ("page_index", 2),
            ("viewport_x", 100),
            ("viewport_y", 200),
            ("line_index", 5),
            ("layout_hash", "abc123"),
        ):
            self.assertEqual(getattr(self.position, field), want)
        self.assertTrue(self.position.is_valid)
        self.assertTrue(self.position.is_exact)
        self.assertEqual(self.position.pixel_offset, 10)

    def test_default_initialization(self):
        """A position built without arguments sits at the origin of page 0."""
        blank = ConcretePosition()
        for field in ("page_index", "viewport_x", "viewport_y"):
            self.assertEqual(getattr(blank, field), 0)
        for field in ("line_index", "layout_hash"):
            self.assertIsNone(getattr(blank, field))
        self.assertTrue(blank.is_valid)
        self.assertTrue(blank.is_exact)
        self.assertEqual(blank.pixel_offset, 0)

    def test_invalidate(self):
        """invalidate() clears both the valid and the exact flag."""
        for flag in ("is_valid", "is_exact"):
            self.assertTrue(getattr(self.position, flag))

        self.position.invalidate()

        for flag in ("is_valid", "is_exact"):
            self.assertFalse(getattr(self.position, flag))

    def test_update_layout_hash(self):
        """update_layout_hash() stores the hash and re-validates the position."""
        self.position.invalidate()
        self.assertFalse(self.position.is_valid)

        self.position.update_layout_hash("new_hash")

        self.assertTrue(self.position.is_valid)
        self.assertEqual(self.position.layout_hash, "new_hash")

    def test_to_dict(self):
        """to_dict() emits every field of the position."""
        want = {
            'page_index': 2,
            'viewport_x': 100,
            'viewport_y': 200,
            'line_index': 5,
            'layout_hash': 'abc123',
            'is_valid': True,
            'is_exact': True,
            'pixel_offset': 10
        }
        self.assertEqual(self.position.to_dict(), want)

    def test_from_dict(self):
        """from_dict() restores each field, including cleared flags."""
        payload = {
            'page_index': 3,
            'viewport_x': 150,
            'viewport_y': 250,
            'line_index': 7,
            'layout_hash': 'def456',
            'is_valid': False,
            'is_exact': False,
            'pixel_offset': 5
        }
        rebuilt = ConcretePosition.from_dict(payload)
        for field, want in (
            ("page_index", 3),
            ("viewport_x", 150),
            ("viewport_y", 250),
            ("line_index", 7),
            ("layout_hash", 'def456'),
        ):
            self.assertEqual(getattr(rebuilt, field), want)
        self.assertFalse(rebuilt.is_valid)
        self.assertFalse(rebuilt.is_exact)
        self.assertEqual(rebuilt.pixel_offset, 5)
class TestStyleParameters(unittest.TestCase):
    """Checks StyleParameters construction, defaults, hashing and copying."""

    def setUp(self):
        """Create non-default style parameters around a 16pt font."""
        self.font = Font(font_size=16)
        settings = dict(
            page_size=(800, 600),
            margins=(20, 15, 25, 10),
            default_font=self.font,
            line_spacing=4,
            paragraph_spacing=12,
            alignment=Alignment.CENTER,
        )
        self.params = StyleParameters(**settings)

    def test_initialization(self):
        """Every constructor argument is stored unchanged."""
        for field, want in (
            ("page_size", (800, 600)),
            ("margins", (20, 15, 25, 10)),
            ("default_font", self.font),
            ("line_spacing", 4),
            ("paragraph_spacing", 12),
            ("alignment", Alignment.CENTER),
        ):
            self.assertEqual(getattr(self.params, field), want)

    def test_default_initialization(self):
        """Parameters built without arguments carry the documented defaults."""
        defaults = StyleParameters()
        self.assertEqual(defaults.page_size, (800, 600))
        self.assertEqual(defaults.margins, (20, 20, 20, 20))
        self.assertIsInstance(defaults.default_font, Font)
        for field, want in (
            ("line_spacing", 3),
            ("paragraph_spacing", 10),
            ("alignment", Alignment.LEFT),
        ):
            self.assertEqual(getattr(defaults, field), want)

    def test_get_hash(self):
        """get_hash() is repeatable and changes with the page size."""
        first = self.params.get_hash()
        second = self.params.get_hash()
        self.assertEqual(first, second)

        resized = StyleParameters(page_size=(900, 700))
        self.assertNotEqual(first, resized.get_hash())

    def test_copy(self):
        """copy() duplicates the settings into an independent object."""
        duplicate = self.params.copy()
        for field in ("page_size", "margins", "line_spacing"):
            self.assertEqual(getattr(duplicate, field), getattr(self.params, field))

        # Mutating the duplicate must leave the source untouched.
        duplicate.line_spacing = 999
        self.assertNotEqual(duplicate.line_spacing, self.params.line_spacing)

    def test_hash_consistency_with_font_changes(self):
        """Swapping in a differently sized font changes the hash."""
        before = self.params.get_hash()

        self.params.default_font = Font(font_size=20)
        after = self.params.get_hash()

        self.assertNotEqual(before, after)
self.assertEqual(self.anchor.primary_position, self.primary_pos) + self.assertEqual(len(self.anchor.fallback_positions), 0) + self.assertIsNone(self.anchor.context_text) + self.assertEqual(self.anchor.document_progress, 0.0) + self.assertEqual(self.anchor.paragraph_progress, 0.0) + + def test_add_fallback(self): + """Test adding fallback positions.""" + fallback = AbstractPosition(block_index=5, element_index=0) + self.anchor.add_fallback(fallback) + + self.assertEqual(len(self.anchor.fallback_positions), 1) + self.assertEqual(self.anchor.fallback_positions[0], fallback) + + def test_set_context(self): + """Test setting context information.""" + context = "This is some context text" + doc_progress = 0.3 + para_progress = 0.7 + + self.anchor.set_context(context, doc_progress, para_progress) + + self.assertEqual(self.anchor.context_text, context) + self.assertEqual(self.anchor.document_progress, doc_progress) + self.assertEqual(self.anchor.paragraph_progress, para_progress) + + def test_get_best_position_primary_valid(self): + """Test getting best position when primary is valid.""" + # Create a document with enough content + doc = Document() + for i in range(10): + paragraph = Paragraph() + paragraph.text = f"Paragraph {i}" + doc.add_block(paragraph) + + best_pos = self.anchor.get_best_position(doc) + self.assertEqual(best_pos, self.primary_pos) + + def test_get_best_position_fallback(self): + """Test getting best position when primary is invalid.""" + # Create a small document where primary position is invalid + doc = Document() + paragraph = Paragraph() + paragraph.text = "Single paragraph" + doc.add_block(paragraph) + + # Add a valid fallback + fallback = AbstractPosition(block_index=0, element_index=0) + self.anchor.add_fallback(fallback) + + best_pos = self.anchor.get_best_position(doc) + self.assertEqual(best_pos, fallback) + + def test_get_best_position_approximate(self): + """Test getting approximate position when all positions are invalid.""" + # Create a 
document + doc = Document() + for i in range(3): + paragraph = Paragraph() + paragraph.text = f"Paragraph {i}" + doc.add_block(paragraph) + + # Set progress information + self.anchor.set_context("context", 0.5, 0.3) + + # All positions should be invalid (beyond document bounds) + best_pos = self.anchor.get_best_position(doc) + + # Should get an approximate position + self.assertIsInstance(best_pos, AbstractPosition) + self.assertLess(best_pos.confidence, 1.0) # Should be marked as approximate + + def test_serialization(self): + """Test PositionAnchor serialization.""" + # Set up anchor with fallbacks and context + fallback = AbstractPosition(block_index=3) + self.anchor.add_fallback(fallback) + self.anchor.set_context("test context", 0.4, 0.6) + + # Serialize + data = self.anchor.to_dict() + self.assertIn('primary_position', data) + self.assertIn('fallback_positions', data) + self.assertEqual(data['context_text'], "test context") + self.assertEqual(data['document_progress'], 0.4) + self.assertEqual(data['paragraph_progress'], 0.6) + + # Deserialize + restored = PositionAnchor.from_dict(data) + self.assertEqual(restored.primary_position.block_index, self.primary_pos.block_index) + self.assertEqual(len(restored.fallback_positions), 1) + self.assertEqual(restored.fallback_positions[0].block_index, 3) + self.assertEqual(restored.context_text, "test context") + self.assertEqual(restored.document_progress, 0.4) + self.assertEqual(restored.paragraph_progress, 0.6) + + +class TestPositionTranslator(unittest.TestCase): + """Test cases for PositionTranslator class.""" + + def setUp(self): + """Set up test fixtures.""" + # Create a simple document + self.doc = Document() + for i in range(5): + paragraph = Paragraph() + paragraph.text = f"This is paragraph {i} with some text content." 
+ # Add some words to the paragraph + words = paragraph.text.split() + for word_text in words: + word = Word(word_text, Font()) + paragraph.add_word(word) + self.doc.add_block(paragraph) + + self.style_params = StyleParameters( + page_size=(800, 600), + margins=(20, 20, 20, 20), + default_font=Font(font_size=16) + ) + + self.translator = PositionTranslator(self.doc, self.style_params) + + def test_initialization(self): + """Test PositionTranslator initialization.""" + self.assertEqual(self.translator.document, self.doc) + self.assertEqual(self.translator.style_params, self.style_params) + self.assertEqual(len(self.translator._layout_cache), 0) + self.assertEqual(len(self.translator._position_cache), 0) + + def test_update_style_params(self): + """Test updating style parameters.""" + # Add something to caches + self.translator._layout_cache['test'] = 'data' + self.translator._position_cache['test'] = ConcretePosition() + + new_params = StyleParameters(page_size=(900, 700)) + self.translator.update_style_params(new_params) + + self.assertEqual(self.translator.style_params, new_params) + self.assertEqual(len(self.translator._layout_cache), 0) # Should be cleared + self.assertEqual(len(self.translator._position_cache), 0) # Should be cleared + + def test_abstract_to_concrete(self): + """Test converting abstract position to concrete.""" + abstract_pos = AbstractPosition( + block_index=2, + element_index=0, + word_index=3 + ) + + concrete_pos = self.translator.abstract_to_concrete(abstract_pos) + + self.assertIsInstance(concrete_pos, ConcretePosition) + self.assertGreaterEqual(concrete_pos.page_index, 0) + self.assertIsNotNone(concrete_pos.layout_hash) + self.assertTrue(concrete_pos.is_valid) + + def test_abstract_to_concrete_caching(self): + """Test that position translation is cached.""" + abstract_pos = AbstractPosition(block_index=1, word_index=5) + + # First call + concrete_pos1 = self.translator.abstract_to_concrete(abstract_pos) + + # Second call should return 
cached result + concrete_pos2 = self.translator.abstract_to_concrete(abstract_pos) + + self.assertEqual(concrete_pos1.page_index, concrete_pos2.page_index) + self.assertEqual(concrete_pos1.viewport_x, concrete_pos2.viewport_x) + self.assertEqual(concrete_pos1.viewport_y, concrete_pos2.viewport_y) + + def test_concrete_to_abstract(self): + """Test converting concrete position to abstract.""" + concrete_pos = ConcretePosition( + page_index=1, + viewport_x=100, + viewport_y=200 + ) + + abstract_pos = self.translator.concrete_to_abstract(concrete_pos) + + self.assertIsInstance(abstract_pos, AbstractPosition) + self.assertGreaterEqual(abstract_pos.block_index, 0) + + def test_find_clean_boundary(self): + """Test finding clean reading boundaries.""" + # Position in middle of word + pos_mid_word = AbstractPosition( + block_index=1, + word_index=3, + character_index=5 + ) + + clean_pos = self.translator.find_clean_boundary(pos_mid_word) + + self.assertIsInstance(clean_pos, AbstractPosition) + self.assertEqual(clean_pos.block_index, 1) + self.assertEqual(clean_pos.word_index, 3) + self.assertEqual(clean_pos.character_index, 0) # Should move to word start + self.assertTrue(clean_pos.is_clean_boundary) + + def test_find_clean_boundary_early_word(self): + """Test clean boundary for early words in paragraph.""" + # Position at second word + pos_early = AbstractPosition( + block_index=1, + element_type=ElementType.PARAGRAPH, + word_index=1, + character_index=0 + ) + + clean_pos = self.translator.find_clean_boundary(pos_early) + + # Should move to paragraph start for better reading experience + self.assertEqual(clean_pos.word_index, 0) + self.assertEqual(clean_pos.character_index, 0) + + def test_create_position_anchor(self): + """Test creating position anchor with fallbacks.""" + abstract_pos = AbstractPosition( + block_index=2, + element_index=0, + word_index=5, + character_index=2 + ) + + anchor = self.translator.create_position_anchor(abstract_pos) + + 
self.assertIsInstance(anchor, PositionAnchor) + self.assertEqual(anchor.primary_position, abstract_pos) + self.assertGreater(len(anchor.fallback_positions), 0) # Should have fallbacks + self.assertIsNotNone(anchor.context_text) # Should have context + + def test_create_position_anchor_with_context(self): + """Test position anchor context extraction.""" + abstract_pos = AbstractPosition( + block_index=1, + element_index=0, + word_index=3 + ) + + anchor = self.translator.create_position_anchor(abstract_pos, context_window=10) + + self.assertIsInstance(anchor.context_text, str) + self.assertGreater(len(anchor.context_text), 0) + self.assertGreaterEqual(anchor.document_progress, 0.0) + self.assertLessEqual(anchor.document_progress, 1.0) + + +class TestPositionTranslatorWithBook(unittest.TestCase): + """Test PositionTranslator with Book documents.""" + + def setUp(self): + """Set up test fixtures with a Book.""" + self.book = Book() + + # Create chapters with content + for i in range(3): + chapter = Chapter(f"Chapter {i}") + for j in range(4): + paragraph = Paragraph() + paragraph.text = f"Chapter {i}, paragraph {j} with content." 
+ words = paragraph.text.split() + for word_text in words: + word = Word(word_text, Font()) + paragraph.add_word(word) + chapter.add_block(paragraph) + self.book.add_chapter(chapter) + + self.style_params = StyleParameters() + self.translator = PositionTranslator(self.book, self.style_params) + + def test_abstract_to_concrete_with_chapters(self): + """Test position translation with chapter structure.""" + abstract_pos = AbstractPosition( + chapter_index=1, + block_index=2, + element_index=0, + word_index=1 + ) + + concrete_pos = self.translator.abstract_to_concrete(abstract_pos) + + self.assertIsInstance(concrete_pos, ConcretePosition) + self.assertGreaterEqual(concrete_pos.page_index, 0) + + def test_position_anchor_with_chapters(self): + """Test position anchor creation with chapters.""" + abstract_pos = AbstractPosition( + chapter_index=1, + block_index=1, + word_index=2 + ) + + anchor = self.translator.create_position_anchor(abstract_pos) + + # Should have chapter-aware fallbacks + fallbacks = anchor.fallback_positions + self.assertGreater(len(fallbacks), 0) + + # Should have correct chapter context + for fallback in fallbacks: + if fallback.chapter_index is not None: + self.assertEqual(fallback.chapter_index, abstract_pos.chapter_index) + + +class TestPositionTracker(unittest.TestCase): + """Test cases for PositionTracker class.""" + + def setUp(self): + """Set up test fixtures.""" + # Create a document with content + self.doc = Document() + for i in range(5): + paragraph = Paragraph() + paragraph.text = f"This is paragraph {i} with some content." 
+ words = paragraph.text.split() + for word_text in words: + word = Word(word_text, Font()) + paragraph.add_word(word) + self.doc.add_block(paragraph) + + self.style_params = StyleParameters() + self.tracker = PositionTracker(self.doc, self.style_params) + + def test_initialization(self): + """Test PositionTracker initialization.""" + self.assertEqual(self.tracker.document, self.doc) + self.assertIsInstance(self.tracker.translator, PositionTranslator) + self.assertIsNone(self.tracker.current_position) + self.assertEqual(len(self.tracker.reading_history), 0) + + def test_set_get_current_position(self): + """Test setting and getting current position.""" + pos = AbstractPosition(block_index=2, word_index=5) + + self.tracker.set_current_position(pos) + current = self.tracker.get_current_position() + + self.assertEqual(current, pos) + + def test_save_bookmark(self): + """Test saving current position as bookmark.""" + pos = AbstractPosition(block_index=3, word_index=7, character_index=2) + self.tracker.set_current_position(pos) + + bookmark = self.tracker.save_bookmark() + + self.assertIsInstance(bookmark, str) + self.assertGreater(len(bookmark), 0) + + # Should be valid JSON + data = json.loads(bookmark) + self.assertIsInstance(data, dict) + self.assertIn('primary_position', data) + + def test_save_bookmark_no_position(self): + """Test saving bookmark when no current position is set.""" + bookmark = self.tracker.save_bookmark() + self.assertEqual(bookmark, "") + + def test_load_bookmark(self): + """Test loading position from bookmark.""" + # Create and save a position + original_pos = AbstractPosition(block_index=2, word_index=4, character_index=1) + self.tracker.set_current_position(original_pos) + bookmark = self.tracker.save_bookmark() + + # Clear current position + self.tracker.set_current_position(None) + self.assertIsNone(self.tracker.get_current_position()) + + # Load from bookmark + success = self.tracker.load_bookmark(bookmark) + + self.assertTrue(success) + 
restored_pos = self.tracker.get_current_position() + self.assertIsNotNone(restored_pos) + self.assertEqual(restored_pos.block_index, original_pos.block_index) + self.assertEqual(restored_pos.word_index, original_pos.word_index) + + def test_load_invalid_bookmark(self): + """Test loading from invalid bookmark.""" + success = self.tracker.load_bookmark("invalid json") + self.assertFalse(success) + + success = self.tracker.load_bookmark('{"invalid": "structure"}') + self.assertFalse(success) + + def test_handle_style_change(self): + """Test handling style parameter changes.""" + # Set initial position + pos = AbstractPosition(block_index=1, word_index=3) + self.tracker.set_current_position(pos) + + # Change style parameters + new_params = StyleParameters( + page_size=(900, 700), + margins=(30, 30, 30, 30), + default_font=Font(font_size=20) + ) + + self.tracker.handle_style_change(new_params) + + # Position should still be set (abstract positions survive style changes) + current_pos = self.tracker.get_current_position() + self.assertIsNotNone(current_pos) + self.assertEqual(current_pos.block_index, pos.block_index) + self.assertEqual(current_pos.word_index, pos.word_index) + + # History should be updated + self.assertGreater(len(self.tracker.reading_history), 0) + + def test_get_concrete_position(self): + """Test getting concrete position.""" + pos = AbstractPosition(block_index=2, word_index=1) + self.tracker.set_current_position(pos) + + concrete_pos = self.tracker.get_concrete_position() + + self.assertIsInstance(concrete_pos, ConcretePosition) + self.assertGreaterEqual(concrete_pos.page_index, 0) + + def test_get_concrete_position_no_current(self): + """Test getting concrete position when no current position is set.""" + concrete_pos = self.tracker.get_concrete_position() + self.assertIsNone(concrete_pos) + + def test_set_position_from_concrete(self): + """Test setting position from concrete coordinates.""" + concrete_pos = ConcretePosition( + page_index=1, + 
viewport_x=100, + viewport_y=200 + ) + + self.tracker.set_position_from_concrete(concrete_pos) + + current_pos = self.tracker.get_current_position() + self.assertIsNotNone(current_pos) + self.assertIsInstance(current_pos, AbstractPosition) + self.assertTrue(current_pos.is_clean_boundary) # Should be cleaned + + def test_get_reading_progress(self): + """Test getting reading progress.""" + # No position set + progress = self.tracker.get_reading_progress() + self.assertEqual(progress, 0.0) + + # Set position in middle of document + pos = AbstractPosition(block_index=2) # Middle of 5 blocks + self.tracker.set_current_position(pos) + + progress = self.tracker.get_reading_progress() + self.assertGreater(progress, 0.0) + self.assertLess(progress, 1.0) + + # Set position at end + pos_end = AbstractPosition(block_index=4) + self.tracker.set_current_position(pos_end) + + progress_end = self.tracker.get_reading_progress() + self.assertGreater(progress_end, progress) + + +class TestPositionSystemIntegration(unittest.TestCase): + """Integration tests for the complete position system.""" + + def setUp(self): + """Set up test fixtures.""" + # Create a more complex document + self.book = Book() + + for i in range(3): + chapter = Chapter(f"Chapter {i+1}") + + # Add a heading + heading = Heading(f"Chapter {i+1} Title") + chapter.add_block(heading) + + # Add several paragraphs + for j in range(4): + paragraph = Paragraph() + paragraph.text = f"This is paragraph {j+1} of chapter {i+1}. " \ + f"It contains multiple words and sentences. " \ + f"This helps test the position system thoroughly." 
+ + words = paragraph.text.split() + for word_text in words: + word = Word(word_text, Font()) + paragraph.add_word(word) + + chapter.add_block(paragraph) + + self.book.add_chapter(chapter) + + self.style_params = StyleParameters( + page_size=(800, 600), + margins=(30, 30, 30, 30), + default_font=Font(font_size=14) + ) + + self.tracker = PositionTracker(self.book, self.style_params) + + def test_complete_workflow(self): + """Test a complete reading workflow.""" + # Start at beginning + start_pos = AbstractPosition( + chapter_index=0, + block_index=1, # Skip heading, start at first paragraph + element_index=0, + word_index=0 + ) + + self.tracker.set_current_position(start_pos) + + # Save bookmark + bookmark1 = self.tracker.save_bookmark() + self.assertGreater(len(bookmark1), 0) + + # Move to middle of book + middle_pos = AbstractPosition( + chapter_index=1, + block_index=2, + word_index=10, + character_index=5 + ) + + self.tracker.set_current_position(middle_pos) + + # Change style (font size increase) + new_style = self.style_params.copy() + new_style.default_font = Font(font_size=18) + self.tracker.handle_style_change(new_style) + + # Position should survive style change + current_pos = self.tracker.get_current_position() + self.assertIsNotNone(current_pos) + self.assertEqual(current_pos.chapter_index, middle_pos.chapter_index) + self.assertEqual(current_pos.block_index, middle_pos.block_index) + + # Get concrete position + concrete_pos = self.tracker.get_concrete_position() + self.assertIsInstance(concrete_pos, ConcretePosition) + + # Test progress calculation + progress = self.tracker.get_reading_progress() + self.assertGreater(progress, 0.0) + self.assertLess(progress, 1.0) + + # Restore from original bookmark + success = self.tracker.load_bookmark(bookmark1) + self.assertTrue(success) + + restored_pos = self.tracker.get_current_position() + self.assertEqual(restored_pos.chapter_index, start_pos.chapter_index) + self.assertEqual(restored_pos.block_index, 
start_pos.block_index) + + def test_position_comparison_across_chapters(self): + """Test position comparison across different chapters.""" + pos1 = AbstractPosition(chapter_index=0, block_index=1, word_index=5) + pos2 = AbstractPosition(chapter_index=1, block_index=0, word_index=0) + pos3 = AbstractPosition(chapter_index=2, block_index=3, word_index=10) + + # Test ordering + self.assertTrue(pos1.is_before(pos2)) + self.assertTrue(pos2.is_before(pos3)) + self.assertTrue(pos1.is_before(pos3)) + + # Test progress calculation + progress1 = pos1.get_progress(self.book) + progress2 = pos2.get_progress(self.book) + progress3 = pos3.get_progress(self.book) + + self.assertLess(progress1, progress2) + self.assertLess(progress2, progress3) + + def test_clean_boundary_with_complex_content(self): + """Test clean boundary detection with complex content.""" + translator = PositionTranslator(self.book, self.style_params) + + # Position in middle of word + messy_pos = AbstractPosition( + chapter_index=1, + block_index=2, + word_index=5, + character_index=3 + ) + + clean_pos = translator.find_clean_boundary(messy_pos) + + self.assertEqual(clean_pos.chapter_index, messy_pos.chapter_index) + self.assertEqual(clean_pos.block_index, messy_pos.block_index) + self.assertEqual(clean_pos.word_index, messy_pos.word_index) + self.assertEqual(clean_pos.character_index, 0) # Should move to word start + self.assertTrue(clean_pos.is_clean_boundary) + + def test_position_anchor_robustness(self): + """Test position anchor robustness with document changes.""" + translator = PositionTranslator(self.book, self.style_params) + + # Create position anchor + original_pos = AbstractPosition( + chapter_index=1, + block_index=2, + word_index=8 + ) + + anchor = translator.create_position_anchor(original_pos) + + # Should have multiple fallback positions + self.assertGreater(len(anchor.fallback_positions), 0) + + # Should have context text + self.assertIsNotNone(anchor.context_text) + 
self.assertGreater(len(anchor.context_text), 0) + + # Should have progress information + self.assertGreater(anchor.document_progress, 0.0) + self.assertLess(anchor.document_progress, 1.0) + + # Test with modified document (simulate content change) + modified_book = Book() + chapter = Chapter("Modified Chapter") + paragraph = Paragraph() + paragraph.text = "This is a modified paragraph." + chapter.add_block(paragraph) + modified_book.add_chapter(chapter) + + # Should get approximate position + best_pos = anchor.get_best_position(modified_book) + self.assertIsInstance(best_pos, AbstractPosition) + + def test_style_parameter_impact(self): + """Test how style parameter changes affect position system.""" + translator = PositionTranslator(self.book, self.style_params) + + abstract_pos = AbstractPosition(chapter_index=1, block_index=2, word_index=3) + + # Get concrete position with original style + concrete_pos1 = translator.abstract_to_concrete(abstract_pos) + hash1 = concrete_pos1.layout_hash + + # Change style parameters + new_style = StyleParameters( + page_size=(1000, 800), # Larger page + margins=(40, 40, 40, 40), # Larger margins + default_font=Font(font_size=20) # Larger font + ) + + translator.update_style_params(new_style) + + # Get concrete position with new style + concrete_pos2 = translator.abstract_to_concrete(abstract_pos) + hash2 = concrete_pos2.layout_hash + + # Layout hashes should be different + self.assertNotEqual(hash1, hash2) + + # Concrete positions should be different + # (same abstract position, different concrete position due to style change) + self.assertNotEqual(concrete_pos1.viewport_y, concrete_pos2.viewport_y) + + +if __name__ == '__main__': + unittest.main()