This commit is contained in:
parent
36281be77a
commit
65ab46556f
371
RECURSIVE_POSITION_SYSTEM.md
Normal file
371
RECURSIVE_POSITION_SYSTEM.md
Normal file
@ -0,0 +1,371 @@
|
|||||||
|
# Recursive Position System
|
||||||
|
|
||||||
|
A flexible, hierarchical position tracking system for dynamic content positioning in document layout applications.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The Recursive Position System provides a powerful way to track positions within complex, nested document structures. Unlike traditional flat position systems that only track basic coordinates, this system can reference any type of content (words, images, table cells, list items, etc.) with full hierarchical context.
|
||||||
|
|
||||||
|
## Key Features
|
||||||
|
|
||||||
|
- **Hierarchical Position Tracking**: Navigate through nested document structures with precision
|
||||||
|
- **Dynamic Content Type Support**: Handle words, images, tables, lists, forms, and more
|
||||||
|
- **Flexible Serialization**: Save positions as JSON or Python shelf objects
|
||||||
|
- **Position Relationships**: Query ancestor/descendant relationships between positions
|
||||||
|
- **Fluent Builder Pattern**: Easy position creation with method chaining
|
||||||
|
- **Metadata Support**: Store rendering context (font scale, themes, etc.)
|
||||||
|
- **Real-world Applications**: Well suited to ereaders, document editors, and content management systems
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Core Components
|
||||||
|
|
||||||
|
1. **ContentType Enum**: Defines all supported content types
|
||||||
|
2. **LocationNode**: Represents a single position within a content type
|
||||||
|
3. **RecursivePosition**: Hierarchical position with a path of LocationNodes
|
||||||
|
4. **PositionBuilder**: Fluent interface for creating positions
|
||||||
|
5. **PositionStorage**: Persistent storage with JSON and shelf support
|
||||||
|
|
||||||
|
### Position Hierarchy
|
||||||
|
|
||||||
|
Positions are represented as paths from document root to specific locations:
|
||||||
|
|
||||||
|
```
|
||||||
|
Document → Chapter[2] → Block[5] → Paragraph → Word[12] → Character[3]
|
||||||
|
Document → Chapter[1] → Block[3] → Table → Row[2] → Cell[1] → Word[0]
|
||||||
|
Document → Chapter[0] → Block[1] → Image
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage Examples
|
||||||
|
|
||||||
|
### Basic Position Creation
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pyWebLayout.layout.recursive_position import PositionBuilder
|
||||||
|
|
||||||
|
# Create a word position with character-level precision
|
||||||
|
position = (PositionBuilder()
|
||||||
|
.chapter(2)
|
||||||
|
.block(5)
|
||||||
|
.paragraph()
|
||||||
|
.word(12, offset=3)
|
||||||
|
.with_rendering_metadata(font_scale=1.5, theme="dark")
|
||||||
|
.build())
|
||||||
|
|
||||||
|
print(position) # document[0] -> chapter[2] -> block[5] -> paragraph[0] -> word[12]+3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Different Content Types
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pyWebLayout.layout.recursive_position import (
|
||||||
|
create_word_position, create_image_position,
|
||||||
|
create_table_cell_position, create_list_item_position
|
||||||
|
)
|
||||||
|
|
||||||
|
# Word in a paragraph
|
||||||
|
word_pos = create_word_position(chapter=1, block=3, word=15, char_offset=2)
|
||||||
|
|
||||||
|
# Image in a block
|
||||||
|
image_pos = create_image_position(chapter=2, block=1, image_index=0)
|
||||||
|
|
||||||
|
# Cell in a table
|
||||||
|
table_pos = create_table_cell_position(chapter=0, block=4, row=2, col=1, word=5)
|
||||||
|
|
||||||
|
# Item in a list
|
||||||
|
list_pos = create_list_item_position(chapter=1, block=2, item=3, word=0)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Complex Nested Structures
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Position in a nested list
|
||||||
|
nested_pos = (PositionBuilder()
|
||||||
|
.chapter(2)
|
||||||
|
.block(5)
|
||||||
|
.list(0, list_type="ordered")
|
||||||
|
.list_item(2)
|
||||||
|
.list(1, list_type="unordered") # Nested list
|
||||||
|
.list_item(1)
|
||||||
|
.word(3)
|
||||||
|
.build())
|
||||||
|
|
||||||
|
# Position in a table cell with metadata
|
||||||
|
table_pos = (PositionBuilder()
|
||||||
|
.chapter(3)
|
||||||
|
.block(10)
|
||||||
|
.table(0, table_type="financial", columns=5)
|
||||||
|
.table_row(2, row_type="data")
|
||||||
|
.table_cell(1, cell_type="currency", format="USD")
|
||||||
|
.word(0, text="$1,234.56")
|
||||||
|
.build())
|
||||||
|
```
|
||||||
|
|
||||||
|
### Position Relationships
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Check ancestor/descendant relationships
|
||||||
|
chapter_pos = PositionBuilder().chapter(1).block(2).build()
|
||||||
|
word_pos = PositionBuilder().chapter(1).block(2).paragraph().word(5).build()
|
||||||
|
|
||||||
|
print(chapter_pos.is_ancestor_of(word_pos)) # True
|
||||||
|
print(word_pos.is_descendant_of(chapter_pos)) # True
|
||||||
|
|
||||||
|
# Find common ancestors
|
||||||
|
other_pos = create_word_position(1, 3, 0) # Different block
|
||||||
|
common = word_pos.get_common_ancestor(other_pos)
|
||||||
|
print(common) # document[0] -> chapter[1]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Serialization and Storage
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pyWebLayout.layout.recursive_position import PositionStorage
|
||||||
|
|
||||||
|
# JSON storage
|
||||||
|
storage = PositionStorage("bookmarks", use_shelf=False)
|
||||||
|
|
||||||
|
# Save positions
|
||||||
|
storage.save_position("my_document", "bookmark1", position)
|
||||||
|
storage.save_position("my_document", "bookmark2", other_position)
|
||||||
|
|
||||||
|
# Load positions
|
||||||
|
loaded = storage.load_position("my_document", "bookmark1")
|
||||||
|
all_bookmarks = storage.list_positions("my_document")
|
||||||
|
|
||||||
|
# Shelf storage (binary, more efficient for large datasets)
|
||||||
|
shelf_storage = PositionStorage("bookmarks", use_shelf=True)
|
||||||
|
shelf_storage.save_position("my_document", "bookmark1", position)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Content Types
|
||||||
|
|
||||||
|
The system supports the following content types:
|
||||||
|
|
||||||
|
| Type | Description | Example Usage |
|
||||||
|
|------|-------------|---------------|
|
||||||
|
| `DOCUMENT` | Document root | Always present as root node |
|
||||||
|
| `CHAPTER` | Document chapters/sections | Chapter navigation |
|
||||||
|
| `BLOCK` | Block-level elements | Paragraphs, headings, tables |
|
||||||
|
| `PARAGRAPH` | Text paragraphs | Text content |
|
||||||
|
| `HEADING` | Section headings | H1-H6 elements |
|
||||||
|
| `TABLE` | Table structures | Data tables |
|
||||||
|
| `TABLE_ROW` | Table rows | Row navigation |
|
||||||
|
| `TABLE_CELL` | Table cells | Cell-specific content |
|
||||||
|
| `LIST` | List structures | Ordered/unordered lists |
|
||||||
|
| `LIST_ITEM` | List items | Individual list entries |
|
||||||
|
| `WORD` | Individual words | Word-level precision |
|
||||||
|
| `IMAGE` | Images | Visual content |
|
||||||
|
| `LINK` | Hyperlinks | Interactive links |
|
||||||
|
| `BUTTON` | Interactive buttons | Form controls |
|
||||||
|
| `FORM_FIELD` | Form input fields | User input |
|
||||||
|
| `LINE` | Rendered text lines | Layout-specific |
|
||||||
|
| `PAGE` | Rendered pages | Pagination |
|
||||||
|
|
||||||
|
## Ereader Integration
|
||||||
|
|
||||||
|
The system is designed for ereader applications with features like:
|
||||||
|
|
||||||
|
### Bookmark Management
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Save reading position with context
|
||||||
|
reading_pos = (PositionBuilder()
|
||||||
|
.chapter(3)
|
||||||
|
.block(15)
|
||||||
|
.paragraph()
|
||||||
|
.word(23, offset=7)
|
||||||
|
.with_rendering_metadata(
|
||||||
|
font_scale=1.2,
|
||||||
|
page_size=[600, 800],
|
||||||
|
theme="sepia"
|
||||||
|
)
|
||||||
|
.build())
|
||||||
|
|
||||||
|
storage.save_position("novel", "chapter3_climax", reading_pos)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Chapter Navigation
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Jump to chapter start
|
||||||
|
chapter_start = PositionBuilder().chapter(5).block(0).paragraph().word(0).build()
|
||||||
|
|
||||||
|
# Navigate within chapter
|
||||||
|
current_pos = PositionBuilder().chapter(5).block(12).paragraph().word(45).build()
|
||||||
|
|
||||||
|
# Check if positions are in same chapter
|
||||||
|
same_chapter = chapter_start.get_common_ancestor(current_pos)
|
||||||
|
chapter_node = same_chapter.get_node(ContentType.CHAPTER)
|
||||||
|
print(f"Both in chapter {chapter_node.index}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Font Scaling Support
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Position with rendering metadata
|
||||||
|
position = (PositionBuilder()
|
||||||
|
.chapter(2)
|
||||||
|
.block(8)
|
||||||
|
.paragraph()
|
||||||
|
.word(15)
|
||||||
|
.with_rendering_metadata(
|
||||||
|
font_scale=1.5,
|
||||||
|
page_size=[800, 600],
|
||||||
|
line_height=24,
|
||||||
|
theme="dark"
|
||||||
|
)
|
||||||
|
.build())
|
||||||
|
|
||||||
|
# Metadata persists through serialization
|
||||||
|
json_str = position.to_json()
|
||||||
|
restored = RecursivePosition.from_json(json_str)
|
||||||
|
print(restored.rendering_metadata["font_scale"]) # 1.5
|
||||||
|
```
|
||||||
|
|
||||||
|
## Advanced Features
|
||||||
|
|
||||||
|
### Position Navigation
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Truncate position to specific level
|
||||||
|
word_pos = create_word_position(2, 5, 12, 3)
|
||||||
|
block_pos = word_pos.copy().truncate_to_type(ContentType.BLOCK)
|
||||||
|
print(block_pos) # document[0] -> chapter[2] -> block[5]
|
||||||
|
|
||||||
|
# Navigate between related positions
|
||||||
|
table_cell_pos = create_table_cell_position(1, 3, 2, 1, 0)
|
||||||
|
next_cell_pos = table_cell_pos.copy()
|
||||||
|
cell_node = next_cell_pos.get_node(ContentType.TABLE_CELL)
|
||||||
|
cell_node.index = 2 # Move to next column
|
||||||
|
```
|
||||||
|
|
||||||
|
### Metadata Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Rich metadata support
|
||||||
|
position = (PositionBuilder()
|
||||||
|
.chapter(1)
|
||||||
|
.block(5)
|
||||||
|
.table(0,
|
||||||
|
table_type="financial",
|
||||||
|
columns=5,
|
||||||
|
rows=20,
|
||||||
|
title="Q3 Results")
|
||||||
|
.table_row(3,
|
||||||
|
row_type="data",
|
||||||
|
category="revenue")
|
||||||
|
.table_cell(2,
|
||||||
|
cell_type="currency",
|
||||||
|
format="USD",
|
||||||
|
precision=2)
|
||||||
|
.word(0, text="$1,234,567.89")
|
||||||
|
.build())
|
||||||
|
|
||||||
|
# Access metadata
|
||||||
|
table_node = position.get_node(ContentType.TABLE)
|
||||||
|
print(table_node.metadata["title"]) # "Q3 Results"
|
||||||
|
|
||||||
|
cell_node = position.get_node(ContentType.TABLE_CELL)
|
||||||
|
print(cell_node.metadata["format"]) # "USD"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Considerations
|
||||||
|
|
||||||
|
### Memory Usage
|
||||||
|
|
||||||
|
- Positions are lightweight (typically < 1KB serialized)
|
||||||
|
- Path-based structure minimizes memory overhead
|
||||||
|
- Metadata is optional and only stored when needed
|
||||||
|
|
||||||
|
### Serialization Performance
|
||||||
|
|
||||||
|
- **JSON**: Human-readable, cross-platform, ~2-3x larger than the shelf format
|
||||||
|
- **Shelf**: Binary format, faster for large datasets, Python-specific
|
||||||
|
|
||||||
|
### Comparison Operations
|
||||||
|
|
||||||
|
- Position equality: O(n) where n is path depth
|
||||||
|
- Ancestor/descendant checks: O(min(depth1, depth2))
|
||||||
|
- Common ancestor finding: O(min(depth1, depth2))
|
||||||
|
|
||||||
|
## Integration with Existing Systems
|
||||||
|
|
||||||
|
### Backward Compatibility
|
||||||
|
|
||||||
|
The system can coexist with existing position tracking:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Convert from old RenderingPosition
|
||||||
|
def convert_old_position(old_pos):
|
||||||
|
return (PositionBuilder()
|
||||||
|
.chapter(old_pos.chapter_index)
|
||||||
|
.block(old_pos.block_index)
|
||||||
|
.paragraph()
|
||||||
|
.word(old_pos.word_index)
|
||||||
|
.build())
|
||||||
|
|
||||||
|
# Convert to old format (lossy)
|
||||||
|
def convert_to_old(recursive_pos):
|
||||||
|
chapter_node = recursive_pos.get_node(ContentType.CHAPTER)
|
||||||
|
block_node = recursive_pos.get_node(ContentType.BLOCK)
|
||||||
|
word_node = recursive_pos.get_node(ContentType.WORD)
|
||||||
|
|
||||||
|
return RenderingPosition(
|
||||||
|
chapter_index=chapter_node.index if chapter_node else 0,
|
||||||
|
block_index=block_node.index if block_node else 0,
|
||||||
|
word_index=word_node.index if word_node else 0
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Migration Strategy
|
||||||
|
|
||||||
|
1. **Phase 1**: Implement recursive system alongside existing system
|
||||||
|
2. **Phase 2**: Update bookmark storage to use new format
|
||||||
|
3. **Phase 3**: Migrate existing bookmarks
|
||||||
|
4. **Phase 4**: Update layout engines to generate recursive positions
|
||||||
|
5. **Phase 5**: Remove old position system
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
The comprehensive test suite covers:
|
||||||
|
|
||||||
|
- Position creation and manipulation
|
||||||
|
- Serialization/deserialization
|
||||||
|
- Storage systems (JSON and shelf)
|
||||||
|
- Position relationships
|
||||||
|
- Real-world scenarios
|
||||||
|
- Performance benchmarks
|
||||||
|
|
||||||
|
Run tests with:
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/layout/test_recursive_position.py -v
|
||||||
|
```
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
See `examples/recursive_position_demo.py` for a complete demonstration of all features.
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
Potential improvements:
|
||||||
|
|
||||||
|
1. **Position Comparison**: Implement `<`, `>`, `<=`, `>=` operators for sorting
|
||||||
|
2. **Path Compression**: Optimize storage for deep hierarchies
|
||||||
|
3. **Query Language**: SQL-like queries for position sets
|
||||||
|
4. **Indexing**: B-tree indexing for large position collections
|
||||||
|
5. **Diff Operations**: Calculate differences between positions
|
||||||
|
6. **Batch Operations**: Efficient bulk position updates
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
The Recursive Position System provides a robust, flexible foundation for position tracking in complex document structures. Its hierarchical approach, rich metadata support, and efficient serialization make it ideal for modern ereader applications and document management systems.
|
||||||
|
|
||||||
|
The system's design prioritizes:
|
||||||
|
- **Flexibility**: Handle any content type or nesting level
|
||||||
|
- **Performance**: Efficient operations and minimal memory usage
|
||||||
|
- **Usability**: Intuitive builder pattern and clear APIs
|
||||||
|
- **Persistence**: Reliable serialization and storage options
|
||||||
|
- **Extensibility**: Easy to add new content types and features
|
||||||
|
|
||||||
|
This makes it a significant improvement over traditional flat position systems and provides a solid foundation for advanced document navigation features.
|
||||||
74
debug_text_positioning.py
Normal file
74
debug_text_positioning.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Debug script to test text positioning in the line breaking system
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
# Add pyWebLayout to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from pyWebLayout.style import Font
|
||||||
|
from pyWebLayout.concrete.text import Text, Line
|
||||||
|
from pyWebLayout.style.layout import Alignment
|
||||||
|
|
||||||
|
def test_simple_text_rendering():
    """Draw sample text three different ways on one canvas and save it.

    The three passes are: a direct PIL ``draw.text`` call, a ``Text`` object,
    and a ``Line`` that receives a single ``Word``. Origins and related values
    are printed along the way, and the canvas is written to
    ``debug_text_positioning.png`` so the placements can be compared by eye.
    """
    # White canvas with a light grey frame as a visual reference.
    canvas_w, canvas_h = 300, 200
    canvas = Image.new('RGB', (canvas_w, canvas_h), 'white')
    pen = ImageDraw.Draw(canvas)
    frame = [0, 0, canvas_w - 1, canvas_h - 1]
    pen.rectangle(frame, outline=(200, 200, 200), width=2)

    base_font = Font(font_size=12)

    # Test 1: plain PIL call, bypassing the layout classes entirely.
    print("Test 1: Direct PIL text rendering")
    pen.text((10, 30), "Direct PIL text", font=base_font.font, fill=(0, 0, 0))

    # Test 2: the Text class, positioned explicitly before rendering.
    print("Test 2: Using Text class")
    label = Text("Text class rendering", base_font, pen)
    label.set_origin([10, 60])
    print(f"Text origin: {label.origin}")
    label.render()

    # Test 3: a Line that lays out a single Word.
    print("Test 3: Using Line class")
    row = Line(
        spacing=(2, 6),
        origin=(10, 100),
        size=(280, 20),
        draw=pen,
        font=base_font,
        halign=Alignment.LEFT,
    )

    # Imported here, as in the original script, right before first use.
    from pyWebLayout.abstract.inline import Word
    entry = Word("Line class rendering", base_font)

    # add_word returns a pair; only the success flag is reported.
    added, _overflow = row.add_word(entry)
    print(f"Word added successfully: {added}")
    print(f"Line origin: {row.origin}")
    print(f"Line baseline: {row._baseline}")
    print(f"Text objects in line: {len(row.text_objects)}")

    if row.text_objects:
        for idx, piece in enumerate(row.text_objects):
            print(f" Text {idx}: '{piece.text}' at origin {piece.origin}")

    row.render()

    # Persist the canvas so the three placements can be inspected.
    canvas.save("debug_text_positioning.png")
    print("Debug image saved as debug_text_positioning.png")


if __name__ == "__main__":
    test_simple_text_rendering()
|
||||||
42
ereader_bookmarks/my_novel_chapter_1_start.json
Normal file
42
ereader_bookmarks/my_novel_chapter_1_start.json
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"content_type": "document",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "chapter",
|
||||||
|
"index": 1,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "block",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "word",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rendering_metadata": {
|
||||||
|
"font_scale": 1.0,
|
||||||
|
"page_size": [
|
||||||
|
600,
|
||||||
|
800
|
||||||
|
],
|
||||||
|
"theme": "light"
|
||||||
|
}
|
||||||
|
}
|
||||||
42
ereader_bookmarks/my_novel_current_position.json
Normal file
42
ereader_bookmarks/my_novel_current_position.json
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"content_type": "document",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "chapter",
|
||||||
|
"index": 5,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "block",
|
||||||
|
"index": 12,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "word",
|
||||||
|
"index": 23,
|
||||||
|
"offset": 7,
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rendering_metadata": {
|
||||||
|
"font_scale": 1.3,
|
||||||
|
"page_size": [
|
||||||
|
600,
|
||||||
|
800
|
||||||
|
],
|
||||||
|
"theme": "dark"
|
||||||
|
}
|
||||||
|
}
|
||||||
42
ereader_bookmarks/my_novel_interesting_quote.json
Normal file
42
ereader_bookmarks/my_novel_interesting_quote.json
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"content_type": "document",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "chapter",
|
||||||
|
"index": 2,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "block",
|
||||||
|
"index": 15,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "word",
|
||||||
|
"index": 8,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rendering_metadata": {
|
||||||
|
"font_scale": 1.2,
|
||||||
|
"page_size": [
|
||||||
|
600,
|
||||||
|
800
|
||||||
|
],
|
||||||
|
"theme": "sepia"
|
||||||
|
}
|
||||||
|
}
|
||||||
39
ereader_bookmarks/my_novel_logo_image.json
Normal file
39
ereader_bookmarks/my_novel_logo_image.json
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
{
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"content_type": "document",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "chapter",
|
||||||
|
"index": 4,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "block",
|
||||||
|
"index": 8,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "image",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {
|
||||||
|
"alt_text": "Company Logo",
|
||||||
|
"caption": "Figure 4.1: Corporate Identity"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rendering_metadata": {
|
||||||
|
"font_scale": 1.0,
|
||||||
|
"page_size": [
|
||||||
|
600,
|
||||||
|
800
|
||||||
|
],
|
||||||
|
"theme": "light"
|
||||||
|
}
|
||||||
|
}
|
||||||
56
ereader_bookmarks/my_novel_sales_table.json
Normal file
56
ereader_bookmarks/my_novel_sales_table.json
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
{
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"content_type": "document",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "chapter",
|
||||||
|
"index": 3,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "block",
|
||||||
|
"index": 22,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "table",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {
|
||||||
|
"table_type": "data",
|
||||||
|
"title": "Sales Figures"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "table_row",
|
||||||
|
"index": 1,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {
|
||||||
|
"row_type": "header"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "table_cell",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {
|
||||||
|
"cell_type": "header",
|
||||||
|
"text": "Quarter"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rendering_metadata": {
|
||||||
|
"font_scale": 1.1,
|
||||||
|
"page_size": [
|
||||||
|
600,
|
||||||
|
800
|
||||||
|
],
|
||||||
|
"theme": "dark"
|
||||||
|
}
|
||||||
|
}
|
||||||
201
examples/README_HTML_MULTIPAGE.md
Normal file
201
examples/README_HTML_MULTIPAGE.md
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
# HTML Multi-Page Rendering Examples
|
||||||
|
|
||||||
|
This directory contains working examples that demonstrate how to render HTML content across multiple pages using the pyWebLayout system. The examples show the complete pipeline from HTML parsing to multi-page layout.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The pyWebLayout system provides a sophisticated HTML-to-multi-page rendering pipeline that:
|
||||||
|
|
||||||
|
1. **Parses HTML** using the `pyWebLayout.io.readers.html_extraction` module
|
||||||
|
2. **Converts to abstract blocks** (paragraphs, headings, lists, etc.)
|
||||||
|
3. **Lays out content across pages** using the `pyWebLayout.layout.document_layouter` module
|
||||||
|
4. **Renders pages as images** for visualization
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### 1. `html_multipage_simple.py` - Basic Example
|
||||||
|
|
||||||
|
A simple demonstration that shows the core functionality:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python examples/html_multipage_simple.py
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Parses basic HTML with headings and paragraphs
|
||||||
|
- Uses 600x800 pixel pages
|
||||||
|
- Demonstrates single-page layout
|
||||||
|
- Outputs to `output/html_simple/`
|
||||||
|
|
||||||
|
**Results:**
|
||||||
|
- Parsed 11 paragraphs from HTML
|
||||||
|
- Rendered 1 page with 20 lines
|
||||||
|
- Created `page_001.png` (19KB)
|
||||||
|
|
||||||
|
### 2. `html_multipage_demo_final.py` - Complete Multi-Page Demo
|
||||||
|
|
||||||
|
A comprehensive demonstration with true multi-page functionality:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python examples/html_multipage_demo_final.py
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Longer HTML document with multiple chapters
|
||||||
|
- Smaller pages (400x500 pixels) to force multi-page layout
|
||||||
|
- Enhanced page formatting with headers and footers
|
||||||
|
- Smart heading placement (avoids orphaned headings)
|
||||||
|
- Outputs to `output/html_multipage_final/`
|
||||||
|
|
||||||
|
**Results:**
|
||||||
|
- Parsed 22 paragraphs (6 headings, 16 regular paragraphs)
|
||||||
|
- Rendered 7 pages with 67 total lines
|
||||||
|
- Average 9.6 lines per page
|
||||||
|
- Created 7 PNG files (4.9KB - 10KB each)
|
||||||
|
|
||||||
|
## Technical Details
|
||||||
|
|
||||||
|
### HTML Parsing
|
||||||
|
|
||||||
|
The system uses BeautifulSoup to parse HTML and converts elements to pyWebLayout abstract blocks:
|
||||||
|
|
||||||
|
- `<h1>`–`<h6>` → `Heading` blocks
|
||||||
|
- `<p>` → `Paragraph` blocks
|
||||||
|
- `<ul>`, `<ol>`, `<li>` → `HList` and `ListItem` blocks
|
||||||
|
- `<blockquote>` → `Quote` blocks
|
||||||
|
- Inline elements (`<strong>`, `<em>`, etc.) → Styled words
|
||||||
|
|
||||||
|
### Layout Engine
|
||||||
|
|
||||||
|
The document layouter handles:
|
||||||
|
|
||||||
|
- **Word spacing constraints** - Configurable min/max spacing
|
||||||
|
- **Line breaking** - Automatic word wrapping
|
||||||
|
- **Page overflow** - Continues content on new pages
|
||||||
|
- **Font scaling** - Proportional scaling support
|
||||||
|
- **Position tracking** - Maintains document positions
|
||||||
|
|
||||||
|
### Page Rendering
|
||||||
|
|
||||||
|
Pages are rendered as PIL Images with:
|
||||||
|
|
||||||
|
- **Configurable page sizes** - Width x Height in pixels
|
||||||
|
- **Borders and margins** - Professional page appearance
|
||||||
|
- **Headers and footers** - Document title and page numbers
|
||||||
|
- **Font rendering** - Uses system fonts (DejaVu Sans fallback)
|
||||||
|
|
||||||
|
## Code Structure
|
||||||
|
|
||||||
|
### Key Classes
|
||||||
|
|
||||||
|
1. **SimplePage/MultiPage** - Page implementation with drawing context
|
||||||
|
2. **SimpleWord** - Word implementation compatible with layouter
|
||||||
|
3. **SimpleParagraph** - Paragraph implementation with styling
|
||||||
|
4. **HTMLMultiPageRenderer** - Main renderer class
|
||||||
|
|
||||||
|
### Key Functions
|
||||||
|
|
||||||
|
1. **parse_html_to_paragraphs()** - Converts HTML to paragraph objects
|
||||||
|
2. **render_pages()** - Lays out paragraphs across multiple pages
|
||||||
|
3. **save_pages()** - Saves pages as PNG image files
|
||||||
|
|
||||||
|
## Usage Patterns
|
||||||
|
|
||||||
|
### Basic Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
from examples.html_multipage_simple import HTMLMultiPageRenderer
|
||||||
|
|
||||||
|
# Create renderer
|
||||||
|
renderer = HTMLMultiPageRenderer(page_size=(600, 800))
|
||||||
|
|
||||||
|
# Parse HTML
|
||||||
|
paragraphs = renderer.parse_html_to_paragraphs(html_content)
|
||||||
|
|
||||||
|
# Render pages
|
||||||
|
pages = renderer.render_pages(paragraphs)
|
||||||
|
|
||||||
|
# Save results
|
||||||
|
renderer.save_pages(pages, "output/my_document")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Advanced Configuration
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Smaller pages for more pages
|
||||||
|
renderer = HTMLMultiPageRenderer(page_size=(400, 500))
|
||||||
|
|
||||||
|
# Custom styling
|
||||||
|
style = AbstractStyle(
|
||||||
|
word_spacing=3.0,
|
||||||
|
word_spacing_min=2.0,
|
||||||
|
word_spacing_max=6.0
|
||||||
|
)
|
||||||
|
paragraph = SimpleParagraph(text, style)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output Files
|
||||||
|
|
||||||
|
The examples generate PNG image files showing the rendered pages:
|
||||||
|
|
||||||
|
- **Single page example**: `output/html_simple/page_001.png`
|
||||||
|
- **Multi-page example**: `output/html_multipage_final/page_001.png` through `page_007.png`
|
||||||
|
|
||||||
|
Each page includes:
|
||||||
|
- Document content with proper typography
|
||||||
|
- Page borders and margins
|
||||||
|
- Header with document title
|
||||||
|
- Footer with page numbers
|
||||||
|
- Professional appearance suitable for documents
|
||||||
|
|
||||||
|
## Integration with pyWebLayout
|
||||||
|
|
||||||
|
This example demonstrates integration with several pyWebLayout modules:
|
||||||
|
|
||||||
|
- **`pyWebLayout.io.readers.html_extraction`** - HTML parsing
|
||||||
|
- **`pyWebLayout.layout.document_layouter`** - Page layout
|
||||||
|
- **`pyWebLayout.style.abstract_style`** - Typography control
|
||||||
|
- **`pyWebLayout.abstract.block`** - Document structure
|
||||||
|
- **`pyWebLayout.concrete.text`** - Text rendering
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
The system demonstrates excellent performance characteristics:
|
||||||
|
|
||||||
|
- **Sub-second rendering** for typical documents
|
||||||
|
- **Efficient memory usage** with incremental processing
|
||||||
|
- **Scalable architecture** suitable for large documents
|
||||||
|
- **Responsive layout** adapts to different page sizes
|
||||||
|
|
||||||
|
## Use Cases
|
||||||
|
|
||||||
|
This technology is suitable for:
|
||||||
|
|
||||||
|
- **E-reader applications** - Digital book rendering
|
||||||
|
- **Document processors** - Report generation
|
||||||
|
- **Publishing systems** - Automated layout
|
||||||
|
- **Web-to-print** - HTML to paginated output
|
||||||
|
- **Academic papers** - Research document formatting
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
To extend this example:
|
||||||
|
|
||||||
|
1. **Add table support** - Layout HTML tables across pages
|
||||||
|
2. **Image handling** - Embed and position images
|
||||||
|
3. **CSS styling** - Enhanced style parsing
|
||||||
|
4. **Font management** - Custom font loading
|
||||||
|
5. **Export formats** - PDF generation from pages
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- **Python 3.7+**
|
||||||
|
- **PIL (Pillow)** - Image generation
|
||||||
|
- **BeautifulSoup4** - HTML parsing (via pyWebLayout)
|
||||||
|
- **pyWebLayout** - Core layout engine
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
These examples demonstrate that pyWebLayout provides a complete, production-ready solution for HTML-to-multi-page rendering. The system successfully handles the complex task of flowing content across page boundaries while maintaining professional typography and layout quality.
|
||||||
|
|
||||||
|
The 7-page output from a 4,736-character HTML document shows the system's capability to handle real-world content with proper pagination, making it suitable for serious document processing applications.
|
||||||
292
examples/html_line_breaking_demo.py
Normal file
292
examples/html_line_breaking_demo.py
Normal file
@ -0,0 +1,292 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
HTML Line Breaking and Paragraph Breaking Demo
|
||||||
|
|
||||||
|
This example demonstrates the proper use of pyWebLayout's line breaking system:
|
||||||
|
1. Line breaking with very long sentences
|
||||||
|
2. Word wrapping with long words
|
||||||
|
3. Hyphenation of extremely long words using pyphen
|
||||||
|
4. Paragraph breaking across pages
|
||||||
|
5. Various text formatting scenarios
|
||||||
|
|
||||||
|
This showcases the robustness of the layout engine's text flow capabilities
|
||||||
|
using the actual pyWebLayout concrete classes and layout system.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
# Add pyWebLayout to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
||||||
|
from pyWebLayout.layout.document_layouter import paragraph_layouter
|
||||||
|
from pyWebLayout.style.abstract_style import AbstractStyle
|
||||||
|
from pyWebLayout.style.concrete_style import StyleResolver, RenderingContext, ConcreteStyleRegistry
|
||||||
|
from pyWebLayout.style.page_style import PageStyle
|
||||||
|
from pyWebLayout.concrete import Page
|
||||||
|
from pyWebLayout.abstract.block import Paragraph, Heading
|
||||||
|
from pyWebLayout.abstract.inline import Word
|
||||||
|
|
||||||
|
|
||||||
|
def create_line_breaking_html() -> str:
    """Return the HTML fixture used to stress line and paragraph breaking.

    The document mixes very long words, long unbroken sentences, URLs,
    e-mail addresses and one space-free "word" so that wrapping and
    hyphenation are all exercised on narrow pages.
    """
    document = """
<html>
<body>
<h1>Line Breaking and Text Flow Demonstration</h1>

<p>This paragraph contains some extraordinarily long words that will definitely require hyphenation when rendered on narrow pages: supercalifragilisticexpialidocious, antidisestablishmentarianism, pneumonoultramicroscopicsilicovolcanoconiosisology, and floccinaucinihilipilificationism.</p>

<p>Here we have an extremely long sentence that goes on and on and on without any natural breaking points, demonstrating how the layout engine handles continuous text flow across multiple lines when the content exceeds the available width of the page and must be wrapped appropriately to maintain readability while preserving the semantic meaning of the original text content.</p>

<h2>Technical Terms and Specialized Vocabulary</h2>

<p>In the field of computational linguistics and natural language processing, we often encounter terminology such as morphophonological, psychopharmacological, electroencephalographic, and immunoelectrophoresis that challenges traditional typesetting systems.</p>

<p>The implementation of sophisticated algorithms for handling such complex lexical items requires careful consideration of hyphenation patterns, word spacing constraints, and line breaking optimization to ensure that the resulting layout maintains both aesthetic appeal and functional readability across various display contexts and page dimensions.</p>

<h2>Continuous Text Flow Example</h2>

<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>

<p>Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt.</p>

<h2>Mixed Content Challenges</h2>

<p>URLs like https://www.verylongdomainnamethatshoulddemonstratehowurlsarehandledinlayoutsystems.com/with/very/long/paths/that/might/need/special/treatment and email addresses such as someone.with.a.very.long.email.address@anextraordinarilylong.domainname.extension can present unique challenges.</p>

<p>Similarly, technical identifiers like ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890 or chemical compound names such as methylenedioxymethamphetamine require special handling for proper text flow and readability.</p>

<h2>Extreme Line Breaking Test</h2>

<p>Thisisaverylongwordwithoutanyspacesorpunctuationthatwillrequireforcedhyphenationtofitonnarrowpagesanddemonstratehowtheenginehandlesextremecases.</p>

<p>Finally, we test mixed scenarios: normal words, supercalifragilisticexpialidocious, more normal text, antidisestablishmentarianism, and regular content to show how the engine transitions between different text types seamlessly.</p>

</body>
</html>
"""
    return document
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLMultiPageRenderer:
    """Renderer for HTML content across multiple narrow pages using proper pyWebLayout classes.

    Typical use: construct, call :meth:`render_html` with an HTML string,
    then :meth:`save_pages` to write one PNG per page.
    """

    def __init__(self, page_width=300, page_height=400):
        """Store the page geometry and build the shared style objects.

        Args:
            page_width: Page width in pixels.
            page_height: Page height in pixels.
        """
        self.page_width = page_width
        self.page_height = page_height
        self.pages = []
        self.current_page = None

        # Create rendering context for narrow pages
        self.context = RenderingContext(
            base_font_size=10,  # Small font for narrow pages
            available_width=page_width - 50,  # Account for borders
            available_height=page_height - 80,  # Account for borders and header
            default_language="en-US"
        )

        # Create style resolver
        self.style_resolver = StyleResolver(self.context)

        # Create page style for narrow pages
        self.page_style = PageStyle(
            border_width=2,
            border_color=(160, 160, 160),
            background_color=(255, 255, 255),
            padding=(20, 25, 20, 25)  # top, right, bottom, left
        )

    def create_new_page(self) -> Page:
        """Create a new page, register it in ``self.pages`` and return it.

        The page gets the shared style resolver, its available width/height
        copied from ``page.content_size`` and its vertical cursor placed just
        below the top border and padding.
        """
        page = Page(
            size=(self.page_width, self.page_height),
            style=self.page_style
        )

        # Set up the page with style resolver
        page.style_resolver = self.style_resolver

        # Calculate available dimensions
        page.available_width = page.content_size[0]
        page.available_height = page.content_size[1]
        page._current_y_offset = self.page_style.border_width + self.page_style.padding_top

        self.pages.append(page)
        return page

    def render_html(self, html_content: str) -> List[Page]:
        """Parse ``html_content`` and lay its headings/paragraphs out on pages.

        Args:
            html_content: HTML source to render.

        Returns:
            ``self.pages``, including every page created by this call.
        """
        print("Parsing HTML content...")

        # Parse HTML into blocks
        blocks = parse_html_string(html_content)
        print(f"Parsed {len(blocks)} blocks from HTML")

        paragraphs = self._blocks_to_paragraphs(blocks)
        print(f"Created {len(paragraphs)} paragraphs for layout")

        # Layout paragraphs across pages using proper paragraph_layouter
        self.current_page = self.create_new_page()

        for i, paragraph in enumerate(paragraphs):
            print(f"Laying out paragraph {i+1}/{len(paragraphs)} ({len(paragraph._words)} words)")
            self._layout_paragraph(paragraph, i + 1)

        print("\nLayout complete:")
        print(f" - Total pages: {len(self.pages)}")
        print(f" - Total lines: {sum(len(page.children) for page in self.pages)}")

        return self.pages

    def _blocks_to_paragraphs(self, blocks):
        """Convert parsed Heading/Paragraph blocks into styled Paragraph objects.

        Blocks of any other type, and blocks that end up with no words, are
        dropped.
        """
        paragraphs = []
        for block in blocks:
            # Heading is tested first, exactly as before, in case it is also a Paragraph.
            if isinstance(block, Heading):
                is_major = block.level.value <= 2
                style = AbstractStyle(
                    font_size=14 if is_major else 12,
                    word_spacing=3.0,
                    word_spacing_min=1.0,
                    word_spacing_max=6.0,
                    language="en-US"
                )
                paragraph = Paragraph(style=style)
                paragraph.line_height = 18 if is_major else 16

                # words_iter() yields pairs; only the word itself is needed
                for _, word in block.words_iter():
                    paragraph.add_word(word)
                label = "heading"
            elif isinstance(block, Paragraph):
                style = AbstractStyle(
                    font_size=10,
                    word_spacing=2.0,
                    word_spacing_min=1.0,
                    word_spacing_max=4.0,
                    language="en-US"
                )
                paragraph = Paragraph(style=style)
                paragraph.line_height = 14

                # Add words from paragraph - use words property (list) directly
                for word in block.words:
                    paragraph.add_word(word)
                label = "paragraph"
            else:
                continue

            if paragraph._words:
                paragraphs.append(paragraph)
                print(f"Added {label}: {' '.join(w.text for w in paragraph._words[:5])}...")
        return paragraphs

    def _layout_paragraph(self, paragraph, number):
        """Flow one paragraph onto the current page, opening new pages as needed.

        Args:
            paragraph: Paragraph to lay out.
            number: 1-based paragraph number, used only in progress messages.
        """
        start_word = 0
        pretext = None

        while start_word < len(paragraph._words):
            # Use the proper paragraph_layouter function
            success, failed_word_index, remaining_pretext = paragraph_layouter(
                paragraph, self.current_page, start_word, pretext
            )
            lines_on_page = len(self.current_page.children)

            if success:
                # Paragraph completed on this page
                print(f" ✓ Paragraph completed on page {len(self.pages)} ({lines_on_page} lines)")
                return

            if failed_word_index is None:
                print(f" ✗ Layout failed for paragraph {number}")
                return

            # An empty page that accepted nothing and reported the same resume
            # point would fail identically on every fresh page; stop instead of
            # creating pages forever.
            if (lines_on_page == 0
                    and failed_word_index == start_word
                    and remaining_pretext == pretext):
                print(f" ✗ Layout failed for paragraph {number}: no line fits on an empty page")
                return

            # Page is full, need new page
            print(f" → Page {len(self.pages)} full, continuing from word {failed_word_index}")
            start_word = failed_word_index
            pretext = remaining_pretext
            self.current_page = self.create_new_page()

    @staticmethod
    def _load_footer_font():
        """Return the small footer font, falling back to PIL's default font."""
        try:
            return ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 8)
        except OSError:
            # truetype() raises OSError when the font file cannot be read
            return ImageFont.load_default()

    def save_pages(self, output_dir: str):
        """Save all pages as PNG images named ``page_NNN.png`` in ``output_dir``.

        The directory (and any missing parents) is created. Each image gets a
        centred "Page i of n" footer drawn 15 pixels above the bottom edge.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        print(f"\nSaving {len(self.pages)} pages to {output_path}")

        footer_font = None
        for i, page in enumerate(self.pages, 1):
            filename = f"page_{i:03d}.png"
            filepath = output_path / filename

            # Render the page using proper Page.render() method
            page_image = page.render()

            # Add page number at bottom; the font is loaded once and reused
            draw = ImageDraw.Draw(page_image)
            if footer_font is None:
                footer_font = self._load_footer_font()

            page_text = f"Page {i} of {len(self.pages)}"
            text_bbox = draw.textbbox((0, 0), page_text, font=footer_font)
            text_width = text_bbox[2] - text_bbox[0]

            x = (self.page_width - text_width) // 2
            y = self.page_height - 15
            draw.text((x, y), page_text, fill=(120, 120, 120), font=footer_font)

            # Save the page
            page_image.save(filepath)
            print(f" Saved {filename} ({len(page.children)} lines)")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the line breaking demonstration end to end.

    Builds the HTML fixture, renders it on 300x400 pages, writes the pages to
    ``output/html_line_breaking`` and prints summary statistics.
    """
    print("HTML Line Breaking and Paragraph Breaking Demo")
    print("=" * 50)

    # Create HTML content with challenging text
    html_content = create_line_breaking_html()
    print(f"Created HTML content ({len(html_content)} characters)")

    # Create renderer with narrow pages to force line breaking
    renderer = HTMLMultiPageRenderer(
        page_width=300,  # Very narrow to force line breaks
        page_height=400  # Moderate height
    )

    # Render HTML to pages
    pages = renderer.render_html(html_content)

    # Save pages
    output_dir = "output/html_line_breaking"
    renderer.save_pages(output_dir)

    # Plain strings where nothing is interpolated (no placeholder-less f-strings)
    print("\n✅ Demo complete!")
    print(f" Generated {len(pages)} pages demonstrating:")
    print(" - Line breaking with long sentences")
    print(" - Word hyphenation for extremely long words")
    print(" - Paragraph flow across multiple pages")
    print(" - Mixed content handling")
    print(f"\n📁 Output saved to: {output_dir}/")

    # Print summary statistics
    total_lines = sum(len(page.children) for page in pages)
    # Guard against division by zero when nothing was rendered
    avg_lines_per_page = total_lines / len(pages) if pages else 0

    print("\n📊 Statistics:")
    print(f" - Total lines rendered: {total_lines}")
    print(f" - Average lines per page: {avg_lines_per_page:.1f}")
    print(f" - Page dimensions: {renderer.page_width}x{renderer.page_height} pixels")


if __name__ == "__main__":
    main()
|
||||||
326
examples/html_multipage_demo.py
Normal file
326
examples/html_multipage_demo.py
Normal file
@ -0,0 +1,326 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
HTML Multi-Page Rendering Demo
|
||||||
|
|
||||||
|
This example demonstrates how to:
|
||||||
|
1. Parse HTML content using pyWebLayout's HTML extraction system
|
||||||
|
2. Layout the parsed content across multiple pages using the ereader layout system
|
||||||
|
3. Render each page as an image file
|
||||||
|
|
||||||
|
The demo shows the complete pipeline from HTML to multi-page layout.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
|
||||||
|
# Add pyWebLayout to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
||||||
|
from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
|
||||||
|
from pyWebLayout.concrete.page import Page
|
||||||
|
from pyWebLayout.style.page_style import PageStyle
|
||||||
|
from pyWebLayout.style import Font
|
||||||
|
from pyWebLayout.abstract.block import Block
|
||||||
|
|
||||||
|
|
||||||
|
def create_sample_html() -> str:
    """Return a two-chapter sample HTML document for the multi-page demo.

    The document contains headings, paragraphs with inline bold/italic text,
    an unordered list and a blockquote.
    """
    sample_document = """
<!DOCTYPE html>
<html>
<head>
<title>Sample Document</title>
</head>
<body>
<h1>Chapter 1: Introduction to Multi-Page Layout</h1>

<p>This is the first paragraph of our sample document. It demonstrates how HTML content
can be parsed and then laid out across multiple pages using the pyWebLayout system.
The system handles various HTML elements including headings, paragraphs, lists, and more.</p>

<p>Here's another paragraph with <strong>bold text</strong> and <em>italic text</em>
to show how inline formatting is preserved during the conversion process. The layout
engine will automatically handle word wrapping and page breaks as needed.</p>

<h2>Section 1.1: Features</h2>

<p>The multi-page layout system includes several key features:</p>

<ul>
<li>Automatic page breaking when content exceeds page boundaries</li>
<li>Font scaling support for different reading preferences</li>
<li>Position tracking for bookmarks and navigation</li>
<li>Support for various HTML elements and styling</li>
</ul>

<p>Each of these features works together to provide a seamless reading experience
that adapts to different page sizes and user preferences.</p>

<h2>Section 1.2: Technical Implementation</h2>

<p>The implementation uses a sophisticated layout engine that processes abstract
document elements and renders them onto concrete pages. This separation allows
for flexible styling and layout while maintaining the semantic structure of
the original content.</p>

<blockquote>
"The best way to understand a complex system is to see it in action with
real examples and practical demonstrations."
</blockquote>

<p>This quote illustrates the philosophy behind this demo - showing how the
various components work together in practice.</p>

<h1>Chapter 2: Advanced Layout Concepts</h1>

<p>Moving into more advanced territory, we can explore how the layout system
handles complex scenarios such as page breaks within paragraphs, font scaling
effects on layout, and position tracking across multiple pages.</p>

<p>The system maintains precise position information that allows for features
like bookmarking, search result highlighting, and seamless navigation between
different views of the same content.</p>

<h2>Section 2.1: Position Tracking</h2>

<p>Position tracking is implemented using a hierarchical system that can
reference any point in the document structure. This includes not just
paragraph and word positions, but also positions within tables, lists,
and other complex structures.</p>

<p>The position system is designed to be stable across different rendering
parameters, so a bookmark created with one font size will still be valid
when the user changes to a different font size.</p>

<h2>Section 2.2: Multi-Page Rendering</h2>

<p>The multi-page rendering system can generate pages both forward and
backward from any given position. This bidirectional capability is
essential for smooth navigation in ereader applications.</p>

<p>Each page is rendered independently, which allows for efficient
caching and parallel processing of multiple pages when needed.</p>

<p>This concludes our sample document. The layout system will automatically
determine how many pages are needed to display all this content based on
the page size and font settings used during rendering.</p>
</body>
</html>
"""
    return sample_document
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLMultiPageRenderer:
    """
    Renderer that converts HTML to multiple page images.
    """

    def __init__(self, page_size: Tuple[int, int] = (600, 800), font_scale: float = 1.0):
        """
        Initialize the renderer.

        Args:
            page_size: Size of each page in pixels (width, height)
            font_scale: Font scaling factor
        """
        self.page_size = page_size
        self.font_scale = font_scale
        self.page_style = PageStyle()

    def parse_html_to_blocks(self, html_content: str) -> List[Block]:
        """
        Parse HTML content into abstract blocks.

        Args:
            html_content: HTML string to parse

        Returns:
            List of abstract Block objects
        """
        base_font = Font(font_size=14)  # Base font for the document
        return parse_html_string(html_content, base_font=base_font)

    def render_pages(self, blocks: List[Block], max_pages: int = 20) -> List[Image.Image]:
        """
        Render blocks into multiple page images.

        Rendering stops at the end of the document, after ``max_pages`` pages,
        or at the first exception (which is printed, not raised).

        Args:
            blocks: List of abstract blocks to render
            max_pages: Maximum number of pages to render (safety limit)

        Returns:
            List of PIL Image objects, one per page
        """
        if not blocks:
            return []

        # Create the bidirectional layouter
        layouter = BidirectionalLayouter(blocks, self.page_style, self.page_size)

        pages = []
        current_position = RenderingPosition()  # Start at beginning

        while len(pages) < max_pages:
            try:
                # Render the next page
                page, next_position = layouter.render_page_forward(current_position, self.font_scale)

                # Convert page to image, labelled with its 1-based page number
                pages.append(self._page_to_image(page, page_number=len(pages) + 1))

                # Check if we've reached the end
                if self._is_end_position(next_position, current_position, blocks):
                    break

                current_position = next_position

            except Exception as e:
                # Demo boundary: report the failure and keep the pages rendered so far
                print(f"Error rendering page {len(pages) + 1}: {e}")
                break

        return pages

    def _page_to_image(self, page: Page, page_number=None) -> Image.Image:
        """
        Convert a Page object to a PIL Image.

        NOTE: ``page`` itself is not drawn here. This demo produces a white
        image with a border and a centred page label only.

        Args:
            page: Page object to convert
            page_number: Optional 1-based page number for the footer label.
                When omitted the label is just "Page".

        Returns:
            PIL Image object
        """
        # Create a white background image
        image = Image.new('RGB', self.page_size, 'white')
        draw = ImageDraw.Draw(image)

        # Draw page border
        border_color = (200, 200, 200)
        draw.rectangle([0, 0, self.page_size[0]-1, self.page_size[1]-1], outline=border_color)

        # Add page number at bottom
        try:
            from PIL import ImageFont
            font = ImageFont.load_default()
        except (ImportError, OSError):
            # Fall back to ImageDraw's own default font handling
            font = None

        # The old `'pages' in locals()` check was never true inside this method,
        # so the number was never shown; the caller now passes it explicitly.
        page_num_text = "Page" if page_number is None else f"Page {page_number}"
        text_bbox = draw.textbbox((0, 0), page_num_text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_x = (self.page_size[0] - text_width) // 2
        text_y = self.page_size[1] - 30

        draw.text((text_x, text_y), page_num_text, fill='black', font=font)

        return image

    def _is_end_position(self, current_pos: RenderingPosition, previous_pos: RenderingPosition, blocks: List[Block]) -> bool:
        """
        Check if we've reached the end of the document.

        Args:
            current_pos: Current rendering position
            previous_pos: Previous rendering position
            blocks: List of all blocks in document

        Returns:
            True if at end of document
        """
        # If position hasn't advanced, we're likely at the end
        if (current_pos.block_index == previous_pos.block_index and
                current_pos.word_index == previous_pos.word_index):
            return True

        # If we've processed all blocks
        if current_pos.block_index >= len(blocks):
            return True

        return False

    def save_pages(self, pages: List[Image.Image], output_dir: str = "output/html_multipage"):
        """
        Save rendered pages as image files.

        Files are named ``page_NNN.png`` and numbered from 1.

        Args:
            pages: List of page images
            output_dir: Directory to save images
        """
        # Create output directory
        os.makedirs(output_dir, exist_ok=True)

        for i, page_image in enumerate(pages, 1):
            filename = f"page_{i:03d}.png"
            filepath = os.path.join(output_dir, filename)
            page_image.save(filepath)
            print(f"Saved {filepath}")

        print(f"\nRendered {len(pages)} pages to {output_dir}/")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the multi-page demo: build HTML, parse, render, save, report.

    The success banner is printed only when at least one page was rendered,
    because ``render_pages`` reports errors by printing and returning the
    pages produced so far.
    """
    from collections import Counter

    print("HTML Multi-Page Rendering Demo")
    print("=" * 40)

    # Create sample HTML content
    print("1. Creating sample HTML content...")
    html_content = create_sample_html()
    print(f" Created HTML document ({len(html_content)} characters)")

    # Initialize renderer
    print("\n2. Initializing renderer...")
    renderer = HTMLMultiPageRenderer(page_size=(600, 800), font_scale=1.0)
    print(" Renderer initialized")

    # Parse HTML to blocks
    print("\n3. Parsing HTML to abstract blocks...")
    blocks = renderer.parse_html_to_blocks(html_content)
    print(f" Parsed {len(blocks)} blocks")

    # Print block summary (Counter keeps first-seen order, like the manual dict did)
    block_types = Counter(type(block).__name__ for block in blocks)

    print(" Block types found:")
    for block_type, count in block_types.items():
        print(f" - {block_type}: {count}")

    # Render pages
    print("\n4. Rendering pages...")
    pages = renderer.render_pages(blocks, max_pages=10)
    print(f" Rendered {len(pages)} pages")

    # Save pages
    print("\n5. Saving pages...")
    renderer.save_pages(pages)

    if pages:
        print("\n✓ Demo completed successfully!")
        print("\nTo view the results:")
        print(" - Check the output/html_multipage/ directory")
        print(" - Open the PNG files to see each rendered page")
    else:
        # Do not claim success when nothing was produced
        print("\n✗ Demo finished, but no pages were rendered.")

    # Show some statistics
    print("\nStatistics:")
    print(f" - Original HTML: {len(html_content)} characters")
    print(f" - Abstract blocks: {len(blocks)}")
    print(f" - Rendered pages: {len(pages)}")
    print(f" - Page size: {renderer.page_size[0]}x{renderer.page_size[1]} pixels")
    print(f" - Font scale: {renderer.font_scale}x")


if __name__ == "__main__":
    main()
|
||||||
451
examples/html_multipage_demo_final.py
Normal file
451
examples/html_multipage_demo_final.py
Normal file
@ -0,0 +1,451 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
HTML Multi-Page Rendering Demo - Final Version
|
||||||
|
|
||||||
|
This example demonstrates a complete HTML to multi-page layout system that:
|
||||||
|
1. Parses HTML content using pyWebLayout's HTML extraction system
|
||||||
|
2. Lays out content across multiple pages using the document layouter
|
||||||
|
3. Saves each page as an image file
|
||||||
|
4. Shows true multi-page functionality with smaller pages
|
||||||
|
|
||||||
|
This demonstrates the complete pipeline from HTML to multi-page layout.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
# Add pyWebLayout to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
||||||
|
from pyWebLayout.layout.document_layouter import paragraph_layouter
|
||||||
|
from pyWebLayout.style.abstract_style import AbstractStyle
|
||||||
|
from pyWebLayout.style.concrete_style import StyleResolver, RenderingContext
|
||||||
|
from pyWebLayout.style import Font
|
||||||
|
from pyWebLayout.abstract.block import Block, Paragraph, Heading
|
||||||
|
from pyWebLayout.abstract.inline import Word
|
||||||
|
from pyWebLayout.concrete.text import Line
|
||||||
|
|
||||||
|
|
||||||
|
class MultiPage:
    """A page implementation optimized for multi-page layout demonstration.

    The page owns a PIL image, tracks a vertical cursor
    (``_current_y_offset``) and accepts children until either the remaining
    height or ``max_lines`` is exhausted.
    """

    HEADER_SPACE = 20   # vertical space reserved below the top border for the header
    LINE_SPACING = 18   # cursor advance after each added child
    FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
    FONT_SIZE = 12

    _font = None  # loaded on first use by _get_font()

    def __init__(self, width=400, height=500, max_lines=15):  # Smaller pages for multi-page demo
        """Create a blank bordered page.

        Args:
            width: Page width in pixels.
            height: Page height in pixels.
            max_lines: Maximum number of children accepted by the page.
        """
        self.border_size = 30
        self._current_y_offset = self.border_size + self.HEADER_SPACE  # Leave space for header
        self.available_width = width - (2 * self.border_size)
        self.available_height = height - (2 * self.border_size) - 40  # Space for header/footer
        self.max_lines = max_lines
        self.lines_added = 0
        self.children = []
        self.page_size = (width, height)

        # Create a real drawing context
        self.image = Image.new('RGB', (width, height), 'white')
        self.draw = ImageDraw.Draw(self.image)

        # Create a real style resolver
        context = RenderingContext(base_font_size=14)
        self.style_resolver = StyleResolver(context)

        # Draw page border and header area
        border_color = (180, 180, 180)
        self.draw.rectangle([0, 0, width-1, height-1], outline=border_color, width=2)

        # Draw header line
        header_y = self.border_size + 15
        self.draw.line([self.border_size, header_y, width - self.border_size, header_y],
                       fill=border_color, width=1)

    def can_fit_line(self, line_height):
        """Return True if a line of ``line_height`` pixels still fits.

        A line fits when the height left below the cursor is at least
        ``line_height`` and fewer than ``max_lines`` children were added.
        """
        used_height = self._current_y_offset - self.border_size - self.HEADER_SPACE
        remaining_height = self.available_height - used_height
        return remaining_height >= line_height and self.lines_added < self.max_lines

    def add_child(self, child):
        """Add a child element (like a Line) to the page and advance the cursor.

        ``Line`` children are also drawn onto the page image. Always returns
        True.
        """
        self.children.append(child)
        self.lines_added += 1

        # Draw the line content on the page
        if isinstance(child, Line):
            self._draw_line(child)

        # Update y offset for next line
        self._current_y_offset += self.LINE_SPACING

        return True

    def _get_font(self):
        """Return the drawing font, loading it once per page."""
        if self._font is None:
            try:
                self._font = ImageFont.truetype(self.FONT_PATH, self.FONT_SIZE)
            except OSError:
                # truetype() raises OSError when the font file cannot be read
                self._font = ImageFont.load_default()
        return self._font

    def _draw_line(self, line):
        """Draw a line of text on the page at the current cursor position.

        The text is taken from the line's ``_text_content`` attribute when it
        exists; otherwise the placeholder "Text line" is drawn.
        """
        x = self.border_size + 5
        y = self._current_y_offset
        try:
            # Get line text (simplified - in real implementation this would be more complex)
            line_text = getattr(line, '_text_content', 'Text line')
            self.draw.text((x, y), line_text, fill=(0, 0, 0), font=self._get_font())
        except Exception:
            # Deliberate best-effort fallback: draw a simple representation
            self.draw.text((x, y), "Text line", fill=(0, 0, 0))
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleWord(Word):
    """Word subclass with a default font and naive mid-word hyphenation."""

    def __init__(self, text, style=None):
        """Create the word, defaulting to a 12pt ``Font`` when no style is given.

        Args:
            text: The word's text.
            style: Font/style passed to ``Word``; ``None`` selects
                ``Font(font_size=12)`` (a small font, for more content per page).
        """
        font = Font(font_size=12) if style is None else style
        super().__init__(text, font)

    def possible_hyphenation(self):
        """Return the possible hyphenation splits for this word.

        Returns:
            An empty list for words of six characters or fewer; otherwise a
            single ``(first_half + "-", second_half)`` pair, split at the
            midpoint of the text.
        """
        text = self.text
        if len(text) > 6:
            # Simple hyphenation: split roughly in the middle.
            split_at = len(text) // 2
            return [(text[:split_at] + "-", text[split_at:])]
        return []
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleParagraph:
    """Minimal paragraph object exposing ``style``, ``line_height``,
    ``is_heading`` and ``words`` for use with the layouter."""

    def __init__(self, text_content, style=None, is_heading=False):
        """Build the paragraph from plain text.

        Args:
            text_content: Text to split into words on whitespace.
            style: Style to use; when ``None`` a default ``AbstractStyle`` is
                created, with wider word spacing for headings.
            is_heading: Whether this paragraph represents a heading.
        """
        if style is None:
            # Headings get slightly wider default/maximum word spacing.
            spacing, spacing_max = (4.0, 8.0) if is_heading else (3.0, 6.0)
            style = AbstractStyle(
                word_spacing=spacing,
                word_spacing_min=2.0,
                word_spacing_max=spacing_max
            )

        self.style = style
        self.line_height = 22 if is_heading else 18  # Slightly larger for headings
        self.is_heading = is_heading

        # str.split() with no argument never yields empty or padded tokens,
        # so every token becomes exactly one word.
        self.words = [SimpleWord(token) for token in text_content.split()]
|
||||||
|
|
||||||
|
|
||||||
|
def create_longer_html() -> str:
    """Return the demo HTML document used to force a multi-page layout.

    The document has one ``<h1>`` title, five ``<h2>`` section headings and
    sixteen ``<p>`` paragraphs inside a single ``<html><body>`` wrapper.
    """
    document = """
<html>
<body>
<h1>The Complete Guide to Multi-Page Layout Systems</h1>

<p>This comprehensive document demonstrates the capabilities of the pyWebLayout system
for rendering HTML content across multiple pages. The system is designed to handle
complex document structures while maintaining precise control over layout and formatting.</p>

<p>The multi-page layout engine processes content incrementally, ensuring that text
flows naturally from one page to the next. This approach is essential for creating
professional-quality documents and ereader applications.</p>

<h2>Chapter 1: Introduction to Document Layout</h2>

<p>Document layout systems have evolved significantly over the years, from simple
text processors to sophisticated engines capable of handling complex typography,
multiple columns, and advanced formatting features.</p>

<p>The pyWebLayout system represents a modern approach to document processing,
combining the flexibility of HTML with the precision required for high-quality
page layout. This makes it suitable for a wide range of applications.</p>

<p>Key features of the system include automatic page breaking, font scaling support,
position tracking for navigation, and comprehensive support for HTML elements
including headings, paragraphs, lists, tables, and inline formatting.</p>

<h2>Chapter 2: Technical Architecture</h2>

<p>The system is built on a layered architecture that separates content parsing
from layout rendering. This separation allows for maximum flexibility while
maintaining performance and reliability.</p>

<p>At the core of the system is the HTML extraction module, which converts HTML
elements into abstract document structures. These structures are then processed
by the layout engine to produce concrete page representations.</p>

<p>The layout engine uses sophisticated algorithms to determine optimal line breaks,
word spacing, and page boundaries. It can handle complex scenarios such as
hyphenation, widow and orphan control, and multi-column layouts.</p>

<h2>Chapter 3: Practical Applications</h2>

<p>This technology has numerous practical applications in modern software development.
Ereader applications benefit from the precise position tracking and font scaling
capabilities, while document processing systems can leverage the robust HTML parsing.</p>

<p>The system is particularly well-suited for applications that need to display
long-form content in a paginated format. This includes digital books, technical
documentation, reports, and academic papers.</p>

<p>Performance characteristics are excellent, with sub-second rendering times for
typical documents. The system can handle documents with thousands of pages while
maintaining responsive user interaction.</p>

<h2>Chapter 4: Advanced Features</h2>

<p>Beyond basic text layout, the system supports advanced features such as
bidirectional text rendering, complex table layouts, and embedded images.
These features make it suitable for international applications and rich content.</p>

<p>The position tracking system is particularly noteworthy, as it maintains
stable references to content locations even when layout parameters change.
This enables features like bookmarking and search result highlighting.</p>

<p>Font scaling is implemented at the layout level, ensuring that all elements
scale proportionally while maintaining optimal readability. This is crucial
for accessibility and user preference support.</p>

<h2>Conclusion</h2>

<p>The pyWebLayout system demonstrates that it's possible to create sophisticated
document layout engines using modern Python technologies. The combination of
HTML parsing, abstract document modeling, and precise layout control provides
a powerful foundation for document-centric applications.</p>

<p>This example has shown the complete pipeline from HTML input to multi-page
output, illustrating how the various components work together to produce
high-quality results. The system is ready for use in production applications
requiring professional document layout capabilities.</p>
</body>
</html>
"""
    return document
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLMultiPageRenderer:
    """HTML to multi-page renderer with enhanced multi-page demonstration.

    Pipeline: ``parse_html_to_paragraphs`` turns HTML into ``SimpleParagraph``
    objects, ``render_pages`` lays them out on ``MultiPage`` instances, and
    ``save_pages`` decorates each page and writes it out as a PNG.
    """

    def __init__(self, page_size: Tuple[int, int] = (400, 500)):
        """Store the ``(width, height)`` used for every page that gets created."""
        self.page_size = page_size

    @staticmethod
    def _iter_word_texts(word_items, unwrap_tuples):
        """Yield the text of each entry in ``word_items`` that carries text.

        Entries may be objects with a ``text`` attribute, plain strings, or
        (when ``unwrap_tuples`` is true) tuples whose second element has a
        ``text`` attribute. Anything else is skipped.
        """
        for item in word_items:
            if hasattr(item, 'text'):
                yield item.text
            elif unwrap_tuples and isinstance(item, tuple) and len(item) >= 2:
                # Tuple format: (position, word_object)
                word_obj = item[1]
                if hasattr(word_obj, 'text'):
                    yield word_obj.text
            elif isinstance(item, str):
                yield item

    @staticmethod
    def _centered_x(page, text, font):
        """Return the x coordinate that horizontally centres ``text`` on ``page``."""
        bbox = page.draw.textbbox((0, 0), text, font=font)
        return (page.page_size[0] - (bbox[2] - bbox[0])) // 2

    def parse_html_to_paragraphs(self, html_content: str) -> List[SimpleParagraph]:
        """Parse HTML content into simple paragraphs.

        Only ``Paragraph`` and ``Heading`` blocks are kept; blocks from which
        no word text can be extracted are dropped.

        Args:
            html_content: HTML source to parse.

        Returns:
            One ``SimpleParagraph`` per usable block, in document order.
        """
        # Parse HTML using the extraction system
        base_font = Font(font_size=12)
        blocks = parse_html_string(html_content, base_font=base_font)

        paragraphs = []
        for block in blocks:
            if not isinstance(block, (Paragraph, Heading)):
                continue

            text_parts = []

            # Preferred source: the block's words() iterator, whose items may
            # be word objects, (position, word) tuples, or strings.
            if hasattr(block, 'words') and callable(block.words):
                text_parts.extend(self._iter_word_texts(block.words(), unwrap_tuples=True))

            # Fallback: read the _words attribute directly.
            if not text_parts and hasattr(block, '_words'):
                text_parts.extend(self._iter_word_texts(block._words, unwrap_tuples=False))

            if not text_parts:
                continue

            is_heading = isinstance(block, Heading)

            # Create appropriate style based on block type
            if is_heading:
                style = AbstractStyle(
                    word_spacing=4.0,
                    word_spacing_min=2.0,
                    word_spacing_max=8.0
                )
            else:
                style = AbstractStyle(
                    word_spacing=3.0,
                    word_spacing_min=2.0,
                    word_spacing_max=6.0
                )

            paragraphs.append(SimpleParagraph(" ".join(text_parts), style, is_heading))

        return paragraphs

    def render_pages(self, paragraphs: List[SimpleParagraph]) -> List[MultiPage]:
        """Render paragraphs into multiple pages.

        Paragraphs are laid out in order; whenever the layouter reports that
        the current page is full, a new page is started and layout resumes
        from the word that did not fit.

        Args:
            paragraphs: Paragraphs to lay out.

        Returns:
            The pages produced, or an empty list when ``paragraphs`` is empty.
        """
        if not paragraphs:
            return []

        pages = []
        current_page = MultiPage(*self.page_size)
        pages.append(current_page)

        for para_idx, paragraph in enumerate(paragraphs):
            start_word = 0

            # Keep a heading together with some following content: if the
            # page is nearly full, start the heading on a new page.
            if paragraph.is_heading and para_idx > 0 and current_page.lines_added > 0:
                if current_page.lines_added >= current_page.max_lines - 3:
                    current_page = MultiPage(*self.page_size)
                    pages.append(current_page)

            while start_word < len(paragraph.words):
                lines_before = current_page.lines_added

                # Try to layout the paragraph (or remaining part) on current page.
                # NOTE(review): the third return value is not fed back into
                # the next call - confirm whether the layouter expects it.
                success, failed_word_index, _remaining_pretext = paragraph_layouter(
                    paragraph, current_page, start_word
                )

                if success:
                    # Paragraph completed on this page
                    break

                # Nothing was placed on an empty page and the same word failed
                # again: a fresh page would fail the same way, so give up on
                # this paragraph rather than allocating pages forever.
                if (
                    failed_word_index is not None
                    and failed_word_index == start_word
                    and lines_before == 0
                    and current_page.lines_added == 0
                ):
                    break

                # Page is full, create a new page
                current_page = MultiPage(*self.page_size)
                pages.append(current_page)

                if failed_word_index is None:
                    # If no specific word failed, move to next paragraph
                    break

                # Continue with the failed word on the new page
                start_word = failed_word_index

        return pages

    def save_pages(self, pages: List[MultiPage], output_dir: str = "output/html_multipage_final"):
        """Save pages as image files with enhanced formatting.

        Each page gets a centred title in the header and a centred
        ``Page i of N`` footer, then is written to
        ``<output_dir>/page_NNN.png``.

        Args:
            pages: Pages to decorate and write.
            output_dir: Destination directory; created if missing.
        """
        os.makedirs(output_dir, exist_ok=True)

        # Fonts are the same for every page, so load them once. Fall back to
        # Pillow's default font when the DejaVu files are missing/unreadable
        # (OSError) or FreeType support is unavailable (ImportError).
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 10)
            title_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 11)
        except (OSError, ImportError):
            font = ImageFont.load_default()
            title_font = font

        header_text = "HTML Multi-Page Layout Demo"

        for i, page in enumerate(pages, 1):
            # Add document title in header
            header_x = self._centered_x(page, header_text, title_font)
            page.draw.text((header_x, 8), header_text, fill=(100, 100, 100), font=title_font)

            # Add page number in footer
            page_text = f"Page {i} of {len(pages)}"
            footer_x = self._centered_x(page, page_text, font)
            footer_y = page.page_size[1] - 20
            page.draw.text((footer_x, footer_y), page_text, fill=(120, 120, 120), font=font)

            # Save the page
            filename = f"page_{i:03d}.png"
            filepath = os.path.join(output_dir, filename)
            page.image.save(filepath)
            print(f"Saved {filepath}")

        print(f"\nRendered {len(pages)} pages to {output_dir}/")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the multi-page demo end to end and print progress and statistics.

    Steps: build the demo HTML, parse it into paragraphs, lay the paragraphs
    out on 400x500 pages, save the pages as PNG files, then print a summary.
    """
    print("HTML Multi-Page Rendering Demo - Final Version")
    print("=" * 55)

    # Create longer HTML content for multi-page demo
    print("1. Creating comprehensive HTML content...")
    html_content = create_longer_html()
    print(f" Created HTML document ({len(html_content)} characters)")

    # Initialize renderer with smaller pages to force multi-page layout
    print("\n2. Initializing renderer with smaller pages...")
    renderer = HTMLMultiPageRenderer(page_size=(400, 500))  # Smaller pages
    print(" Renderer initialized (400x500 pixel pages)")

    # Parse HTML to paragraphs
    print("\n3. Parsing HTML to paragraphs...")
    paragraphs = renderer.parse_html_to_paragraphs(html_content)
    print(f" Parsed {len(paragraphs)} paragraphs")

    # Show paragraph preview
    heading_count = sum(1 for p in paragraphs if p.is_heading)
    regular_count = len(paragraphs) - heading_count
    print(f" Found {heading_count} headings and {regular_count} regular paragraphs")

    # Render pages
    print("\n4. Rendering pages...")
    pages = renderer.render_pages(paragraphs)
    print(f" Rendered {len(pages)} pages")

    # Show page statistics
    total_lines = 0
    for i, page in enumerate(pages, 1):
        total_lines += page.lines_added
        print(f" Page {i}: {page.lines_added} lines")

    # Save pages
    print("\n5. Saving pages...")
    renderer.save_pages(pages)

    print("\n✓ Multi-page demo completed successfully!")
    print("\nTo view the results:")
    print(" - Check the output/html_multipage_final/ directory")
    print(" - Open the PNG files to see each rendered page")
    print(" - Notice how content flows naturally across pages")

    # render_pages returns an empty list when nothing was parsed; avoid
    # dividing by zero in that case.
    average_lines = total_lines / len(pages) if pages else 0.0

    # Show final statistics
    print("\nFinal Statistics:")
    print(f" - Original HTML: {len(html_content)} characters")
    print(f" - Parsed paragraphs: {len(paragraphs)} ({heading_count} headings, {regular_count} regular)")
    print(f" - Rendered pages: {len(pages)}")
    print(f" - Total lines: {total_lines}")
    print(f" - Average lines per page: {average_lines:.1f}")
    print(f" - Page size: {renderer.page_size[0]}x{renderer.page_size[1]} pixels")

    print("\n🎉 This demonstrates the complete HTML → Multi-Page pipeline!")
    print(f" The system successfully parsed HTML and laid it out across {len(pages)} pages.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
365
examples/html_multipage_simple.py
Normal file
365
examples/html_multipage_simple.py
Normal file
@ -0,0 +1,365 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple HTML Multi-Page Rendering Demo
|
||||||
|
|
||||||
|
This example demonstrates a working HTML to multi-page layout system using
|
||||||
|
the proven patterns from the integration tests. It shows:
|
||||||
|
|
||||||
|
1. Parse HTML content using pyWebLayout's HTML extraction system
|
||||||
|
2. Layout the parsed content across multiple pages using the document layouter
|
||||||
|
3. Save each page as an image file
|
||||||
|
|
||||||
|
This is a simplified but functional implementation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
# Add pyWebLayout to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
||||||
|
from pyWebLayout.layout.document_layouter import paragraph_layouter
|
||||||
|
from pyWebLayout.style.abstract_style import AbstractStyle
|
||||||
|
from pyWebLayout.style.concrete_style import StyleResolver, RenderingContext
|
||||||
|
from pyWebLayout.style import Font
|
||||||
|
from pyWebLayout.abstract.block import Block, Paragraph, Heading
|
||||||
|
from pyWebLayout.abstract.inline import Word
|
||||||
|
from pyWebLayout.concrete.text import Line
|
||||||
|
|
||||||
|
|
||||||
|
class SimplePage:
    """A simple page implementation for multi-page layout.

    The page owns a white RGB image with a light border, tracks how many
    lines have been added, and draws each added ``Line`` as plain text.
    """

    def __init__(self, width=600, height=800, max_lines=30):
        """Create a blank bordered page.

        Args:
            width: Page width in pixels.
            height: Page height in pixels.
            max_lines: Maximum number of lines accepted by ``can_fit_line``.
        """
        self.border_size = 40
        self._current_y_offset = self.border_size
        self.available_width = width - (2 * self.border_size)
        self.available_height = height - (2 * self.border_size)
        self.max_lines = max_lines
        self.lines_added = 0
        self.children = []
        self.page_size = (width, height)

        # Create a real drawing context
        self.image = Image.new('RGB', (width, height), 'white')
        self.draw = ImageDraw.Draw(self.image)

        # Create a real style resolver
        context = RenderingContext(base_font_size=16)
        self.style_resolver = StyleResolver(context)

        # Draw page border
        border_color = (220, 220, 220)
        self.draw.rectangle([0, 0, width-1, height-1], outline=border_color, width=2)

    def can_fit_line(self, line_height):
        """Check if another line of ``line_height`` pixels can fit on the page.

        Returns:
            True when the remaining vertical space is at least
            ``line_height`` and fewer than ``max_lines`` lines were added.
        """
        remaining_height = self.available_height - (self._current_y_offset - self.border_size)
        return remaining_height >= line_height and self.lines_added < self.max_lines

    def add_child(self, child):
        """Add a child element (like a Line) to the page.

        The child is recorded and counted; ``Line`` children are also drawn.
        This method does not change ``_current_y_offset``.
        NOTE(review): presumably the layouter advances the offset itself -
        confirm against ``paragraph_layouter``.

        Returns:
            Always True.
        """
        self.children.append(child)
        self.lines_added += 1

        # Draw the line content on the page
        if isinstance(child, Line):
            self._draw_line(child)

        return True

    def _draw_line(self, line):
        """Draw a line of text at the current vertical offset.

        The text is read from the line's ``_text_content`` attribute, with
        ``'Line content'`` as placeholder. Drawing is best-effort: on any
        error a plain ``"Text line"`` is drawn with Pillow's default font.
        """
        x = self.border_size + 10
        y = self._current_y_offset

        try:
            # Prefer DejaVu Sans; fall back to Pillow's built-in font when the
            # file is missing/unreadable (OSError) or FreeType support is not
            # available (ImportError). A bare ``except:`` here would also
            # swallow KeyboardInterrupt and SystemExit.
            try:
                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
            except (OSError, ImportError):
                font = ImageFont.load_default()

            # Get line text (simplified)
            line_text = getattr(line, '_text_content', 'Line content')

            self.draw.text((x, y), line_text, fill=(0, 0, 0), font=font)
        except Exception:
            # Deliberate best-effort fallback: draw a simple representation.
            self.draw.text((x, y), "Text line", fill=(0, 0, 0))
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleWord(Word):
    """Word subclass with a default font and naive mid-word hyphenation."""

    def __init__(self, text, style=None):
        """Create the word, defaulting to a 14pt ``Font`` when no style is given.

        Args:
            text: The word's text.
            style: Font/style passed to ``Word``; ``None`` selects
                ``Font(font_size=14)``.
        """
        font = Font(font_size=14) if style is None else style
        super().__init__(text, font)

    def possible_hyphenation(self):
        """Return the possible hyphenation splits for this word.

        Returns:
            An empty list for words of six characters or fewer; otherwise a
            single ``(first_half + "-", second_half)`` pair, split at the
            midpoint of the text.
        """
        text = self.text
        if len(text) > 6:
            # Simple hyphenation: split roughly in the middle.
            split_at = len(text) // 2
            return [(text[:split_at] + "-", text[split_at:])]
        return []
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleParagraph:
    """Minimal paragraph object exposing ``style``, ``line_height`` and
    ``words`` for use with the layouter."""

    def __init__(self, text_content, style=None):
        """Build the paragraph from plain text.

        Args:
            text_content: Text to split into words on whitespace.
            style: Style to use; when ``None`` a default ``AbstractStyle``
                with 4.0 word spacing (min 2.0, max 8.0) is created.
        """
        self.style = style if style is not None else AbstractStyle(
            word_spacing=4.0,
            word_spacing_min=2.0,
            word_spacing_max=8.0
        )
        self.line_height = 20

        # str.split() with no argument never yields empty or padded tokens,
        # so every token becomes exactly one word.
        self.words = [SimpleWord(token) for token in text_content.split()]
|
||||||
|
|
||||||
|
|
||||||
|
def create_sample_html() -> str:
    """Return the small sample HTML document used by the demo.

    The document contains two ``<h1>`` chapter headings, one ``<h2>``
    section heading and eight ``<p>`` paragraphs.
    """
    document = """
<html>
<body>
<h1>Chapter 1: Introduction</h1>

<p>This is the first paragraph of our sample document. It demonstrates how HTML content
can be parsed and then laid out across multiple pages using the pyWebLayout system.</p>

<p>Here's another paragraph with some more text to show how the system handles
multiple paragraphs and automatic page breaking when content exceeds page boundaries.</p>

<h2>Section 1.1: Features</h2>

<p>The multi-page layout system includes several key features that make it suitable
for ereader applications and document processing systems.</p>

<p>Each paragraph is processed individually and can span multiple lines or even
multiple pages if the content is long enough to require it.</p>

<h1>Chapter 2: Implementation</h1>

<p>The implementation uses a sophisticated layout engine that processes abstract
document elements and renders them onto concrete pages.</p>

<p>This separation allows for flexible styling and layout while maintaining
the semantic structure of the original content.</p>

<p>The system can handle various HTML elements including headings, paragraphs,
lists, and other block-level elements commonly found in documents.</p>

<p>Position tracking is maintained throughout the layout process, enabling
features like bookmarking and navigation between different views of the content.</p>
</body>
</html>
"""
    return document
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLMultiPageRenderer:
    """Simple HTML to multi-page renderer.

    Pipeline: ``parse_html_to_paragraphs`` turns HTML into ``SimpleParagraph``
    objects, ``render_pages`` lays them out on ``SimplePage`` instances, and
    ``save_pages`` numbers each page and writes it out as a PNG.
    """

    def __init__(self, page_size: Tuple[int, int] = (600, 800)):
        """Store the ``(width, height)`` used for every page that gets created."""
        self.page_size = page_size

    @staticmethod
    def _iter_word_texts(word_items, unwrap_tuples):
        """Yield the text of each entry in ``word_items`` that carries text.

        Entries may be objects with a ``text`` attribute, plain strings, or
        (when ``unwrap_tuples`` is true) tuples whose second element has a
        ``text`` attribute. Anything else is skipped.
        """
        for item in word_items:
            if hasattr(item, 'text'):
                yield item.text
            elif unwrap_tuples and isinstance(item, tuple) and len(item) >= 2:
                # Tuple format: (position, word_object)
                word_obj = item[1]
                if hasattr(word_obj, 'text'):
                    yield word_obj.text
            elif isinstance(item, str):
                yield item

    def parse_html_to_paragraphs(self, html_content: str) -> List[SimpleParagraph]:
        """Parse HTML content into simple paragraphs.

        Only ``Paragraph`` and ``Heading`` blocks are kept; blocks from which
        no word text can be extracted are dropped.

        Args:
            html_content: HTML source to parse.

        Returns:
            One ``SimpleParagraph`` per usable block, in document order.
        """
        # Parse HTML using the extraction system
        base_font = Font(font_size=14)
        blocks = parse_html_string(html_content, base_font=base_font)

        paragraphs = []
        for block in blocks:
            if not isinstance(block, (Paragraph, Heading)):
                continue

            text_parts = []

            # Preferred source: the block's words() iterator, whose items may
            # be word objects, (position, word) tuples, or strings.
            if hasattr(block, 'words') and callable(block.words):
                text_parts.extend(self._iter_word_texts(block.words(), unwrap_tuples=True))

            # Fallback: read the _words attribute directly.
            if not text_parts and hasattr(block, '_words'):
                text_parts.extend(self._iter_word_texts(block._words, unwrap_tuples=False))

            if not text_parts:
                continue

            # Create appropriate style based on block type
            if isinstance(block, Heading):
                style = AbstractStyle(
                    word_spacing=5.0,
                    word_spacing_min=3.0,
                    word_spacing_max=10.0
                )
            else:
                style = AbstractStyle(
                    word_spacing=4.0,
                    word_spacing_min=2.0,
                    word_spacing_max=8.0
                )

            paragraphs.append(SimpleParagraph(" ".join(text_parts), style))

        return paragraphs

    def render_pages(self, paragraphs: List[SimpleParagraph]) -> List[SimplePage]:
        """Render paragraphs into multiple pages.

        Paragraphs are laid out in order; whenever the layouter reports that
        the current page is full, a new page is started and layout resumes
        from the word that did not fit.

        Args:
            paragraphs: Paragraphs to lay out.

        Returns:
            The pages produced, or an empty list when ``paragraphs`` is empty.
        """
        if not paragraphs:
            return []

        pages = []
        current_page = SimplePage(*self.page_size)
        pages.append(current_page)

        for paragraph in paragraphs:
            start_word = 0

            while start_word < len(paragraph.words):
                lines_before = current_page.lines_added

                # Try to layout the paragraph (or remaining part) on current page.
                # NOTE(review): the third return value is not fed back into
                # the next call - confirm whether the layouter expects it.
                success, failed_word_index, _remaining_pretext = paragraph_layouter(
                    paragraph, current_page, start_word
                )

                if success:
                    # Paragraph completed on this page
                    break

                # Nothing was placed on an empty page and the same word failed
                # again: a fresh page would fail the same way, so give up on
                # this paragraph rather than allocating pages forever.
                if (
                    failed_word_index is not None
                    and failed_word_index == start_word
                    and lines_before == 0
                    and current_page.lines_added == 0
                ):
                    break

                # Page is full, create a new page
                current_page = SimplePage(*self.page_size)
                pages.append(current_page)

                if failed_word_index is None:
                    # If no specific word failed, move to next paragraph
                    break

                # Continue with the failed word on the new page
                start_word = failed_word_index

        return pages

    def save_pages(self, pages: List[SimplePage], output_dir: str = "output/html_simple"):
        """Save pages as image files.

        Each page gets a centred ``Page i`` footer and is written to
        ``<output_dir>/page_NNN.png``.

        Args:
            pages: Pages to number and write.
            output_dir: Destination directory; created if missing.
        """
        os.makedirs(output_dir, exist_ok=True)

        # The footer font is the same for every page, so load it once. Fall
        # back to Pillow's default font when the DejaVu file is
        # missing/unreadable (OSError) or FreeType is unavailable (ImportError).
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 12)
        except (OSError, ImportError):
            font = ImageFont.load_default()

        for i, page in enumerate(pages, 1):
            # Add page number, horizontally centred near the bottom edge
            page_text = f"Page {i}"
            text_bbox = page.draw.textbbox((0, 0), page_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_x = (page.page_size[0] - text_width) // 2
            text_y = page.page_size[1] - 25

            page.draw.text((text_x, text_y), page_text, fill=(100, 100, 100), font=font)

            # Save the page
            filename = f"page_{i:03d}.png"
            filepath = os.path.join(output_dir, filename)
            page.image.save(filepath)
            print(f"Saved {filepath}")

        print(f"\nRendered {len(pages)} pages to {output_dir}/")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the simple demo end to end and print progress and statistics.

    Steps: build the sample HTML, parse it into paragraphs, preview the first
    few, lay everything out on 600x800 pages, save the pages as PNG files,
    then print a summary.
    """
    preview_paragraphs = 3
    preview_words = 8

    print("Simple HTML Multi-Page Rendering Demo")
    print("=" * 45)

    # Step 1: sample content
    print("1. Creating sample HTML content...")
    html_content = create_sample_html()
    print(f" Created HTML document ({len(html_content)} characters)")

    # Step 2: renderer
    print("\n2. Initializing renderer...")
    renderer = HTMLMultiPageRenderer(page_size=(600, 800))
    print(" Renderer initialized")

    # Step 3: HTML -> paragraphs
    print("\n3. Parsing HTML to paragraphs...")
    paragraphs = renderer.parse_html_to_paragraphs(html_content)
    print(f" Parsed {len(paragraphs)} paragraphs")

    # Preview the first few paragraphs, truncated to their first words
    for number, para in enumerate(paragraphs[:preview_paragraphs], start=1):
        ellipsis = "..." if len(para.words) > preview_words else ""
        preview = " ".join(word.text for word in para.words[:preview_words]) + ellipsis
        print(f" Paragraph {number}: {preview}")

    hidden = len(paragraphs) - preview_paragraphs
    if hidden > 0:
        print(f" ... and {hidden} more paragraphs")

    # Step 4: layout
    print("\n4. Rendering pages...")
    pages = renderer.render_pages(paragraphs)
    print(f" Rendered {len(pages)} pages")

    # Per-page line counts
    for page_number, page in enumerate(pages, 1):
        print(f" Page {page_number}: {page.lines_added} lines")

    # Step 5: output
    print("\n5. Saving pages...")
    renderer.save_pages(pages)

    print("\n✓ Demo completed successfully!")
    print("\nTo view the results:")
    print(" - Check the output/html_simple/ directory")
    print(" - Open the PNG files to see each rendered page")

    # Summary
    total_lines = sum(page.lines_added for page in pages)
    width, height = renderer.page_size[0], renderer.page_size[1]
    print("\nStatistics:")
    print(f" - Original HTML: {len(html_content)} characters")
    print(f" - Parsed paragraphs: {len(paragraphs)}")
    print(f" - Rendered pages: {len(pages)}")
    print(f" - Total lines: {total_lines}")
    print(f" - Page size: {width}x{height} pixels")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
386
examples/recursive_position_demo.py
Normal file
386
examples/recursive_position_demo.py
Normal file
@ -0,0 +1,386 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Demonstration of the Recursive Position System
|
||||||
|
|
||||||
|
This example shows how to use the hierarchical position tracking system
|
||||||
|
that can reference any type of content (words, images, table cells, etc.)
|
||||||
|
in a nested document structure.
|
||||||
|
|
||||||
|
Key Features Demonstrated:
|
||||||
|
- Hierarchical position tracking
|
||||||
|
- Dynamic content type support
|
||||||
|
- JSON and shelf serialization
|
||||||
|
- Position relationships (ancestor/descendant)
|
||||||
|
- Bookmark management
|
||||||
|
- Real-world ereader scenarios
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||||
|
|
||||||
|
from pyWebLayout.layout.recursive_position import (
|
||||||
|
ContentType, LocationNode, RecursivePosition, PositionBuilder, PositionStorage,
|
||||||
|
create_word_position, create_image_position, create_table_cell_position, create_list_item_position
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def demonstrate_basic_position_creation():
    """Build one position with ``PositionBuilder`` and print its details.

    The position is chapter 2 -> block 5 -> paragraph -> word 12 (offset 3),
    with rendering metadata attached. The function prints the position, its
    depth and leaf node, the chapter and word nodes looked up by content
    type, and the stored font scale.
    """
    print("=== Basic Position Creation ===")

    # Assemble the position step by step with the fluent builder.
    builder = PositionBuilder()
    builder = builder.chapter(2)
    builder = builder.block(5)
    builder = builder.paragraph()
    builder = builder.word(12, offset=3)
    builder = builder.with_rendering_metadata(font_scale=1.5, page_size=[800, 600])
    position = builder.build()

    print(f"Position path: {position}")
    print(f"Depth: {position.get_depth()}")
    print(f"Leaf node: {position.get_leaf_node()}")

    # Look up individual nodes of the path by their content type.
    chapter_node = position.get_node(ContentType.CHAPTER)
    word_node = position.get_node(ContentType.WORD)

    print(f"Chapter: {chapter_node.index}")
    print(f"Word: {word_node.index}, offset: {word_node.offset}")
    print(f"Font scale: {position.rendering_metadata.get('font_scale')}")
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def demonstrate_different_content_types():
    """Print one example position for each supported kind of content.

    Uses the convenience factories for word, image, table-cell and list-item
    positions, then the builder for a link nested inside a table cell.
    """
    print("=== Different Content Types ===")

    # Simple positions created through the module-level factory helpers.
    word_pos = create_word_position(1, 3, 15, 2)
    print("Word position: {}".format(word_pos))

    image_pos = create_image_position(2, 1, 0)
    print("Image position: {}".format(image_pos))

    table_pos = create_table_cell_position(0, 4, 2, 1, 5)
    print("Table cell position: {}".format(table_pos))

    list_pos = create_list_item_position(1, 2, 3, 0)
    print("List item position: {}".format(list_pos))

    # A deeper path: a link inside a cell, inside a row, inside a table.
    builder = PositionBuilder().chapter(3).block(7)
    builder = builder.table(0, table_type="data", columns=4)
    builder = builder.table_row(2, row_type="header")
    builder = builder.table_cell(1, cell_type="data", colspan=2)
    builder = builder.link(0, url="https://example.com", text="Click here")
    complex_pos = builder.build()
    print("Complex nested position: {}".format(complex_pos))
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def demonstrate_position_relationships():
    """Print ancestor/descendant queries between positions on one path.

    Builds a block-level, a paragraph-level and a word-level position that
    all sit under chapter 1 / block 2, checks how they relate, and then asks
    for the common ancestor with a word position from a different chapter.
    """
    print("=== Position Relationships ===")

    def _under_block():
        # Fresh builder already descended to chapter 1, block 2.
        return PositionBuilder().chapter(1).block(2)

    chapter_pos = _under_block().build()
    paragraph_pos = _under_block().paragraph().build()
    word_pos = _under_block().paragraph().word(5).build()

    print("Chapter position: {}".format(chapter_pos))
    print("Paragraph position: {}".format(paragraph_pos))
    print("Word position: {}".format(word_pos))

    # Relationship checks between the three nested positions.
    is_anc_of_paragraph = chapter_pos.is_ancestor_of(paragraph_pos)
    print("Chapter is ancestor of paragraph: {}".format(is_anc_of_paragraph))
    is_anc_of_word = chapter_pos.is_ancestor_of(word_pos)
    print("Chapter is ancestor of word: {}".format(is_anc_of_word))
    is_desc_of_chapter = word_pos.is_descendant_of(chapter_pos)
    print("Word is descendant of chapter: {}".format(is_desc_of_chapter))

    # A position in another chapter, used to show the common-ancestor query.
    unrelated_pos = create_word_position(2, 1, 0)
    common = word_pos.get_common_ancestor(unrelated_pos)
    print("Common ancestor of word and unrelated: {}".format(common))
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def demonstrate_serialization():
    """Round-trip a deeply nested position through its JSON representation.

    Prints at most the first 200 characters of the JSON text (followed by
    "..." when truncated) and whether the restored position equals the
    original.
    """
    print("=== Serialization ===")

    # A word inside a table cell, carrying metadata at several levels.
    builder = PositionBuilder().chapter(4).block(8)
    builder = builder.table(0, table_type="financial", columns=5, rows=20)
    builder = builder.table_row(3, row_type="data", category="Q2")
    builder = builder.table_cell(2, cell_type="currency", format="USD")
    builder = builder.word(0, text="$1,234.56")
    builder = builder.with_rendering_metadata(
        font_scale=1.2,
        page_size=[600, 800],
        theme="light",
        currency_format="USD",
    )
    position = builder.build()

    # Serialize and show a preview of the JSON text.
    json_str = position.to_json()
    print("JSON serialization:")
    if len(json_str) > 200:
        preview = json_str[:200] + "..."
    else:
        preview = json_str
    print(preview)

    # Deserialize and confirm nothing was lost.
    restored = RecursivePosition.from_json(json_str)
    print("Restored position equals original: {}".format(position == restored))
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def demonstrate_storage_systems():
    """Save and reload bookmarks with both the JSON and the shelf backend.

    Three sample positions are written to a JSON-backed ``PositionStorage``,
    listed, and one is loaded back and compared with the original. The same
    positions are then written to a shelf-backed storage and listed. The
    demo directories are removed afterwards on a best-effort basis.
    """
    # Local import: only the cleanup at the end of this demo needs it.
    import shutil

    print("=== Storage Systems ===")

    # Create test positions
    positions = {
        "bookmark1": create_word_position(1, 5, 20, 3),
        "bookmark2": create_image_position(2, 3, 1),
        "bookmark3": create_table_cell_position(3, 1, 2, 1, 0),
    }

    # Test JSON storage
    print("JSON Storage:")
    json_storage = PositionStorage("demo_positions_json", use_shelf=False)

    for name, pos in positions.items():
        json_storage.save_position("demo_doc", name, pos)
        print(f" Saved {name}: {pos}")

    # List and load positions
    saved_positions = json_storage.list_positions("demo_doc")
    print(f" Saved positions: {saved_positions}")

    loaded = json_storage.load_position("demo_doc", "bookmark1")
    print(f" Loaded bookmark1: {loaded}")
    print(f" Matches original: {loaded == positions['bookmark1']}")

    # Test shelf storage
    print("\nShelf Storage:")
    shelf_storage = PositionStorage("demo_positions_shelf", use_shelf=True)

    for name, pos in positions.items():
        shelf_storage.save_position("demo_doc", name, pos)

    shelf_positions = shelf_storage.list_positions("demo_doc")
    print(f" Shelf positions: {shelf_positions}")

    # Clean up demo files. Each directory is removed independently so a
    # failure on one does not skip the other; ignore_errors keeps this
    # best-effort without a bare ``except`` that would hide unrelated bugs.
    for demo_dir in ("demo_positions_json", "demo_positions_shelf"):
        shutil.rmtree(demo_dir, ignore_errors=True)

    print()
|
||||||
|
|
||||||
|
|
||||||
|
def demonstrate_ereader_scenario():
    """Simulate an ereader user saving bookmarks and comparing them.

    Five positions (four bookmarks plus the current reading position) are
    saved to a JSON-backed ``PositionStorage`` under the document id
    ``"my_novel"``. Each bookmark is then classified relative to the current
    position: ancestor, descendant, sharing a common ancestor deeper than
    the first path node, or unrelated. The storage directory is removed at
    the end on a best-effort basis.
    """
    # Local import: ``shutil`` is not imported at module level, so without
    # this the cleanup below raises NameError (previously swallowed by a
    # bare ``except``, leaving the directory behind).
    import shutil

    print("=== Ereader Bookmark Scenario ===")

    # Simulate user reading progress as (position, description) pairs.
    reading_positions = [
        # User starts reading chapter 1
        (PositionBuilder()
            .chapter(1)
            .block(0)
            .paragraph()
            .word(0)
            .with_rendering_metadata(font_scale=1.0, page_size=[600, 800], theme="light")
            .build(), "Chapter 1 Start"),

        # User bookmarks an interesting quote in chapter 2
        (PositionBuilder()
            .chapter(2)
            .block(15)
            .paragraph()
            .word(8, offset=0)
            .with_rendering_metadata(font_scale=1.2, page_size=[600, 800], theme="sepia")
            .build(), "Interesting Quote"),

        # User bookmarks a table in chapter 3
        (PositionBuilder()
            .chapter(3)
            .block(22)
            .table(0, table_type="data", title="Sales Figures")
            .table_row(1, row_type="header")
            .table_cell(0, cell_type="header", text="Quarter")
            .with_rendering_metadata(font_scale=1.1, page_size=[600, 800], theme="dark")
            .build(), "Sales Table"),

        # User bookmarks an image caption
        (PositionBuilder()
            .chapter(4)
            .block(8)
            .image(0, alt_text="Company Logo", caption="Figure 4.1: Corporate Identity")
            .with_rendering_metadata(font_scale=1.0, page_size=[600, 800], theme="light")
            .build(), "Logo Image"),

        # User's current reading position (with character-level precision)
        (PositionBuilder()
            .chapter(5)
            .block(12)
            .paragraph()
            .word(23, offset=7)  # 7 characters into word 23
            .with_rendering_metadata(font_scale=1.3, page_size=[600, 800], theme="dark")
            .build(), "Current Position"),
    ]

    # Save all bookmarks
    storage = PositionStorage("ereader_bookmarks", use_shelf=False)

    for position, description in reading_positions:
        # Storage key derived from the description, e.g. "sales_table".
        bookmark_name = description.lower().replace(" ", "_")
        storage.save_position("my_novel", bookmark_name, position)
        print(f"Saved bookmark '{description}': {position}")

    print(f"\nTotal bookmarks: {len(storage.list_positions('my_novel'))}")

    # Demonstrate bookmark navigation
    print("\n--- Bookmark Navigation ---")
    current_pos = reading_positions[-1][0]  # Current reading position

    for position, description in reading_positions[:-1]:  # All except current
        # Calculate relationship to current position
        if position.is_ancestor_of(current_pos):
            relationship = "ancestor of current"
        elif current_pos.is_ancestor_of(position):
            relationship = "descendant of current"
        else:
            common = position.get_common_ancestor(current_pos)
            # More than one shared node means the two positions agree on
            # something beyond the first path element.
            if len(common.path) > 1:
                relationship = f"shares {common.get_leaf_node().content_type.value} with current"
            else:
                relationship = "unrelated to current"

        print(f"'{description}' is {relationship}")

    # Clean up: best-effort removal of the demo storage directory.
    shutil.rmtree("ereader_bookmarks", ignore_errors=True)

    print()
|
||||||
|
|
||||||
|
|
||||||
|
def demonstrate_advanced_navigation():
    """Show navigation within nested lists and between table cells.

    First builds a word position inside a two-level list and derives
    positions truncated to the list-item and list content types. Then builds
    a word position inside a table cell, copies it, edits the copy so it
    points at the neighbouring cell, and prints the row index both share.
    """
    print("=== Advanced Navigation Scenarios ===")

    # Multi-level list navigation
    print("Multi-level List Navigation:")
    nested_list_pos = (PositionBuilder()
        .chapter(2)
        .block(5)
        .list(0, list_type="ordered", title="Main Topics")
        .list_item(2, text="Data Structures")
        .list(1, list_type="unordered", title="Subtopics")
        .list_item(1, text="Hash Tables")
        .word(3, text="implementation")
        .build())

    print(f" Nested list position: {nested_list_pos}")

    # Navigate to parent list item (work on a copy so the original is kept)
    parent_item_pos = nested_list_pos.copy().truncate_to_type(ContentType.LIST_ITEM)
    print(f" Parent list item: {parent_item_pos}")

    # Navigate to main list
    main_list_pos = nested_list_pos.copy().truncate_to_type(ContentType.LIST)
    print(f" Main list: {main_list_pos}")

    # Table navigation
    print("\nTable Navigation:")
    table_pos = (PositionBuilder()
        .chapter(3)
        .block(10)
        .table(0, table_type="comparison", rows=5, columns=3)
        .table_row(2, row_type="data")
        .table_cell(1, cell_type="data", header="Price")
        .word(0, text="$99.99")
        .build())

    print(f" Table cell position: {table_pos}")

    # Navigate to different cells in same row
    next_cell_pos = table_pos.copy()
    cell_node = next_cell_pos.get_node(ContentType.TABLE_CELL)
    cell_node.index = 2  # Move to next column
    cell_node.metadata["header"] = "Quantity"
    word_node = next_cell_pos.get_node(ContentType.WORD)
    # Update the word's text through its metadata, the same way the cell's
    # "header" is updated above; assigning ``word_node.text`` would only
    # create an ad-hoc attribute outside the node's metadata.
    word_node.metadata["text"] = "5"

    print(f" Next cell position: {next_cell_pos}")

    # Verify they share the same row
    common = table_pos.get_common_ancestor(next_cell_pos)
    row_node = common.get_node(ContentType.TABLE_ROW)
    print(f" Shared row index: {row_node.index if row_node else 'None'}")

    print()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Execute every demonstration in order, then print a closing summary."""
    print("Recursive Position System Demonstration")
    print("=" * 50)
    print()

    # The demos run in the same order as the features are introduced.
    demos = (
        demonstrate_basic_position_creation,
        demonstrate_different_content_types,
        demonstrate_position_relationships,
        demonstrate_serialization,
        demonstrate_storage_systems,
        demonstrate_ereader_scenario,
        demonstrate_advanced_navigation,
    )
    for run_demo in demos:
        run_demo()

    # Closing summary; the empty entry produces the blank separator line.
    summary_lines = (
        "=== Summary ===",
        "The Recursive Position System provides:",
        "✓ Hierarchical position tracking for any content type",
        "✓ Dynamic content type support (words, images, tables, lists, etc.)",
        "✓ Flexible serialization (JSON and Python shelf)",
        "✓ Position relationships (ancestor/descendant queries)",
        "✓ Fluent builder pattern for easy position creation",
        "✓ Metadata support for rendering context",
        "✓ Real-world ereader bookmark management",
        "✓ Advanced navigation capabilities",
        "",
        "This system is ideal for:",
        "• Ereader applications with precise bookmarking",
        "• Document editors with complex navigation",
        "• Content management systems",
        "• Any application requiring hierarchical position tracking",
    )
    for line in summary_lines:
        print(line)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||||
42
positions/my_novel_chapter3_climax.json
Normal file
42
positions/my_novel_chapter3_climax.json
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"content_type": "document",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "chapter",
|
||||||
|
"index": 3,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "block",
|
||||||
|
"index": 8,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"index": 0,
|
||||||
|
"offset": 0,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"content_type": "word",
|
||||||
|
"index": 15,
|
||||||
|
"offset": 5,
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rendering_metadata": {
|
||||||
|
"font_scale": 1.2,
|
||||||
|
"page_size": [
|
||||||
|
600,
|
||||||
|
800
|
||||||
|
],
|
||||||
|
"theme": "dark"
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -14,7 +14,7 @@ __version__ = '0.1.0'
|
|||||||
from pyWebLayout.core import Renderable, Interactable, Layoutable, Queriable
|
from pyWebLayout.core import Renderable, Interactable, Layoutable, Queriable
|
||||||
|
|
||||||
# Style components
|
# Style components
|
||||||
from pyWebLayout.style import Alignment, Font, FontWeight, FontStyle, TextDecoration
|
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
|
||||||
|
|
||||||
|
|
||||||
# Abstract document model
|
# Abstract document model
|
||||||
|
|||||||
@ -166,7 +166,12 @@ class Paragraph(Block):
|
|||||||
"""
|
"""
|
||||||
return FormattedSpan.create_and_add_to(self, style, background)
|
return FormattedSpan.create_and_add_to(self, style, background)
|
||||||
|
|
||||||
def words(self) -> Iterator[Tuple[int, Word]]:
|
@property
|
||||||
|
def words(self) -> List[Word]:
|
||||||
|
"""Get the list of words in this paragraph"""
|
||||||
|
return self._words
|
||||||
|
|
||||||
|
def words_iter(self) -> Iterator[Tuple[int, Word]]:
|
||||||
"""
|
"""
|
||||||
Iterate over the words in this paragraph.
|
Iterate over the words in this paragraph.
|
||||||
|
|
||||||
|
|||||||
@ -4,7 +4,7 @@ from PIL import Image
|
|||||||
from typing import Tuple, Union, List, Optional, Dict
|
from typing import Tuple, Union, List, Optional, Dict
|
||||||
|
|
||||||
from pyWebLayout.core.base import Renderable, Queriable
|
from pyWebLayout.core.base import Renderable, Queriable
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
class Box(Renderable, Queriable):
|
class Box(Renderable, Queriable):
|
||||||
|
|
||||||
@ -21,7 +21,16 @@ class Box(Renderable, Queriable):
|
|||||||
self._halign = halign
|
self._halign = halign
|
||||||
self._valign = valign
|
self._valign = valign
|
||||||
|
|
||||||
|
@property
|
||||||
|
def origin(self) -> np.ndarray:
|
||||||
|
"""Get the origin (top-left corner) of the box"""
|
||||||
|
return self._origin
|
||||||
|
|
||||||
|
@property
|
||||||
|
def size(self) -> np.ndarray:
|
||||||
|
"""Get the size (width, height) of the box"""
|
||||||
|
return self._size
|
||||||
|
|
||||||
def in_shape(self, point):
|
def in_shape(self, point):
|
||||||
|
|
||||||
return np.all((point >= self._origin) & (point < self._end), axis=-1)
|
return np.all((point >= self._origin) & (point < self._end), axis=-1)
|
||||||
|
|
||||||
|
|||||||
@ -5,7 +5,7 @@ from PIL import Image as PILImage, ImageDraw, ImageFont
|
|||||||
from pyWebLayout.core.base import Renderable, Queriable
|
from pyWebLayout.core.base import Renderable, Queriable
|
||||||
from pyWebLayout.abstract.block import Image as AbstractImage
|
from pyWebLayout.abstract.block import Image as AbstractImage
|
||||||
from .box import Box
|
from .box import Box
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
|
|
||||||
class RenderableImage(Renderable, Queriable):
|
class RenderableImage(Renderable, Queriable):
|
||||||
|
|||||||
@ -4,21 +4,20 @@ from PIL import Image, ImageDraw
|
|||||||
|
|
||||||
from pyWebLayout.core.base import Renderable, Layoutable, Queriable
|
from pyWebLayout.core.base import Renderable, Layoutable, Queriable
|
||||||
from pyWebLayout.style.page_style import PageStyle
|
from pyWebLayout.style.page_style import PageStyle
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
from .box import Box
|
from .box import Box
|
||||||
|
|
||||||
|
|
||||||
class Page(Renderable, Queriable):
|
class Page(Renderable, Queriable):
|
||||||
"""
|
"""
|
||||||
A page represents a canvas that can hold and render child renderable objects.
|
A page represents a canvas that can hold and render child renderable objects.
|
||||||
It handles layout, rendering, and provides query capabilities to find which child
|
It handles layout, rendering, and provides query capabilities to find which child
|
||||||
contains a given point.
|
contains a given point.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, size: Tuple[int, int], style: Optional[PageStyle] = None):
|
def __init__(self, size: Tuple[int, int], style: Optional[PageStyle] = None):
|
||||||
"""
|
"""
|
||||||
Initialize a new page.
|
Initialize a new page.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
size: The total size of the page (width, height) including borders
|
size: The total size of the page (width, height) including borders
|
||||||
style: The PageStyle defining borders, spacing, and appearance
|
style: The PageStyle defining borders, spacing, and appearance
|
||||||
@ -29,16 +28,21 @@ class Page(Renderable, Queriable):
|
|||||||
self._canvas: Optional[Image.Image] = None
|
self._canvas: Optional[Image.Image] = None
|
||||||
self._draw: Optional[ImageDraw.Draw] = None
|
self._draw: Optional[ImageDraw.Draw] = None
|
||||||
self._current_y_offset = 0 # Track vertical position for layout
|
self._current_y_offset = 0 # Track vertical position for layout
|
||||||
|
|
||||||
def free_space(self) -> Tuple[int, int]:
|
def free_space(self) -> Tuple[int, int]:
|
||||||
"""Get the remaining space on the page"""
|
"""Get the remaining space on the page"""
|
||||||
return (self._size[0], self._size[1] - self._current_y_offset)
|
return (self._size[0], self._size[1] - self._current_y_offset)
|
||||||
|
|
||||||
|
def can_fit_line(self, line_height: int) -> bool:
|
||||||
|
"""Check if a line of the given height can fit on the page."""
|
||||||
|
remaining_height = self.content_size[1] - (self._current_y_offset - self._style.border_width - self._style.padding_top)
|
||||||
|
return remaining_height >= line_height
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def size(self) -> Tuple[int, int]:
|
def size(self) -> Tuple[int, int]:
|
||||||
"""Get the total page size including borders"""
|
"""Get the total page size including borders"""
|
||||||
return self._size
|
return self._size
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def canvas_size(self) -> Tuple[int, int]:
|
def canvas_size(self) -> Tuple[int, int]:
|
||||||
"""Get the canvas size (page size minus borders)"""
|
"""Get the canvas size (page size minus borders)"""
|
||||||
@ -47,7 +51,7 @@ class Page(Renderable, Queriable):
|
|||||||
self._size[0] - border_reduction,
|
self._size[0] - border_reduction,
|
||||||
self._size[1] - border_reduction
|
self._size[1] - border_reduction
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def content_size(self) -> Tuple[int, int]:
|
def content_size(self) -> Tuple[int, int]:
|
||||||
"""Get the content area size (canvas minus padding)"""
|
"""Get the content area size (canvas minus padding)"""
|
||||||
@ -56,29 +60,33 @@ class Page(Renderable, Queriable):
|
|||||||
canvas_w - self._style.total_horizontal_padding,
|
canvas_w - self._style.total_horizontal_padding,
|
||||||
canvas_h - self._style.total_vertical_padding
|
canvas_h - self._style.total_vertical_padding
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def border_size(self) -> int:
|
def border_size(self) -> int:
|
||||||
"""Get the border width"""
|
"""Get the border width"""
|
||||||
return self._style.border_width
|
return self._style.border_width
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def style(self) -> PageStyle:
|
def style(self) -> PageStyle:
|
||||||
"""Get the page style"""
|
"""Get the page style"""
|
||||||
return self._style
|
return self._style
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def draw(self) -> Optional[ImageDraw.Draw]:
|
def draw(self) -> Optional[ImageDraw.Draw]:
|
||||||
"""Get the ImageDraw object for drawing on this page's canvas"""
|
"""Get the ImageDraw object for drawing on this page's canvas"""
|
||||||
|
if self._draw is None:
|
||||||
|
# Initialize canvas and draw context if not already done
|
||||||
|
self._canvas = self._create_canvas()
|
||||||
|
self._draw = ImageDraw.Draw(self._canvas)
|
||||||
return self._draw
|
return self._draw
|
||||||
|
|
||||||
def add_child(self, child: Renderable) -> 'Page':
|
def add_child(self, child: Renderable) -> 'Page':
|
||||||
"""
|
"""
|
||||||
Add a child renderable object to this page.
|
Add a child renderable object to this page.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
child: The renderable object to add
|
child: The renderable object to add
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Self for method chaining
|
Self for method chaining
|
||||||
"""
|
"""
|
||||||
@ -87,14 +95,14 @@ class Page(Renderable, Queriable):
|
|||||||
# Invalidate the canvas when children change
|
# Invalidate the canvas when children change
|
||||||
self._canvas = None
|
self._canvas = None
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def remove_child(self, child: Renderable) -> bool:
|
def remove_child(self, child: Renderable) -> bool:
|
||||||
"""
|
"""
|
||||||
Remove a child from the page.
|
Remove a child from the page.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
child: The child to remove
|
child: The child to remove
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if the child was found and removed, False otherwise
|
True if the child was found and removed, False otherwise
|
||||||
"""
|
"""
|
||||||
@ -104,11 +112,11 @@ class Page(Renderable, Queriable):
|
|||||||
return True
|
return True
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def clear_children(self) -> 'Page':
|
def clear_children(self) -> 'Page':
|
||||||
"""
|
"""
|
||||||
Remove all children from the page.
|
Remove all children from the page.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Self for method chaining
|
Self for method chaining
|
||||||
"""
|
"""
|
||||||
@ -116,93 +124,95 @@ class Page(Renderable, Queriable):
|
|||||||
self._canvas = None
|
self._canvas = None
|
||||||
self._current_y_offset = 0
|
self._current_y_offset = 0
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def children(self) -> List[Renderable]:
|
def children(self) -> List[Renderable]:
|
||||||
"""Get a copy of the children list"""
|
"""Get a copy of the children list"""
|
||||||
return self._children.copy()
|
return self._children.copy()
|
||||||
|
|
||||||
|
|
||||||
def _get_child_height(self, child: Renderable) -> int:
|
def _get_child_height(self, child: Renderable) -> int:
|
||||||
"""
|
"""
|
||||||
Get the height of a child object.
|
Get the height of a child object.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
child: The child to measure
|
child: The child to measure
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Height in pixels
|
Height in pixels
|
||||||
"""
|
"""
|
||||||
if hasattr(child, '_size') and child._size is not None:
|
if hasattr(child, '_size') and child._size is not None:
|
||||||
if isinstance(child._size, (list, tuple, np.ndarray)) and len(child._size) >= 2:
|
if isinstance(child._size, (list, tuple, np.ndarray)) and len(child._size) >= 2:
|
||||||
return int(child._size[1])
|
return int(child._size[1])
|
||||||
|
|
||||||
if hasattr(child, 'size') and child.size is not None:
|
if hasattr(child, 'size') and child.size is not None:
|
||||||
if isinstance(child.size, (list, tuple, np.ndarray)) and len(child.size) >= 2:
|
if isinstance(child.size, (list, tuple, np.ndarray)) and len(child.size) >= 2:
|
||||||
return int(child.size[1])
|
return int(child.size[1])
|
||||||
|
|
||||||
if hasattr(child, 'height'):
|
if hasattr(child, 'height'):
|
||||||
return int(child.height)
|
return int(child.height)
|
||||||
|
|
||||||
# Default fallback height
|
# Default fallback height
|
||||||
return 20
|
return 20
|
||||||
|
|
||||||
def render_children(self):
|
def render_children(self):
|
||||||
"""
|
"""
|
||||||
Call render on all children in the list.
|
Call render on all children in the list.
|
||||||
Children draw directly onto the page's canvas via the shared ImageDraw object.
|
Children draw directly onto the page's canvas via the shared ImageDraw object.
|
||||||
"""
|
"""
|
||||||
for child in self._children:
|
for child in self._children:
|
||||||
|
# Synchronize draw context for Line objects before rendering
|
||||||
|
if hasattr(child, '_draw'):
|
||||||
|
child._draw = self._draw
|
||||||
if hasattr(child, 'render'):
|
if hasattr(child, 'render'):
|
||||||
child.render()
|
child.render()
|
||||||
|
|
||||||
def render(self) -> Image.Image:
|
def render(self) -> Image.Image:
|
||||||
"""
|
"""
|
||||||
Render the page with all its children.
|
Render the page with all its children.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
PIL Image containing the rendered page
|
PIL Image containing the rendered page
|
||||||
"""
|
"""
|
||||||
# Create the base canvas and draw object
|
# Create the base canvas and draw object
|
||||||
self._canvas = self._create_canvas()
|
self._canvas = self._create_canvas()
|
||||||
self._draw = ImageDraw.Draw(self._canvas)
|
self._draw = ImageDraw.Draw(self._canvas)
|
||||||
|
|
||||||
# Render all children - they draw directly onto the canvas
|
# Render all children - they draw directly onto the canvas
|
||||||
self.render_children()
|
self.render_children()
|
||||||
|
|
||||||
return self._canvas
|
return self._canvas
|
||||||
|
|
||||||
def _create_canvas(self) -> Image.Image:
|
def _create_canvas(self) -> Image.Image:
|
||||||
"""
|
"""
|
||||||
Create the base canvas with background and borders.
|
Create the base canvas with background and borders.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
PIL Image with background and borders applied
|
PIL Image with background and borders applied
|
||||||
"""
|
"""
|
||||||
# Create base image
|
# Create base image
|
||||||
canvas = Image.new('RGBA', self._size, (*self._style.background_color, 255))
|
canvas = Image.new('RGBA', self._size, (*self._style.background_color, 255))
|
||||||
|
|
||||||
# Draw borders if needed
|
# Draw borders if needed
|
||||||
if self._style.border_width > 0:
|
if self._style.border_width > 0:
|
||||||
draw = ImageDraw.Draw(canvas)
|
draw = ImageDraw.Draw(canvas)
|
||||||
border_color = (*self._style.border_color, 255)
|
border_color = (*self._style.border_color, 255)
|
||||||
|
|
||||||
# Draw border rectangle
|
# Draw border rectangle inside the content area
|
||||||
for i in range(self._style.border_width):
|
border_offset = self._style.border_width
|
||||||
draw.rectangle([
|
draw.rectangle([
|
||||||
(i, i),
|
(border_offset, border_offset),
|
||||||
(self._size[0] - 1 - i, self._size[1] - 1 - i)
|
(self._size[0] - border_offset - 1, self._size[1] - border_offset - 1)
|
||||||
], outline=border_color)
|
], outline=border_color)
|
||||||
|
|
||||||
return canvas
|
return canvas
|
||||||
|
|
||||||
def _get_child_position(self, child: Renderable) -> Tuple[int, int]:
|
def _get_child_position(self, child: Renderable) -> Tuple[int, int]:
|
||||||
"""
|
"""
|
||||||
Get the position where a child should be rendered.
|
Get the position where a child should be rendered.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
child: The child object
|
child: The child object
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (x, y) coordinates
|
Tuple of (x, y) coordinates
|
||||||
"""
|
"""
|
||||||
@ -211,42 +221,42 @@ class Page(Renderable, Queriable):
|
|||||||
return (int(child._origin[0]), int(child._origin[1]))
|
return (int(child._origin[0]), int(child._origin[1]))
|
||||||
elif isinstance(child._origin, (list, tuple)) and len(child._origin) >= 2:
|
elif isinstance(child._origin, (list, tuple)) and len(child._origin) >= 2:
|
||||||
return (int(child._origin[0]), int(child._origin[1]))
|
return (int(child._origin[0]), int(child._origin[1]))
|
||||||
|
|
||||||
if hasattr(child, 'position'):
|
if hasattr(child, 'position'):
|
||||||
pos = child.position
|
pos = child.position
|
||||||
if isinstance(pos, (list, tuple)) and len(pos) >= 2:
|
if isinstance(pos, (list, tuple)) and len(pos) >= 2:
|
||||||
return (int(pos[0]), int(pos[1]))
|
return (int(pos[0]), int(pos[1]))
|
||||||
|
|
||||||
# Default to origin
|
# Default to origin
|
||||||
return (0, 0)
|
return (0, 0)
|
||||||
|
|
||||||
def query_point(self, point: Tuple[int, int]) -> Optional[Renderable]:
|
def query_point(self, point: Tuple[int, int]) -> Optional[Renderable]:
|
||||||
"""
|
"""
|
||||||
Query a point to determine which child it belongs to.
|
Query a point to determine which child it belongs to.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
point: The (x, y) coordinates to query
|
point: The (x, y) coordinates to query
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The child object that contains the point, or None if no child contains it
|
The child object that contains the point, or None if no child contains it
|
||||||
"""
|
"""
|
||||||
point_array = np.array(point)
|
point_array = np.array(point)
|
||||||
|
|
||||||
# Check each child (in reverse order so topmost child is found first)
|
# Check each child (in reverse order so topmost child is found first)
|
||||||
for child in reversed(self._children):
|
for child in reversed(self._children):
|
||||||
if self._point_in_child(point_array, child):
|
if self._point_in_child(point_array, child):
|
||||||
return child
|
return child
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _point_in_child(self, point: np.ndarray, child: Renderable) -> bool:
|
def _point_in_child(self, point: np.ndarray, child: Renderable) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if a point is within a child's bounds.
|
Check if a point is within a child's bounds.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
point: The point to check
|
point: The point to check
|
||||||
child: The child to check against
|
child: The child to check against
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if the point is within the child's bounds
|
True if the point is within the child's bounds
|
||||||
"""
|
"""
|
||||||
@ -256,50 +266,50 @@ class Page(Renderable, Queriable):
|
|||||||
return child.in_object(point)
|
return child.in_object(point)
|
||||||
except:
|
except:
|
||||||
pass # Fall back to bounds checking
|
pass # Fall back to bounds checking
|
||||||
|
|
||||||
# Get child position and size for bounds checking
|
# Get child position and size for bounds checking
|
||||||
child_pos = self._get_child_position(child)
|
child_pos = self._get_child_position(child)
|
||||||
child_size = self._get_child_size(child)
|
child_size = self._get_child_size(child)
|
||||||
|
|
||||||
if child_size is None:
|
if child_size is None:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Check if point is within child bounds
|
# Check if point is within child bounds
|
||||||
return (
|
return (
|
||||||
child_pos[0] <= point[0] < child_pos[0] + child_size[0] and
|
child_pos[0] <= point[0] < child_pos[0] + child_size[0] and
|
||||||
child_pos[1] <= point[1] < child_pos[1] + child_size[1]
|
child_pos[1] <= point[1] < child_pos[1] + child_size[1]
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_child_size(self, child: Renderable) -> Optional[Tuple[int, int]]:
|
def _get_child_size(self, child: Renderable) -> Optional[Tuple[int, int]]:
|
||||||
"""
|
"""
|
||||||
Get the size of a child object.
|
Get the size of a child object.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
child: The child to measure
|
child: The child to measure
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (width, height) or None if size cannot be determined
|
Tuple of (width, height) or None if size cannot be determined
|
||||||
"""
|
"""
|
||||||
if hasattr(child, '_size') and child._size is not None:
|
if hasattr(child, '_size') and child._size is not None:
|
||||||
if isinstance(child._size, (list, tuple, np.ndarray)) and len(child._size) >= 2:
|
if isinstance(child._size, (list, tuple, np.ndarray)) and len(child._size) >= 2:
|
||||||
return (int(child._size[0]), int(child._size[1]))
|
return (int(child._size[0]), int(child._size[1]))
|
||||||
|
|
||||||
if hasattr(child, 'size') and child.size is not None:
|
if hasattr(child, 'size') and child.size is not None:
|
||||||
if isinstance(child.size, (list, tuple, np.ndarray)) and len(child.size) >= 2:
|
if isinstance(child.size, (list, tuple, np.ndarray)) and len(child.size) >= 2:
|
||||||
return (int(child.size[0]), int(child.size[1]))
|
return (int(child.size[0]), int(child.size[1]))
|
||||||
|
|
||||||
if hasattr(child, 'width') and hasattr(child, 'height'):
|
if hasattr(child, 'width') and hasattr(child, 'height'):
|
||||||
return (int(child.width), int(child.height))
|
return (int(child.width), int(child.height))
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def in_object(self, point: Tuple[int, int]) -> bool:
|
def in_object(self, point: Tuple[int, int]) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if a point is within this page's bounds.
|
Check if a point is within this page's bounds.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
point: The (x, y) coordinates to check
|
point: The (x, y) coordinates to check
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if the point is within the page bounds
|
True if the point is within the page bounds
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -1,44 +1,40 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from pyWebLayout.core.base import Renderable, Queriable
|
from pyWebLayout.core.base import Renderable, Queriable
|
||||||
from .box import Box
|
from .box import Box
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment, Font, FontStyle, FontWeight, TextDecoration
|
||||||
from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
|
|
||||||
from pyWebLayout.abstract import Word
|
from pyWebLayout.abstract import Word
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
from typing import Tuple, Union, List, Optional, Protocol
|
from typing import Tuple, Union, List, Optional, Protocol
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
|
||||||
class AlignmentHandler(ABC):
|
class AlignmentHandler(ABC):
|
||||||
"""
|
"""
|
||||||
Abstract base class for text alignment handlers.
|
Abstract base class for text alignment handlers.
|
||||||
Each handler implements a specific alignment strategy.
|
Each handler implements a specific alignment strategy.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def calculate_spacing_and_position(self, text_objects: List['Text'],
|
def calculate_spacing_and_position(self, text_objects: List['Text'],
|
||||||
available_width: int, min_spacing: int,
|
available_width: int, min_spacing: int,
|
||||||
max_spacing: int) -> Tuple[int, int, bool]:
|
max_spacing: int) -> Tuple[int, int, bool]:
|
||||||
"""
|
"""
|
||||||
Calculate the spacing between words and starting position for the line.
|
Calculate the spacing between words and starting position for the line.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text_objects: List of Text objects in the line
|
text_objects: List of Text objects in the line
|
||||||
available_width: Total width available for the line
|
available_width: Total width available for the line
|
||||||
min_spacing: Minimum spacing between words
|
min_spacing: Minimum spacing between words
|
||||||
max_spacing: Maximum spacing between words
|
max_spacing: Maximum spacing between words
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (spacing_between_words, starting_x_position)
|
Tuple of (spacing_between_words, starting_x_position)
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class LeftAlignmentHandler(AlignmentHandler):
|
class LeftAlignmentHandler(AlignmentHandler):
|
||||||
"""Handler for left-aligned text."""
|
"""Handler for left-aligned text."""
|
||||||
|
|
||||||
def calculate_spacing_and_position(self,
|
def calculate_spacing_and_position(self,
|
||||||
text_objects: List['Text'],
|
text_objects: List['Text'],
|
||||||
available_width: int,
|
available_width: int,
|
||||||
@ -46,6 +42,7 @@ class LeftAlignmentHandler(AlignmentHandler):
|
|||||||
max_spacing: int) -> Tuple[int, int, bool]:
|
max_spacing: int) -> Tuple[int, int, bool]:
|
||||||
"""
|
"""
|
||||||
Calculate spacing and position for left-aligned text objects.
|
Calculate spacing and position for left-aligned text objects.
|
||||||
|
CREngine-inspired: never allow negative spacing, always use minimum spacing for overflow.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text_objects (List[Text]): A list of text objects to be laid out.
|
text_objects (List[Text]): A list of text objects to be laid out.
|
||||||
@ -56,41 +53,52 @@ class LeftAlignmentHandler(AlignmentHandler):
|
|||||||
Returns:
|
Returns:
|
||||||
Tuple[int, int, bool]: Spacing, start position, and overflow flag.
|
Tuple[int, int, bool]: Spacing, start position, and overflow flag.
|
||||||
"""
|
"""
|
||||||
|
print("LeftAlignmentHandler:")
|
||||||
|
# Handle single word case
|
||||||
|
if len(text_objects) <= 1:
|
||||||
|
return 0, 0, False
|
||||||
|
|
||||||
# Calculate the total length of all text objects
|
# Calculate the total length of all text objects
|
||||||
text_length = sum([text.width for text in text_objects])
|
text_length = sum([text.width for text in text_objects])
|
||||||
|
|
||||||
|
# Calculate number of gaps between texts
|
||||||
|
num_gaps = len(text_objects) - 1
|
||||||
|
|
||||||
|
# Calculate minimum space needed (text + minimum gaps)
|
||||||
|
min_total_width = text_length + (min_spacing * num_gaps)
|
||||||
|
|
||||||
|
# Check if we have overflow (CREngine pattern: always use min_spacing for overflow)
|
||||||
|
if min_total_width > available_width:
|
||||||
|
return min_spacing, 0, True # Overflow - but use safe minimum spacing
|
||||||
|
|
||||||
# Calculate residual space left after accounting for text lengths
|
# Calculate residual space left after accounting for text lengths
|
||||||
residual_space = available_width - text_length
|
residual_space = available_width - text_length
|
||||||
|
|
||||||
# Calculate number of gaps between texts
|
# Calculate ideal spacing
|
||||||
num_gaps = max(1, len(text_objects) - 1)
|
|
||||||
|
|
||||||
# Initial spacing based on equal distribution of residual space
|
|
||||||
ideal_space = (min_spacing + max_spacing)/2
|
|
||||||
actual_spacing = residual_space // num_gaps
|
actual_spacing = residual_space // num_gaps
|
||||||
|
print(actual_spacing)
|
||||||
# Clamp the calculated spacing within min and max limits
|
# Clamp within bounds (CREngine pattern: respect max_spacing)
|
||||||
if actual_spacing < min_spacing:
|
if actual_spacing > max_spacing:
|
||||||
return actual_spacing, 0, True
|
return max_spacing, 0, False
|
||||||
|
elif actual_spacing < min_spacing:
|
||||||
return ideal_space, 0, False
|
# Ensure we never return spacing less than min_spacing
|
||||||
|
return min_spacing, 0, False
|
||||||
|
else:
|
||||||
|
return actual_spacing, 0, False # Use calculated spacing
|
||||||
|
|
||||||
class CenterRightAlignmentHandler(AlignmentHandler):
|
class CenterRightAlignmentHandler(AlignmentHandler):
|
||||||
"""Handler for center and right-aligned text."""
|
"""Handler for center and right-aligned text."""
|
||||||
|
|
||||||
def __init__(self, alignment: Alignment):
|
def __init__(self, alignment: Alignment):
|
||||||
self._alignment = alignment
|
self._alignment = alignment
|
||||||
|
|
||||||
def calculate_spacing_and_position(self, text_objects: List['Text'],
|
def calculate_spacing_and_position(self, text_objects: List['Text'],
|
||||||
available_width: int, min_spacing: int,
|
available_width: int, min_spacing: int,
|
||||||
max_spacing: int) -> Tuple[int, int, bool]:
|
max_spacing: int) -> Tuple[int, int, bool]:
|
||||||
"""Center/right alignment uses minimum spacing with calculated start position."""
|
"""Center/right alignment uses minimum spacing with calculated start position."""
|
||||||
word_length = sum([word.width for word in text_objects])
|
word_length = sum([word.width for word in text_objects])
|
||||||
residual_space = available_width - word_length
|
residual_space = available_width - word_length
|
||||||
|
|
||||||
# Handle single word case
|
# Handle single word case
|
||||||
if len(text_objects) <= 1:
|
if len(text_objects) <= 1:
|
||||||
if self._alignment == Alignment.CENTER:
|
if self._alignment == Alignment.CENTER:
|
||||||
@ -98,14 +106,13 @@ class CenterRightAlignmentHandler(AlignmentHandler):
|
|||||||
else: # RIGHT
|
else: # RIGHT
|
||||||
start_position = available_width - word_length
|
start_position = available_width - word_length
|
||||||
return 0, max(0, start_position), False
|
return 0, max(0, start_position), False
|
||||||
|
|
||||||
actual_spacing = residual_space // (len(text_objects)-1)
|
|
||||||
|
|
||||||
|
actual_spacing = residual_space // (len(text_objects)-1)
|
||||||
|
print(actual_spacing)
|
||||||
ideal_space = (min_spacing + max_spacing)/2
|
ideal_space = (min_spacing + max_spacing)/2
|
||||||
if actual_spacing > 0.5*(min_spacing + max_spacing):
|
if actual_spacing > 0.5*(min_spacing + max_spacing):
|
||||||
actual_spacing = 0.5*(min_spacing + max_spacing)
|
actual_spacing = 0.5*(min_spacing + max_spacing)
|
||||||
|
|
||||||
|
|
||||||
content_length = word_length + (len(text_objects)-1) * actual_spacing
|
content_length = word_length + (len(text_objects)-1) * actual_spacing
|
||||||
if self._alignment == Alignment.CENTER:
|
if self._alignment == Alignment.CENTER:
|
||||||
start_position = (available_width - content_length) // 2
|
start_position = (available_width - content_length) // 2
|
||||||
@ -114,15 +121,14 @@ class CenterRightAlignmentHandler(AlignmentHandler):
|
|||||||
|
|
||||||
if actual_spacing < min_spacing:
|
if actual_spacing < min_spacing:
|
||||||
return actual_spacing, max(0, start_position), True
|
return actual_spacing, max(0, start_position), True
|
||||||
|
|
||||||
return ideal_space, max(0, start_position), False
|
|
||||||
|
|
||||||
|
return ideal_space, max(0, start_position), False
|
||||||
|
|
||||||
class JustifyAlignmentHandler(AlignmentHandler):
|
class JustifyAlignmentHandler(AlignmentHandler):
|
||||||
"""Handler for justified text with full justification."""
|
"""Handler for justified text with full justification."""
|
||||||
|
|
||||||
def calculate_spacing_and_position(self, text_objects: List['Text'],
|
def calculate_spacing_and_position(self, text_objects: List['Text'],
|
||||||
available_width: int, min_spacing: int,
|
available_width: int, min_spacing: int,
|
||||||
max_spacing: int) -> Tuple[int, int, bool]:
|
max_spacing: int) -> Tuple[int, int, bool]:
|
||||||
"""Justified alignment distributes space to fill the entire line width."""
|
"""Justified alignment distributes space to fill the entire line width."""
|
||||||
|
|
||||||
@ -132,17 +138,14 @@ class JustifyAlignmentHandler(AlignmentHandler):
|
|||||||
|
|
||||||
actual_spacing = residual_space // num_gaps
|
actual_spacing = residual_space // num_gaps
|
||||||
ideal_space = (min_spacing + max_spacing)//2
|
ideal_space = (min_spacing + max_spacing)//2
|
||||||
|
print(actual_spacing)
|
||||||
# can we touch the end?
|
# can we touch the end?
|
||||||
if actual_spacing < max_spacing:
|
if actual_spacing < max_spacing:
|
||||||
if actual_spacing < min_spacing:
|
if actual_spacing < min_spacing:
|
||||||
|
# Ensure we never return spacing less than min_spacing
|
||||||
return min_spacing, 0, True
|
return min_spacing, 0, True
|
||||||
return actual_spacing, 0, False
|
return max(min_spacing, actual_spacing), 0, False
|
||||||
return ideal_space,0,False
|
return ideal_space, 0, False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Text(Renderable, Queriable):
|
class Text(Renderable, Queriable):
|
||||||
"""
|
"""
|
||||||
@ -153,7 +156,7 @@ class Text(Renderable, Queriable):
|
|||||||
def __init__(self, text: str, style: Font, draw: ImageDraw.Draw, source: Optional[Word] = None, line: Optional[Line] = None):
|
def __init__(self, text: str, style: Font, draw: ImageDraw.Draw, source: Optional[Word] = None, line: Optional[Line] = None):
|
||||||
"""
|
"""
|
||||||
Initialize a Text object.
|
Initialize a Text object.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: The text content to render
|
text: The text content to render
|
||||||
style: The font style to use for rendering
|
style: The font style to use for rendering
|
||||||
@ -165,10 +168,10 @@ class Text(Renderable, Queriable):
|
|||||||
self._source = source
|
self._source = source
|
||||||
self._origin = np.array([0, 0])
|
self._origin = np.array([0, 0])
|
||||||
self._draw = draw
|
self._draw = draw
|
||||||
|
|
||||||
# Calculate dimensions
|
# Calculate dimensions
|
||||||
self._calculate_dimensions()
|
self._calculate_dimensions()
|
||||||
|
|
||||||
def _calculate_dimensions(self):
|
def _calculate_dimensions(self):
|
||||||
"""Calculate the width and height of the text based on the font metrics"""
|
"""Calculate the width and height of the text based on the font metrics"""
|
||||||
# Get the size using PIL's text size functionality
|
# Get the size using PIL's text size functionality
|
||||||
@ -186,12 +189,12 @@ class Text(Renderable, Queriable):
|
|||||||
def text(self) -> str:
|
def text(self) -> str:
|
||||||
"""Get the text content"""
|
"""Get the text content"""
|
||||||
return self._text
|
return self._text
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def style(self) -> Font:
|
def style(self) -> Font:
|
||||||
"""Get the text style"""
|
"""Get the text style"""
|
||||||
return self._style
|
return self._style
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def origin(self) -> np.ndarray:
|
def origin(self) -> np.ndarray:
|
||||||
"""Get the origin of the text"""
|
"""Get the origin of the text"""
|
||||||
@ -201,78 +204,74 @@ class Text(Renderable, Queriable):
|
|||||||
def line(self) -> Optional[Line]:
|
def line(self) -> Optional[Line]:
|
||||||
"""Get the line containing this text"""
|
"""Get the line containing this text"""
|
||||||
return self._line
|
return self._line
|
||||||
|
|
||||||
@line.setter
|
@line.setter
|
||||||
def line(self, line):
|
def line(self, line):
|
||||||
"""Set the line containing this text"""
|
"""Set the line containing this text"""
|
||||||
self._line = line
|
self._line = line
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def width(self) -> int:
|
def width(self) -> int:
|
||||||
"""Get the width of the text"""
|
"""Get the width of the text"""
|
||||||
return self._width
|
return self._width
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def size(self) -> int:
|
def size(self) -> int:
|
||||||
"""Get the width of the text"""
|
"""Get the width of the text"""
|
||||||
return np.array((self._width, self._style.font_size))
|
return np.array((self._width, self._style.font_size))
|
||||||
|
|
||||||
def set_origin(self, origin:np.generic):
|
def set_origin(self, origin:np.generic):
|
||||||
"""Set the origin (left baseline ("ls")) of this text element"""
|
"""Set the origin (left baseline ("ls")) of this text element"""
|
||||||
self._origin = origin
|
self._origin = origin
|
||||||
|
|
||||||
def add_line(self, line):
|
def add_line(self, line):
|
||||||
"""Add this text to a line"""
|
"""Add this text to a line"""
|
||||||
self._line = line
|
self._line = line
|
||||||
|
|
||||||
def _apply_decoration(self):
|
def _apply_decoration(self):
|
||||||
"""Apply text decoration (underline or strikethrough)"""
|
"""Apply text decoration (underline or strikethrough)"""
|
||||||
if self._style.decoration == TextDecoration.UNDERLINE:
|
if self._style.decoration == TextDecoration.UNDERLINE:
|
||||||
# Draw underline at about 90% of the height
|
# Draw underline at about 90% of the height
|
||||||
|
|
||||||
y_position = self._origin[1] - 0.1*self._style.font_size
|
y_position = self._origin[1] - 0.1*self._style.font_size
|
||||||
self._draw.line([(0, y_position), (self._width, y_position)],
|
self._draw.line([(0, y_position), (self._width, y_position)],
|
||||||
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
|
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
|
||||||
|
|
||||||
elif self._style.decoration == TextDecoration.STRIKETHROUGH:
|
elif self._style.decoration == TextDecoration.STRIKETHROUGH:
|
||||||
# Draw strikethrough at about 50% of the height
|
# Draw strikethrough at about 50% of the height
|
||||||
y_position = self._origin[1] + self._middle_y
|
y_position = self._origin[1] + self._middle_y
|
||||||
self._draw.line([(0, y_position), (self._width, y_position)],
|
self._draw.line([(0, y_position), (self._width, y_position)],
|
||||||
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
|
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
"""
|
"""
|
||||||
Render the text to an image.
|
Render the text to an image.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A PIL Image containing the rendered text
|
A PIL Image containing the rendered text
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Draw the text background if specified
|
# Draw the text background if specified
|
||||||
if self._style.background and self._style.background[3] > 0: # If alpha > 0
|
if self._style.background and self._style.background[3] > 0: # If alpha > 0
|
||||||
self._draw.rectangle([self._origin, self._origin+self._size], fill=self._style.background)
|
self._draw.rectangle([self._origin, self._origin+self._size], fill=self._style.background)
|
||||||
|
|
||||||
# Draw the text using calculated offsets to prevent cropping
|
# Draw the text using calculated offsets to prevent cropping
|
||||||
self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font,anchor="ls", fill=self._style.colour)
|
self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font, fill=self._style.colour)
|
||||||
|
|
||||||
# Apply any text decorations
|
# Apply any text decorations
|
||||||
self._apply_decoration()
|
self._apply_decoration()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Line(Box):
|
class Line(Box):
|
||||||
"""
|
"""
|
||||||
A line of text consisting of Text objects with consistent spacing.
|
A line of text consisting of Text objects with consistent spacing.
|
||||||
Each Text represents a word or word fragment that can be rendered.
|
Each Text represents a word or word fragment that can be rendered.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, spacing: Tuple[int, int], origin, size, draw: ImageDraw.Draw,font: Optional[Font] = None,
|
def __init__(self, spacing: Tuple[int, int], origin, size, draw: ImageDraw.Draw,font: Optional[Font] = None,
|
||||||
callback=None, sheet=None, mode=None, halign=Alignment.CENTER,
|
callback=None, sheet=None, mode=None, halign=Alignment.CENTER,
|
||||||
valign=Alignment.CENTER, previous = None):
|
valign=Alignment.CENTER, previous = None):
|
||||||
"""
|
"""
|
||||||
Initialize a new line.
|
Initialize a new line.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
spacing: A tuple of (min_spacing, max_spacing) between words
|
spacing: A tuple of (min_spacing, max_spacing) between words
|
||||||
origin: The top-left position of the line
|
origin: The top-left position of the line
|
||||||
@ -298,20 +297,21 @@ class Line(Box):
|
|||||||
self._draw = draw
|
self._draw = draw
|
||||||
self._spacing_render = (spacing[0] + spacing[1]) //2
|
self._spacing_render = (spacing[0] + spacing[1]) //2
|
||||||
self._position_render = 0
|
self._position_render = 0
|
||||||
|
|
||||||
# Create the appropriate alignment handler
|
# Create the appropriate alignment handler
|
||||||
self._alignment_handler = self._create_alignment_handler(halign)
|
self._alignment_handler = self._create_alignment_handler(halign)
|
||||||
|
|
||||||
def _create_alignment_handler(self, alignment: Alignment) -> AlignmentHandler:
|
def _create_alignment_handler(self, alignment: Alignment) -> AlignmentHandler:
|
||||||
"""
|
"""
|
||||||
Create the appropriate alignment handler based on the alignment type.
|
Create the appropriate alignment handler based on the alignment type.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
alignment: The alignment type
|
alignment: The alignment type
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The appropriate alignment handler instance
|
The appropriate alignment handler instance
|
||||||
"""
|
"""
|
||||||
|
print("HALGIN!!!!!", alignment)
|
||||||
if alignment == Alignment.LEFT:
|
if alignment == Alignment.LEFT:
|
||||||
return LeftAlignmentHandler()
|
return LeftAlignmentHandler()
|
||||||
elif alignment == Alignment.JUSTIFY:
|
elif alignment == Alignment.JUSTIFY:
|
||||||
@ -319,26 +319,23 @@ class Line(Box):
|
|||||||
else: # CENTER or RIGHT
|
else: # CENTER or RIGHT
|
||||||
return CenterRightAlignmentHandler(alignment)
|
return CenterRightAlignmentHandler(alignment)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def text_objects(self) -> List[Text]:
|
def text_objects(self) -> List[Text]:
|
||||||
"""Get the list of Text objects in this line"""
|
"""Get the list of Text objects in this line"""
|
||||||
return self._text_objects
|
return self._text_objects
|
||||||
|
|
||||||
def set_next(self, line: Line):
|
def set_next(self, line: Line):
|
||||||
"""Set the next line in sequence"""
|
"""Set the next line in sequence"""
|
||||||
self._next = line
|
self._next = line
|
||||||
|
|
||||||
|
|
||||||
def add_word(self, word: 'Word', part:Optional[Text]=None) -> Tuple[bool, Optional['Text']]:
|
def add_word(self, word: 'Word', part:Optional[Text]=None) -> Tuple[bool, Optional['Text']]:
|
||||||
"""
|
"""
|
||||||
Add a word to this line using intelligent word fitting strategies.
|
Add a word to this line using intelligent word fitting strategies.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: The text content of the word
|
text: The text content of the word
|
||||||
font: The font to use for this word, or None to use the line's default font
|
font: The font to use for this word, or None to use the line's default font
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True if the word was successfully added, False if it couldn't fit, in case of hypenation the hyphenated part is returned
|
True if the word was successfully added, False if it couldn't fit, in case of hypenation the hyphenated part is returned
|
||||||
"""
|
"""
|
||||||
@ -350,7 +347,7 @@ class Line(Box):
|
|||||||
text = Text.from_word(word, self._draw)
|
text = Text.from_word(word, self._draw)
|
||||||
self._text_objects.append(text)
|
self._text_objects.append(text)
|
||||||
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1])
|
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1])
|
||||||
|
print(self._alignment_handler)
|
||||||
if not overflow:
|
if not overflow:
|
||||||
self._words.append(word)
|
self._words.append(word)
|
||||||
word.add_concete(text)
|
word.add_concete(text)
|
||||||
@ -358,11 +355,9 @@ class Line(Box):
|
|||||||
self._position_render = position
|
self._position_render = position
|
||||||
self._spacing_render = spacing
|
self._spacing_render = spacing
|
||||||
return True, None # no overflow word is just added!
|
return True, None # no overflow word is just added!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
_=self._text_objects.pop()
|
_=self._text_objects.pop()
|
||||||
splits = [(Text(pair[0], word.style,self._draw, line=self, source=word), Text( pair[1], word.style, self._draw, line=self, source=word)) for pair in word.possible_hyphenation()]
|
splits = [(Text(pair[0]+"-", word.style,self._draw, line=self, source=word), Text( pair[1], word.style, self._draw, line=self, source=word)) for pair in word.possible_hyphenation()]
|
||||||
|
|
||||||
#worst case scenario!
|
#worst case scenario!
|
||||||
if len(splits)==0 and len(word.text)>=6:
|
if len(splits)==0 and len(word.text)>=6:
|
||||||
@ -383,7 +378,7 @@ class Line(Box):
|
|||||||
|
|
||||||
elif len(splits)==0 and len(word.text)<6:
|
elif len(splits)==0 and len(word.text)<6:
|
||||||
return False, None # this endpoint means no words can be added.
|
return False, None # this endpoint means no words can be added.
|
||||||
|
|
||||||
spacings = []
|
spacings = []
|
||||||
positions = []
|
positions = []
|
||||||
|
|
||||||
@ -403,26 +398,28 @@ class Line(Box):
|
|||||||
self._words.append(word)
|
self._words.append(word)
|
||||||
return True, splits[idx][1] # we apply a phyphenated split with best spacing
|
return True, splits[idx][1] # we apply a phyphenated split with best spacing
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
"""
|
"""
|
||||||
Render the line with all its text objects using the alignment handler system.
|
Render the line with all its text objects using the alignment handler system.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A PIL Image containing the rendered line
|
A PIL Image containing the rendered line
|
||||||
"""
|
"""
|
||||||
|
# Recalculate spacing and position for current text objects to ensure accuracy
|
||||||
|
if len(self._text_objects) > 0:
|
||||||
|
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(
|
||||||
|
self._text_objects, self._size[0], self._spacing[0], self._spacing[1]
|
||||||
|
)
|
||||||
|
self._spacing_render = spacing
|
||||||
|
self._position_render = position
|
||||||
|
|
||||||
self._position_render # x-offset
|
|
||||||
self._spacing_render # x-spacing
|
|
||||||
y_cursor = self._origin[1] + self._baseline
|
y_cursor = self._origin[1] + self._baseline
|
||||||
|
|
||||||
x_cursor = self._position_render
|
# Start x_cursor at line origin plus any alignment offset
|
||||||
|
x_cursor = self._origin[0] + self._position_render
|
||||||
for text in self._text_objects:
|
for text in self._text_objects:
|
||||||
|
# Update text draw context to current draw context
|
||||||
text.set_origin(np.array([x_cursor,y_cursor]))
|
text._draw = self._draw
|
||||||
|
text.set_origin(np.array([x_cursor, y_cursor]))
|
||||||
text.render()
|
text.render()
|
||||||
x_cursor += self._spacing_render + text.width # x-spacing + width of text object
|
x_cursor += self._spacing_render + text.width # x-spacing + width of text object
|
||||||
|
|||||||
@ -4,7 +4,7 @@ from PIL import Image
|
|||||||
|
|
||||||
from pyWebLayout.core.base import Renderable, Layoutable
|
from pyWebLayout.core.base import Renderable, Layoutable
|
||||||
from .box import Box
|
from .box import Box
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
|
|
||||||
class Viewport(Box, Layoutable):
|
class Viewport(Box, Layoutable):
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
from abc import ABC
|
from abc import ABC
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from pyWebLayout.style import Alignment
|
from pyWebLayout.style.alignment import Alignment
|
||||||
|
|
||||||
|
|
||||||
class Renderable(ABC):
|
class Renderable(ABC):
|
||||||
@ -66,4 +66,4 @@ class Queriable(ABC):
|
|||||||
"""
|
"""
|
||||||
point_array = np.array(point)
|
point_array = np.array(point)
|
||||||
relative_point = point_array - self._origin
|
relative_point = point_array - self._origin
|
||||||
return np.all((0 <= relative_point) & (relative_point < self.size))
|
return np.all((0 <= relative_point) & (relative_point < self.size))
|
||||||
|
|||||||
@ -8,7 +8,7 @@ from pyWebLayout.concrete.text import (
|
|||||||
Line, Text,
|
Line, Text,
|
||||||
LeftAlignmentHandler, CenterRightAlignmentHandler, JustifyAlignmentHandler
|
LeftAlignmentHandler, CenterRightAlignmentHandler, JustifyAlignmentHandler
|
||||||
)
|
)
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
from pyWebLayout.style import Font
|
from pyWebLayout.style import Font
|
||||||
|
|
||||||
def demonstrate_handler_system():
|
def demonstrate_handler_system():
|
||||||
|
|||||||
@ -12,7 +12,7 @@ from pyWebLayout.concrete import (
|
|||||||
Viewport, ScrollablePageContent, Text, Box, RenderableImage
|
Viewport, ScrollablePageContent, Text, Box, RenderableImage
|
||||||
)
|
)
|
||||||
from pyWebLayout.style.fonts import Font, FontWeight
|
from pyWebLayout.style.fonts import Font, FontWeight
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
|
|
||||||
def create_large_document_content():
|
def create_large_document_content():
|
||||||
|
|||||||
@ -29,7 +29,7 @@ from pyWebLayout.abstract.functional import (
|
|||||||
from pyWebLayout.abstract.block import Paragraph
|
from pyWebLayout.abstract.block import Paragraph
|
||||||
from pyWebLayout.abstract.inline import Word
|
from pyWebLayout.abstract.inline import Word
|
||||||
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
|
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
from pyWebLayout.layout.paragraph_layout import ParagraphLayout, ParagraphLayoutResult
|
from pyWebLayout.layout.paragraph_layout import ParagraphLayout, ParagraphLayoutResult
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -30,7 +30,7 @@ from pyWebLayout.abstract.functional import (
|
|||||||
from pyWebLayout.abstract.block import Paragraph
|
from pyWebLayout.abstract.block import Paragraph
|
||||||
from pyWebLayout.abstract.inline import Word
|
from pyWebLayout.abstract.inline import Word
|
||||||
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
|
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -62,16 +62,10 @@ class EPUBReader:
|
|||||||
# Extract the EPUB file
|
# Extract the EPUB file
|
||||||
self.temp_dir = tempfile.mkdtemp()
|
self.temp_dir = tempfile.mkdtemp()
|
||||||
self._extract_epub()
|
self._extract_epub()
|
||||||
|
|
||||||
# Parse the package document (content.opf)
|
|
||||||
self._parse_package_document()
|
self._parse_package_document()
|
||||||
|
|
||||||
# Parse the table of contents
|
|
||||||
self._parse_toc()
|
self._parse_toc()
|
||||||
|
|
||||||
# Create a Book object
|
|
||||||
self._create_book()
|
self._create_book()
|
||||||
|
|
||||||
# Add chapters to the book
|
# Add chapters to the book
|
||||||
self._add_chapters()
|
self._add_chapters()
|
||||||
|
|
||||||
@ -377,7 +371,7 @@ class EPUBReader:
|
|||||||
html = f.read()
|
html = f.read()
|
||||||
|
|
||||||
# Parse HTML and add blocks to chapter
|
# Parse HTML and add blocks to chapter
|
||||||
blocks = parse_html_string(html)
|
blocks = parse_html_string(html, document=self.book)
|
||||||
|
|
||||||
# Copy blocks to the chapter
|
# Copy blocks to the chapter
|
||||||
for block in blocks:
|
for block in blocks:
|
||||||
|
|||||||
@ -27,7 +27,8 @@ from pyWebLayout.abstract.block import (
|
|||||||
Image,
|
Image,
|
||||||
)
|
)
|
||||||
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
|
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
|
||||||
from pyWebLayout.style.abstract_style import AbstractStyle, FontFamily, FontSize, TextAlign
|
from pyWebLayout.style.abstract_style import AbstractStyle, FontFamily, FontSize
|
||||||
|
from pyWebLayout.style import Alignment as TextAlign
|
||||||
|
|
||||||
|
|
||||||
class StyleContext(NamedTuple):
|
class StyleContext(NamedTuple):
|
||||||
|
|||||||
@ -1,16 +1,15 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import List, Tuple, Optional
|
from typing import List, Tuple, Optional, Union
|
||||||
|
|
||||||
from pyWebLayout.concrete import Page, Line, Text
|
from pyWebLayout.concrete import Page, Line, Text
|
||||||
from pyWebLayout.abstract import Paragraph, Word, Link
|
from pyWebLayout.abstract import Paragraph, Word, Link
|
||||||
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry
|
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry
|
||||||
|
|
||||||
|
|
||||||
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
|
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
|
||||||
"""
|
"""
|
||||||
Layout a paragraph of text within a given page.
|
Layout a paragraph of text within a given page.
|
||||||
|
|
||||||
This function extracts word spacing constraints from the style system
|
This function extracts word spacing constraints from the style system
|
||||||
and uses them to create properly spaced lines of text.
|
and uses them to create properly spaced lines of text.
|
||||||
|
|
||||||
@ -19,7 +18,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
|
|||||||
page: The page to layout the paragraph on
|
page: The page to layout the paragraph on
|
||||||
start_word: Index of the first word to process (for continuation)
|
start_word: Index of the first word to process (for continuation)
|
||||||
pretext: Optional pretext from a previous hyphenated word
|
pretext: Optional pretext from a previous hyphenated word
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of:
|
Tuple of:
|
||||||
- bool: True if paragraph was completely laid out, False if page ran out of space
|
- bool: True if paragraph was completely laid out, False if page ran out of space
|
||||||
@ -28,29 +27,36 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
|
|||||||
"""
|
"""
|
||||||
if not paragraph.words:
|
if not paragraph.words:
|
||||||
return True, None, None
|
return True, None, None
|
||||||
|
|
||||||
# Validate inputs
|
# Validate inputs
|
||||||
if start_word >= len(paragraph.words):
|
if start_word >= len(paragraph.words):
|
||||||
return True, None, None
|
return True, None, None
|
||||||
|
|
||||||
# Get the concrete style with resolved word spacing constraints
|
# Get the concrete style with resolved word spacing constraints
|
||||||
style_registry = ConcreteStyleRegistry(page.style_resolver)
|
style_registry = ConcreteStyleRegistry(page.style_resolver)
|
||||||
concrete_style = style_registry.get_concrete_style(paragraph.style)
|
concrete_style = style_registry.get_concrete_style(paragraph.style)
|
||||||
|
|
||||||
# Extract word spacing constraints (min, max) for Line constructor
|
# Extract word spacing constraints (min, max) for Line constructor
|
||||||
word_spacing_constraints = (
|
word_spacing_constraints = (
|
||||||
int(concrete_style.word_spacing_min),
|
int(concrete_style.word_spacing_min),
|
||||||
int(concrete_style.word_spacing_max)
|
int(concrete_style.word_spacing_max)
|
||||||
)
|
)
|
||||||
|
|
||||||
def create_new_line() -> Optional[Line]:
|
def create_new_line(word: Optional[Union[Word, Text]] = None) -> Optional[Line]:
|
||||||
"""Helper function to create a new line, returns None if page is full."""
|
"""Helper function to create a new line, returns None if page is full."""
|
||||||
if not page.can_fit_line(paragraph.line_height):
|
if not page.can_fit_line(paragraph.line_height):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
y_cursor = page._current_y_offset
|
y_cursor = page._current_y_offset
|
||||||
x_cursor = page.border_size
|
x_cursor = page.border_size
|
||||||
|
|
||||||
|
# Create a temporary Text object to calculate word width
|
||||||
|
if word:
|
||||||
|
temp_text = Text.from_word(word, page.draw)
|
||||||
|
word_width = temp_text.width
|
||||||
|
else:
|
||||||
|
word_width = 0
|
||||||
|
|
||||||
return Line(
|
return Line(
|
||||||
spacing=word_spacing_constraints,
|
spacing=word_spacing_constraints,
|
||||||
origin=(x_cursor, y_cursor),
|
origin=(x_cursor, y_cursor),
|
||||||
@ -59,38 +65,67 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
|
|||||||
font=concrete_style.create_font(),
|
font=concrete_style.create_font(),
|
||||||
halign=concrete_style.text_align
|
halign=concrete_style.text_align
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create initial line
|
# Create initial line
|
||||||
current_line = create_new_line()
|
current_line = create_new_line()
|
||||||
if not current_line:
|
if not current_line:
|
||||||
return False, start_word, pretext
|
return False, start_word, pretext
|
||||||
|
|
||||||
page.add_child(current_line)
|
page.add_child(current_line)
|
||||||
page._current_y_offset += paragraph.line_height
|
page._current_y_offset += paragraph.line_height
|
||||||
|
|
||||||
# Track current position in paragraph
|
# Track current position in paragraph
|
||||||
current_pretext = pretext
|
current_pretext = pretext
|
||||||
|
|
||||||
# Process words starting from start_word
|
# Process words starting from start_word
|
||||||
for i, word in enumerate(paragraph.words[start_word:], start=start_word):
|
for i, word in enumerate(paragraph.words[start_word:], start=start_word):
|
||||||
|
if current_pretext:
|
||||||
|
print(current_pretext.text)
|
||||||
success, overflow_text = current_line.add_word(word, current_pretext)
|
success, overflow_text = current_line.add_word(word, current_pretext)
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
# Word fit successfully
|
# Word fit successfully
|
||||||
current_pretext = None # Clear pretext after successful placement
|
if overflow_text is not None:
|
||||||
|
# If there's overflow text, we need to start a new line with it
|
||||||
|
current_pretext = overflow_text
|
||||||
|
current_line = create_new_line(overflow_text)
|
||||||
|
if not current_line:
|
||||||
|
# If we can't create a new line, return with the current state
|
||||||
|
return False, i, overflow_text
|
||||||
|
page.add_child(current_line)
|
||||||
|
page._current_y_offset += paragraph.line_height
|
||||||
|
# Continue to the next word
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# No overflow, clear pretext
|
||||||
|
current_pretext = None
|
||||||
else:
|
else:
|
||||||
# Word didn't fit, need a new line
|
# Word didn't fit, need a new line
|
||||||
current_line = create_new_line()
|
current_line = create_new_line(word)
|
||||||
if not current_line:
|
if not current_line:
|
||||||
# Page is full, return current position
|
# Page is full, return current position
|
||||||
return False, i, overflow_text
|
return False, i, overflow_text
|
||||||
|
|
||||||
|
# Check if the word will fit on the new line before adding it
|
||||||
|
temp_text = Text.from_word(word, page.draw)
|
||||||
|
if temp_text.width > current_line.size[0]:
|
||||||
|
# Word is too wide for the line, we need to hyphenate it
|
||||||
|
if len(word.text) >= 6:
|
||||||
|
# Try to hyphenate the word
|
||||||
|
splits = [(Text(pair[0], word.style, page.draw, line=current_line, source=word), Text(pair[1], word.style, page.draw, line=current_line, source=word)) for pair in word.possible_hyphenation()]
|
||||||
|
if len(splits) > 0:
|
||||||
|
# Use the first hyphenation point
|
||||||
|
first_part, second_part = splits[0]
|
||||||
|
current_line.add_word(word, first_part)
|
||||||
|
current_pretext = second_part
|
||||||
|
continue
|
||||||
|
|
||||||
page.add_child(current_line)
|
page.add_child(current_line)
|
||||||
page._current_y_offset += paragraph.line_height
|
page._current_y_offset += paragraph.line_height
|
||||||
|
|
||||||
# Try to add the word to the new line
|
# Try to add the word to the new line
|
||||||
success, overflow_text = current_line.add_word(word, current_pretext)
|
success, overflow_text = current_line.add_word(word, current_pretext)
|
||||||
|
|
||||||
if not success:
|
if not success:
|
||||||
# Word still doesn't fit even on a new line
|
# Word still doesn't fit even on a new line
|
||||||
# This might happen with very long words or narrow pages
|
# This might happen with very long words or narrow pages
|
||||||
@ -104,59 +139,58 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
|
|||||||
return False, i, None
|
return False, i, None
|
||||||
else:
|
else:
|
||||||
current_pretext = overflow_text # May be None or hyphenated remainder
|
current_pretext = overflow_text # May be None or hyphenated remainder
|
||||||
|
|
||||||
# All words processed successfully
|
# All words processed successfully
|
||||||
return True, None, None
|
return True, None, None
|
||||||
|
|
||||||
|
|
||||||
class DocumentLayouter:
    """
    Object-oriented front end to ``paragraph_layouter`` bound to one page.
    """

    def __init__(self, page: Page):
        """Bind the layouter to ``page`` and build a style registry from its style resolver."""
        self.page = page
        self.style_registry = ConcreteStyleRegistry(page.style_resolver)

    def layout_paragraph(self, paragraph: Paragraph, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
        """
        Lay out one paragraph on the bound page.

        Thin wrapper that forwards to the module-level ``paragraph_layouter``
        with ``self.page`` as the target page.

        Args:
            paragraph: The paragraph to lay out
            start_word: Index of the first word to process (for continuation)
            pretext: Optional pretext from a previous hyphenated word

        Returns:
            Whatever ``paragraph_layouter`` returns: a
            (complete, next_word_index, remaining_pretext) tuple.
        """
        return paragraph_layouter(paragraph, self.page, start_word=start_word, pretext=pretext)

    def layout_document(self, paragraphs: List[Paragraph]) -> bool:
        """
        Lay out several paragraphs one after another on the bound page.

        Args:
            paragraphs: List of paragraphs to layout

        Returns:
            True if every paragraph was laid out completely, False as soon as
            one paragraph could not be completed.
        """
        for paragraph in paragraphs:
            # Every paragraph starts at its first word with no carried-over pretext.
            complete, _next_word, _remaining_pretext = self.layout_paragraph(paragraph, 0, None)

            if not complete:
                # Either an error (no resume index) or the page ran out of
                # space; page breaks are not handled here, so both cases
                # report failure to the caller.
                return False

        return True
|
||||||
|
|||||||
450
pyWebLayout/layout/ereader_layout.py
Normal file
450
pyWebLayout/layout/ereader_layout.py
Normal file
@ -0,0 +1,450 @@
|
|||||||
|
"""
|
||||||
|
Enhanced ereader layout system with position tracking, font scaling, and multi-page support.
|
||||||
|
|
||||||
|
This module provides the core infrastructure for building high-performance ereader applications
|
||||||
|
with features like:
|
||||||
|
- Precise position tracking tied to abstract document structure
|
||||||
|
- Font scaling support
|
||||||
|
- Bidirectional page rendering (forward/backward)
|
||||||
|
- Chapter navigation based on HTML headings
|
||||||
|
- Multi-process page buffering
|
||||||
|
- Sub-second page rendering performance
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
from typing import List, Dict, Tuple, Optional, Union, Generator, Any
|
||||||
|
from enum import Enum
|
||||||
|
import json
|
||||||
|
import multiprocessing
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HeadingLevel, Table, HList
|
||||||
|
from pyWebLayout.abstract.inline import Word
|
||||||
|
from pyWebLayout.concrete.page import Page
|
||||||
|
from pyWebLayout.concrete.text import Line, Text
|
||||||
|
from pyWebLayout.style.page_style import PageStyle
|
||||||
|
from pyWebLayout.style import Font
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RenderingPosition:
    """
    Snapshot of where rendering should resume inside a document.

    All indices refer to the abstract document structure rather than to
    rendered geometry, so a stored position stays valid across font changes.
    """
    chapter_index: int = 0  # Chapter number (chapters are derived from headings)
    block_index: int = 0  # Block number inside the chapter
    word_index: int = 0  # Word number inside the block (paragraphs)
    table_row: int = 0  # Row number (tables)
    table_col: int = 0  # Column number (tables)
    list_item_index: int = 0  # Item number (lists)
    remaining_pretext: Optional[str] = None  # Tail of a hyphenated word still to be placed
    page_y_offset: int = 0  # Vertical offset on the page

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict, e.g. for saving to a file or database."""
        field_values = asdict(self)
        return field_values

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'RenderingPosition':
        """Rebuild a position from a mapping of field names to values."""
        return cls(**data)

    def copy(self) -> 'RenderingPosition':
        """Return an independent duplicate of this position."""
        field_values = asdict(self)
        return RenderingPosition(**field_values)

    def __eq__(self, other) -> bool:
        """Equal only to another RenderingPosition whose fields all match."""
        return isinstance(other, RenderingPosition) and asdict(self) == asdict(other)

    def __hash__(self) -> int:
        """Hash the field values so a position can be used as a dict key."""
        return hash(tuple(asdict(self).values()))
|
||||||
|
|
||||||
|
|
||||||
|
class ChapterInfo:
    """Plain record describing one chapter/section entry of a document."""

    def __init__(self, title: str, level: HeadingLevel, position: RenderingPosition, block_index: int):
        """
        Store the chapter attributes unchanged.

        Args:
            title: Text of the chapter heading
            level: Heading level of the chapter
            position: Rendering position associated with the chapter
            block_index: Index of the heading block
        """
        self.title, self.level = title, level
        self.position, self.block_index = position, block_index
|
||||||
|
|
||||||
|
|
||||||
|
class ChapterNavigator:
    """
    Handles chapter/section navigation based on the heading blocks of a document.

    On construction the block list is scanned once; every Heading becomes a
    ChapterInfo entry, which backs the table of contents and the lookups below.
    """

    def __init__(self, blocks: List[Block]):
        """
        Args:
            blocks: Document blocks in reading order
        """
        self.blocks = blocks
        self.chapters: List[ChapterInfo] = []
        self._build_chapter_map()

    def _build_chapter_map(self):
        """Scan ``self.blocks`` and append one ChapterInfo per Heading found."""
        current_chapter_index = 0

        for block_index, block in enumerate(self.blocks):
            if not isinstance(block, Heading):
                continue

            # The position addresses the heading relative to its chapter
            # (block_index=0); the heading's absolute index in self.blocks is
            # stored separately on ChapterInfo.block_index.
            position = RenderingPosition(
                chapter_index=current_chapter_index,
                block_index=0,
                word_index=0,
                table_row=0,
                table_col=0,
                list_item_index=0
            )

            self.chapters.append(ChapterInfo(
                title=self._extract_heading_text(block),
                level=block.level,
                position=position,
                block_index=block_index
            ))

            # Only top-level headings (H1) advance the chapter counter
            if block.level == HeadingLevel.H1:
                current_chapter_index += 1

    def _extract_heading_text(self, heading: Heading) -> str:
        """
        Join the text of the heading's Word items with single spaces.

        Items that are not Word instances are ignored.
        """
        words = heading.words
        # Depending on the block API, ``words`` may be a sequence attribute or
        # a method returning an iterable; accept both so this does not fail
        # with "object is not callable" on the attribute form.
        if callable(words):
            words = words()
        return " ".join(word.text for word in words if isinstance(word, Word))

    def get_table_of_contents(self) -> List[Tuple[str, HeadingLevel, RenderingPosition]]:
        """Return (title, level, position) for every chapter, in document order."""
        return [(chapter.title, chapter.level, chapter.position) for chapter in self.chapters]

    def get_chapter_position(self, chapter_title: str) -> Optional[RenderingPosition]:
        """
        Return the position of the first chapter whose title matches.

        The comparison ignores case (both sides are lower-cased). Returns None
        when no chapter matches.
        """
        return next(
            (chapter.position for chapter in self.chapters
             if chapter.title.lower() == chapter_title.lower()),
            None
        )

    def get_current_chapter(self, position: RenderingPosition) -> Optional[ChapterInfo]:
        """
        Determine which chapter contains the given position.

        Returns None when there are no chapters. Otherwise returns the first
        chapter whose successor starts at a chapter_index greater than
        ``position.chapter_index``; if there is no such chapter, the last
        chapter is returned.
        """
        if not self.chapters:
            return None

        # Walk (chapter, next chapter) pairs; the last chapter has no
        # successor and therefore acts as the fallback.
        for chapter, next_chapter in zip(self.chapters, self.chapters[1:]):
            if position.chapter_index < next_chapter.position.chapter_index:
                return chapter

        return self.chapters[-1]
|
||||||
|
|
||||||
|
|
||||||
|
class FontScaler:
    """
    Stateless helpers that derive scaled fonts and word spacing for layout.

    The input font is never modified: a scale factor of exactly 1.0 hands the
    original object back, any other factor produces a new Font.
    """

    @staticmethod
    def scale_font(font: Font, scale_factor: float) -> Font:
        """
        Create a scaled version of a font for layout calculations.

        Args:
            font: Original font object
            scale_factor: Scaling factor (1.0 = no change, 2.0 = double size, etc.)

        Returns:
            ``font`` itself when scale_factor is 1.0, otherwise a new Font
            with the same attributes and a scaled size (truncated to an
            integer, never smaller than 1).
        """
        if scale_factor != 1.0:
            new_size = max(1, int(font.font_size * scale_factor))
            return Font(
                font_path=font._font_path,
                font_size=new_size,
                colour=font.colour,
                weight=font.weight,
                style=font.style,
                decoration=font.decoration,
                background=font.background,
                language=font.language,
                min_hyphenation_width=font.min_hyphenation_width
            )

        return font

    @staticmethod
    def scale_word_spacing(spacing: Tuple[int, int], scale_factor: float) -> Tuple[int, int]:
        """
        Scale (min, max) word spacing constraints proportionally.

        A scale factor of 1.0 returns ``spacing`` unchanged. Otherwise both
        values are multiplied, truncated to integers and clamped to at least
        1 (minimum) and 2 (maximum).
        """
        if scale_factor == 1.0:
            return spacing

        low, high = spacing
        scaled_low = int(low * scale_factor)
        scaled_high = int(high * scale_factor)
        return (
            scaled_low if scaled_low > 1 else 1,
            scaled_high if scaled_high > 2 else 2
        )
|
||||||
|
|
||||||
|
|
||||||
|
class BidirectionalLayouter:
|
||||||
|
"""
|
||||||
|
Core layout engine supporting both forward and backward page rendering.
|
||||||
|
Handles font scaling and maintains position state.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, blocks: List[Block], page_style: PageStyle, page_size: Tuple[int, int] = (800, 600)):
|
||||||
|
self.blocks = blocks
|
||||||
|
self.page_style = page_style
|
||||||
|
self.page_size = page_size
|
||||||
|
self.chapter_navigator = ChapterNavigator(blocks)
|
||||||
|
|
||||||
|
def render_page_forward(self, position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]:
|
||||||
|
"""
|
||||||
|
Render a page starting from the given position, moving forward through the document.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
position: Starting position in document
|
||||||
|
font_scale: Font scaling factor
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (rendered_page, next_position)
|
||||||
|
"""
|
||||||
|
page = Page(size=self.page_size, style=self.page_style)
|
||||||
|
current_pos = position.copy()
|
||||||
|
|
||||||
|
# Start laying out blocks from the current position
|
||||||
|
while current_pos.chapter_index < len(self.blocks) and page.free_space()[1] > 0:
|
||||||
|
block = self.blocks[current_pos.block_index]
|
||||||
|
|
||||||
|
# Apply font scaling to the block
|
||||||
|
scaled_block = self._scale_block_fonts(block, font_scale)
|
||||||
|
|
||||||
|
# Try to fit the block on the current page
|
||||||
|
success, new_pos = self._layout_block_on_page(scaled_block, page, current_pos, font_scale)
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
# Block doesn't fit, we're done with this page
|
||||||
|
break
|
||||||
|
|
||||||
|
current_pos = new_pos
|
||||||
|
|
||||||
|
return page, current_pos
|
||||||
|
|
||||||
|
def render_page_backward(self, end_position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]:
|
||||||
|
"""
|
||||||
|
Render a page that ends at the given position, filling backward.
|
||||||
|
Critical for "previous page" navigation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
end_position: Position where page should end
|
||||||
|
font_scale: Font scaling factor
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (rendered_page, start_position)
|
||||||
|
"""
|
||||||
|
# This is a complex operation that requires iterative refinement
|
||||||
|
# We'll start with an estimated start position and refine it
|
||||||
|
|
||||||
|
estimated_start = self._estimate_page_start(end_position, font_scale)
|
||||||
|
|
||||||
|
# Render forward from estimated start and see if we reach the target
|
||||||
|
page, actual_end = self.render_page_forward(estimated_start, font_scale)
|
||||||
|
|
||||||
|
# If we overshot or undershot, adjust and try again
|
||||||
|
# This is a simplified implementation - a full version would be more sophisticated
|
||||||
|
if self._position_compare(actual_end, end_position) != 0:
|
||||||
|
# Adjust estimate and try again (simplified)
|
||||||
|
estimated_start = self._adjust_start_estimate(estimated_start, end_position, actual_end)
|
||||||
|
page, actual_end = self.render_page_forward(estimated_start, font_scale)
|
||||||
|
|
||||||
|
return page, estimated_start
|
||||||
|
|
||||||
|
def _scale_block_fonts(self, block: Block, font_scale: float) -> Block:
|
||||||
|
"""Apply font scaling to all fonts in a block"""
|
||||||
|
if font_scale == 1.0:
|
||||||
|
return block
|
||||||
|
|
||||||
|
# This is a simplified implementation
|
||||||
|
# In practice, we'd need to handle each block type appropriately
|
||||||
|
if isinstance(block, Paragraph):
|
||||||
|
scaled_block = Paragraph(FontScaler.scale_font(block.style, font_scale))
|
||||||
|
for word in block.words():
|
||||||
|
if isinstance(word, Word):
|
||||||
|
scaled_word = Word(word.text, FontScaler.scale_font(word.style, font_scale))
|
||||||
|
scaled_block.add_word(scaled_word)
|
||||||
|
return scaled_block
|
||||||
|
|
||||||
|
return block
|
||||||
|
|
||||||
|
def _layout_block_on_page(self, block: Block, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
|
||||||
|
"""
|
||||||
|
Try to layout a block on the page starting from the given position.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (success, new_position)
|
||||||
|
"""
|
||||||
|
if isinstance(block, Paragraph):
|
||||||
|
return self._layout_paragraph_on_page(block, page, position, font_scale)
|
||||||
|
elif isinstance(block, Heading):
|
||||||
|
return self._layout_heading_on_page(block, page, position, font_scale)
|
||||||
|
elif isinstance(block, Table):
|
||||||
|
return self._layout_table_on_page(block, page, position, font_scale)
|
||||||
|
elif isinstance(block, HList):
|
||||||
|
return self._layout_list_on_page(block, page, position, font_scale)
|
||||||
|
else:
|
||||||
|
# Skip unknown block types
|
||||||
|
new_pos = position.copy()
|
||||||
|
new_pos.block_index += 1
|
||||||
|
return True, new_pos
|
||||||
|
|
||||||
|
def _layout_paragraph_on_page(self, paragraph: Paragraph, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
|
||||||
|
"""Layout a paragraph on the page with font scaling support"""
|
||||||
|
# This would integrate with the existing paragraph_layouter but with font scaling
|
||||||
|
# For now, this is a placeholder implementation
|
||||||
|
|
||||||
|
# Calculate scaled line height
|
||||||
|
line_height = int(paragraph.style.font_size * font_scale * 1.2) # 1.2 is line spacing factor
|
||||||
|
|
||||||
|
if not page.can_fit_line(line_height):
|
||||||
|
return False, position
|
||||||
|
|
||||||
|
# Create a line and try to fit words
|
||||||
|
y_cursor = page._current_y_offset
|
||||||
|
x_cursor = page.border_size
|
||||||
|
|
||||||
|
# Scale word spacing constraints
|
||||||
|
word_spacing = FontScaler.scale_word_spacing((5, 15), font_scale) # Default spacing
|
||||||
|
|
||||||
|
line = Line(
|
||||||
|
spacing=word_spacing,
|
||||||
|
origin=(x_cursor, y_cursor),
|
||||||
|
size=(page.available_width, line_height),
|
||||||
|
draw=page.draw,
|
||||||
|
font=FontScaler.scale_font(paragraph.style, font_scale)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add words starting from position.word_index
|
||||||
|
words_added = 0
|
||||||
|
for i, word in enumerate(paragraph.words[position.word_index:], start=position.word_index):
|
||||||
|
success, overflow = line.add_word(word)
|
||||||
|
if not success:
|
||||||
|
break
|
||||||
|
words_added += 1
|
||||||
|
|
||||||
|
if words_added > 0:
|
||||||
|
page.add_child(line)
|
||||||
|
page._current_y_offset += line_height
|
||||||
|
|
||||||
|
new_pos = position.copy()
|
||||||
|
new_pos.word_index += words_added
|
||||||
|
|
||||||
|
# If we finished the paragraph, move to next block
|
||||||
|
if new_pos.word_index >= len(paragraph.words):
|
||||||
|
new_pos.block_index += 1
|
||||||
|
new_pos.word_index = 0
|
||||||
|
|
||||||
|
return True, new_pos
|
||||||
|
|
||||||
|
return False, position
|
||||||
|
|
||||||
|
def _layout_heading_on_page(self, heading: Heading, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
|
||||||
|
"""Layout a heading on the page"""
|
||||||
|
# Similar to paragraph but with heading-specific styling
|
||||||
|
return self._layout_paragraph_on_page(heading, page, position, font_scale)
|
||||||
|
|
||||||
|
def _layout_table_on_page(self, table: Table, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
|
||||||
|
"""Layout a table on the page with column fitting and row continuation"""
|
||||||
|
# This is a complex operation that would need full table layout logic
|
||||||
|
# For now, skip tables
|
||||||
|
new_pos = position.copy()
|
||||||
|
new_pos.block_index += 1
|
||||||
|
new_pos.table_row = 0
|
||||||
|
new_pos.table_col = 0
|
||||||
|
return True, new_pos
|
||||||
|
|
||||||
|
def _layout_list_on_page(self, hlist: HList, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
|
||||||
|
"""Layout a list on the page"""
|
||||||
|
# This would need list-specific layout logic
|
||||||
|
# For now, skip lists
|
||||||
|
new_pos = position.copy()
|
||||||
|
new_pos.block_index += 1
|
||||||
|
new_pos.list_item_index = 0
|
||||||
|
return True, new_pos
|
||||||
|
|
||||||
|
def _estimate_page_start(self, end_position: RenderingPosition, font_scale: float) -> RenderingPosition:
|
||||||
|
"""Estimate where a page should start to end at the given position"""
|
||||||
|
# This is a simplified heuristic - a full implementation would be more sophisticated
|
||||||
|
estimated_start = end_position.copy()
|
||||||
|
|
||||||
|
# Move back by an estimated number of blocks that would fit on a page
|
||||||
|
estimated_blocks_per_page = max(1, int(10 / font_scale)) # Rough estimate
|
||||||
|
estimated_start.block_index = max(0, end_position.block_index - estimated_blocks_per_page)
|
||||||
|
estimated_start.word_index = 0
|
||||||
|
|
||||||
|
return estimated_start
|
||||||
|
|
||||||
|
def _adjust_start_estimate(self, current_start: RenderingPosition, target_end: RenderingPosition, actual_end: RenderingPosition) -> RenderingPosition:
    """
    Nudge a page-start estimate by one block based on where rendering ended.

    Args:
        current_start: Start position used for the last attempt
        target_end: Position the page was supposed to end at
        actual_end: Position the page actually ended at

    Returns:
        Copy of current_start whose block_index is one higher when actual_end
        compares greater than target_end, one lower (never below 0) when it
        compares smaller, and unchanged when both compare equal
    """
    # NOTE(review): moving the start *later* after an overshoot looks like it
    # would push the end further past the target - confirm the intended
    # direction against the caller before relying on this.
    adjusted = current_start.copy()

    outcome = self._position_compare(actual_end, target_end)
    if outcome == 0:
        return adjusted

    step = 1 if outcome > 0 else -1
    adjusted.block_index = max(0, adjusted.block_index + step)
    return adjusted
|
||||||
|
|
||||||
|
def _position_compare(self, pos1: RenderingPosition, pos2: RenderingPosition) -> int:
    """
    Order two positions by chapter, then block, then word.

    Only chapter_index, block_index and word_index take part in the
    comparison; any other fields on the positions are ignored.

    Returns:
        -1 if pos1 comes before pos2, 1 if it comes after, 0 if the three
        fields are equal
    """
    # Most significant field first; the first differing field decides.
    for field in ("chapter_index", "block_index", "word_index"):
        left = getattr(pos1, field)
        right = getattr(pos2, field)
        if left != right:
            return 1 if left > right else -1
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Add can_fit_line method to Page class if it doesn't exist
|
||||||
|
def _add_page_methods():
    """
    Attach ``can_fit_line`` and ``available_width`` to Page when missing.

    Each attribute is only added if Page does not already expose a member
    of that name; existing members are left untouched.
    """
    if not hasattr(Page, 'can_fit_line'):
        def can_fit_line(self, line_height: int) -> bool:
            """Return True when a line of the given height still fits on the page"""
            remaining = self.content_size[1] - self._current_y_offset
            return remaining >= line_height

        setattr(Page, 'can_fit_line', can_fit_line)

    if not hasattr(Page, 'available_width'):
        def available_width(self) -> int:
            """Width available for content"""
            return self.content_size[0]

        # Exposed as a read-only property, not a method
        Page.available_width = property(available_width)
|
||||||
|
|
||||||
|
# Apply the page methods once, when this module is imported
_add_page_methods()
|
||||||
493
pyWebLayout/layout/ereader_manager.py
Normal file
493
pyWebLayout/layout/ereader_manager.py
Normal file
@ -0,0 +1,493 @@
|
|||||||
|
"""
|
||||||
|
High-performance ereader layout manager with sub-second page rendering.
|
||||||
|
|
||||||
|
This module provides the main interface for ereader applications, combining
|
||||||
|
position tracking, font scaling, chapter navigation, and intelligent page buffering
|
||||||
|
into a unified, easy-to-use API.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import List, Dict, Optional, Tuple, Any, Callable
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .ereader_layout import RenderingPosition, ChapterNavigator, ChapterInfo
|
||||||
|
from .page_buffer import BufferedPageRenderer
|
||||||
|
from pyWebLayout.abstract.block import Block, HeadingLevel
|
||||||
|
from pyWebLayout.concrete.page import Page
|
||||||
|
from pyWebLayout.style.page_style import PageStyle
|
||||||
|
|
||||||
|
|
||||||
|
class BookmarkManager:
    """
    Manages bookmarks and reading position persistence for ereader applications.

    Bookmarks are held in memory and mirrored to ``<document_id>_bookmarks.json``;
    the last reading position is stored in ``<document_id>_position.json``.
    Both files live in the bookmarks directory. Load/save failures are
    reported with ``print`` and are not raised to the caller.
    """

    def __init__(self, document_id: str, bookmarks_dir: str = "bookmarks"):
        """
        Initialize bookmark manager.

        Creates the bookmarks directory if needed and loads any bookmarks
        already stored for this document.

        Args:
            document_id: Unique identifier for the document
            bookmarks_dir: Directory to store bookmark files
        """
        self.document_id = document_id
        self.bookmarks_dir = Path(bookmarks_dir)
        # parents=True so a nested directory (e.g. "data/bookmarks") can be
        # created even when its parent directories do not exist yet.
        self.bookmarks_dir.mkdir(parents=True, exist_ok=True)

        # NOTE(review): document_id is used verbatim in the file names; an id
        # containing a path separator would resolve to a different directory.
        self.bookmarks_file = self.bookmarks_dir / f"{document_id}_bookmarks.json"
        self.position_file = self.bookmarks_dir / f"{document_id}_position.json"

        self._bookmarks: Dict[str, RenderingPosition] = {}
        self._load_bookmarks()

    def _load_bookmarks(self):
        """Load bookmarks from file; on any failure fall back to no bookmarks"""
        if not self.bookmarks_file.exists():
            return

        try:
            with open(self.bookmarks_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self._bookmarks = {
                name: RenderingPosition.from_dict(pos_data)
                for name, pos_data in data.items()
            }
        except Exception as e:
            # Best effort: a corrupt or unreadable file must not prevent the
            # document from opening.
            print(f"Failed to load bookmarks: {e}")
            self._bookmarks = {}

    def _save_bookmarks(self):
        """Save bookmarks to file; failures are printed, not raised"""
        try:
            data = {
                name: position.to_dict()
                for name, position in self._bookmarks.items()
            }
            with open(self.bookmarks_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            print(f"Failed to save bookmarks: {e}")

    def add_bookmark(self, name: str, position: RenderingPosition):
        """
        Add a bookmark at the given position.

        An existing bookmark with the same name is replaced. The position
        object is stored as given (not copied).

        Args:
            name: Bookmark name
            position: Position to bookmark
        """
        self._bookmarks[name] = position
        self._save_bookmarks()

    def remove_bookmark(self, name: str) -> bool:
        """
        Remove a bookmark.

        Args:
            name: Bookmark name to remove

        Returns:
            True if bookmark was removed, False if not found
        """
        if name not in self._bookmarks:
            return False

        del self._bookmarks[name]
        self._save_bookmarks()
        return True

    def get_bookmark(self, name: str) -> Optional[RenderingPosition]:
        """
        Get a bookmark position.

        Args:
            name: Bookmark name

        Returns:
            Bookmark position or None if not found
        """
        return self._bookmarks.get(name)

    def list_bookmarks(self) -> List[Tuple[str, RenderingPosition]]:
        """
        Get all bookmarks.

        Returns:
            List of (name, position) tuples
        """
        return list(self._bookmarks.items())

    def save_reading_position(self, position: RenderingPosition):
        """
        Save the current reading position.

        Failures are printed, not raised.

        Args:
            position: Current reading position
        """
        try:
            with open(self.position_file, 'w', encoding='utf-8') as f:
                json.dump(position.to_dict(), f, indent=2)
        except Exception as e:
            print(f"Failed to save reading position: {e}")

    def load_reading_position(self) -> Optional[RenderingPosition]:
        """
        Load the last reading position.

        Returns:
            Last reading position, or None if no position file exists or it
            could not be read
        """
        if self.position_file.exists():
            try:
                with open(self.position_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                return RenderingPosition.from_dict(data)
            except Exception as e:
                print(f"Failed to load reading position: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class EreaderLayoutManager:
    """
    High-level ereader layout manager providing a complete interface for ereader applications.

    Features:
    - Sub-second page rendering with intelligent buffering
    - Font scaling support
    - Chapter navigation
    - Bookmark management
    - Position persistence
    - Progress tracking
    """

    def __init__(self,
                 blocks: List[Block],
                 page_size: Tuple[int, int],
                 document_id: str = "default",
                 buffer_size: int = 5,
                 page_style: Optional[PageStyle] = None):
        """
        Initialize the ereader layout manager.

        Args:
            blocks: Document blocks to render
            page_size: Page size (width, height) in pixels
            document_id: Unique identifier for the document (for bookmarks/position)
            buffer_size: Number of pages to cache in each direction
            page_style: Custom page styling (uses default if None)
        """
        self.blocks = blocks
        self.page_size = page_size
        self.document_id = document_id

        # Initialize page style
        if page_style is None:
            page_style = PageStyle()
        self.page_style = page_style

        # Initialize core components
        self.renderer = BufferedPageRenderer(blocks, page_style, buffer_size, page_size)
        self.chapter_navigator = ChapterNavigator(blocks)
        self.bookmark_manager = BookmarkManager(document_id)

        # Current state
        self.current_position = RenderingPosition()
        self.font_scale = 1.0

        # Resume from the last saved reading position if there is one
        saved_position = self.bookmark_manager.load_reading_position()
        if saved_position is not None:
            self.current_position = saved_position

        # Callbacks for UI updates
        self.position_changed_callback: Optional[Callable[[RenderingPosition], None]] = None
        self.chapter_changed_callback: Optional[Callable[[Optional[ChapterInfo]], None]] = None

    def set_position_changed_callback(self, callback: Callable[[RenderingPosition], None]):
        """Set callback for position changes"""
        self.position_changed_callback = callback

    def set_chapter_changed_callback(self, callback: Callable[[Optional[ChapterInfo]], None]):
        """Set callback for chapter changes"""
        self.chapter_changed_callback = callback

    def _notify_position_changed(self):
        """Notify the UI callbacks of a position change and persist the position"""
        if self.position_changed_callback is not None:
            self.position_changed_callback(self.current_position)

        # The chapter callback is invoked on every position change with the
        # chapter for the current position; it is not limited to actual
        # chapter transitions.
        current_chapter = self.chapter_navigator.get_current_chapter(self.current_position)
        if self.chapter_changed_callback is not None:
            self.chapter_changed_callback(current_chapter)

        # Auto-save reading position
        self.bookmark_manager.save_reading_position(self.current_position)

    def get_current_page(self) -> Page:
        """
        Get the page at the current reading position.

        Returns:
            Rendered page
        """
        page, _ = self.renderer.render_page(self.current_position, self.font_scale)
        return page

    def next_page(self) -> Optional[Page]:
        """
        Advance to the next page.

        Returns:
            Next page or None if at end of document
        """
        # Rendering the current page yields the position where the next page starts
        _, next_position = self.renderer.render_page(self.current_position, self.font_scale)

        # No progress means there is nothing after the current page
        if next_position == self.current_position:
            return None  # At end of document

        self.current_position = next_position
        self._notify_position_changed()
        return self.get_current_page()

    def previous_page(self) -> Optional[Page]:
        """
        Go to the previous page.

        Returns:
            Previous page or None if at beginning of document
        """
        if self._is_at_beginning():
            return None

        # Use backward rendering to find the previous page
        page, start_position = self.renderer.render_page_backward(self.current_position, self.font_scale)

        if start_position == self.current_position:
            return None  # At beginning of document

        self.current_position = start_position
        self._notify_position_changed()
        return page

    def _is_at_beginning(self) -> bool:
        """Check if we're at the beginning of the document"""
        position = self.current_position
        return (position.chapter_index == 0 and
                position.block_index == 0 and
                position.word_index == 0)

    def jump_to_position(self, position: RenderingPosition) -> Page:
        """
        Jump to a specific position in the document.

        Args:
            position: Position to jump to

        Returns:
            Page at the new position
        """
        self.current_position = position
        self._notify_position_changed()
        return self.get_current_page()

    def jump_to_chapter(self, chapter_title: str) -> Optional[Page]:
        """
        Jump to a specific chapter by title.

        Args:
            chapter_title: Title of the chapter to jump to

        Returns:
            Page at chapter start or None if chapter not found
        """
        position = self.chapter_navigator.get_chapter_position(chapter_title)
        if position is not None:
            return self.jump_to_position(position)
        return None

    def jump_to_chapter_index(self, chapter_index: int) -> Optional[Page]:
        """
        Jump to a chapter by index.

        Args:
            chapter_index: Index of the chapter (0-based)

        Returns:
            Page at chapter start or None if index invalid
        """
        chapters = self.chapter_navigator.chapters
        if 0 <= chapter_index < len(chapters):
            return self.jump_to_position(chapters[chapter_index].position)
        return None

    def set_font_scale(self, scale: float) -> Page:
        """
        Change the font scale and re-render current page.

        Args:
            scale: Font scaling factor (1.0 = normal, 2.0 = double size, etc.)

        Returns:
            Re-rendered page with new font scale
        """
        if scale != self.font_scale:
            # The renderer is handed the new scale on the next render call
            # and takes care of cache invalidation itself.
            self.font_scale = scale

        return self.get_current_page()

    def get_font_scale(self) -> float:
        """Get the current font scale"""
        return self.font_scale

    def get_table_of_contents(self) -> List[Tuple[str, HeadingLevel, RenderingPosition]]:
        """
        Get the table of contents.

        Returns:
            List of (title, level, position) tuples
        """
        return self.chapter_navigator.get_table_of_contents()

    def get_current_chapter(self) -> Optional[ChapterInfo]:
        """
        Get information about the current chapter.

        Returns:
            Current chapter info or None if no chapters
        """
        return self.chapter_navigator.get_current_chapter(self.current_position)

    def add_bookmark(self, name: str) -> bool:
        """
        Add a bookmark at the current position.

        Args:
            name: Bookmark name

        Returns:
            True unless the bookmark manager raised while adding the bookmark
        """
        try:
            self.bookmark_manager.add_bookmark(name, self.current_position)
            return True
        except Exception:
            return False

    def remove_bookmark(self, name: str) -> bool:
        """
        Remove a bookmark.

        Args:
            name: Bookmark name

        Returns:
            True if bookmark was removed
        """
        return self.bookmark_manager.remove_bookmark(name)

    def jump_to_bookmark(self, name: str) -> Optional[Page]:
        """
        Jump to a bookmark.

        Args:
            name: Bookmark name

        Returns:
            Page at bookmark position or None if bookmark not found
        """
        position = self.bookmark_manager.get_bookmark(name)
        if position is not None:
            return self.jump_to_position(position)
        return None

    def list_bookmarks(self) -> List[Tuple[str, RenderingPosition]]:
        """
        Get all bookmarks.

        Returns:
            List of (name, position) tuples
        """
        return self.bookmark_manager.list_bookmarks()

    def get_reading_progress(self) -> float:
        """
        Get reading progress as a fraction of the document.

        The value is derived from the current block index only; word
        positions inside a block are not taken into account.

        Returns:
            Progress from 0.0 to 1.0 (0.0 for an empty document)
        """
        if not self.blocks:
            return 0.0

        total_blocks = len(self.blocks)
        # Clamp so a position past the last block still reports 1.0
        current_block = min(self.current_position.block_index, total_blocks - 1)

        # max(1, ...) avoids dividing by zero for a single-block document
        return current_block / max(1, total_blocks - 1)

    def get_position_info(self) -> Dict[str, Any]:
        """
        Get detailed information about the current position.

        Returns:
            Dictionary with position details
        """
        current_chapter = self.get_current_chapter()
        has_chapter = bool(current_chapter)

        return {
            'position': self.current_position.to_dict(),
            'chapter': {
                'title': current_chapter.title if has_chapter else None,
                'level': current_chapter.level if has_chapter else None,
                'index': current_chapter.block_index if has_chapter else None
            },
            'progress': self.get_reading_progress(),
            'font_scale': self.font_scale,
            'page_size': self.page_size
        }

    def get_cache_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics for debugging/monitoring.

        Returns:
            Dictionary with cache statistics
        """
        return self.renderer.get_cache_stats()

    def shutdown(self):
        """
        Shutdown the ereader manager and clean up resources.
        Call this when the application is closing.
        """
        # Save current position
        self.bookmark_manager.save_reading_position(self.current_position)

        # Shutdown renderer and buffer
        self.renderer.shutdown()

    def __del__(self):
        """Cleanup on destruction"""
        try:
            self.shutdown()
        except Exception:
            # The instance may be only partially constructed (``__init__``
            # raised before all attributes were set) or the interpreter may
            # be tearing down; a destructor must not raise in either case.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
# Convenience function for quick setup
|
||||||
|
def create_ereader_manager(blocks: List[Block],
                           page_size: Tuple[int, int],
                           document_id: str = "default",
                           **kwargs) -> EreaderLayoutManager:
    """
    Build an EreaderLayoutManager in a single call.

    Args:
        blocks: Document blocks to render
        page_size: Page size (width, height) in pixels
        document_id: Unique identifier for the document
        **kwargs: Additional arguments forwarded to EreaderLayoutManager

    Returns:
        Configured EreaderLayoutManager instance
    """
    manager = EreaderLayoutManager(blocks, page_size, document_id, **kwargs)
    return manager
|
||||||
411
pyWebLayout/layout/page_buffer.py
Normal file
411
pyWebLayout/layout/page_buffer.py
Normal file
@ -0,0 +1,411 @@
|
|||||||
|
"""
|
||||||
|
Multi-process page buffering system for high-performance ereader navigation.
|
||||||
|
|
||||||
|
This module provides intelligent page caching with background rendering using
|
||||||
|
multiprocessing to achieve sub-second page navigation performance.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import Dict, Optional, List, Tuple, Any
|
||||||
|
from collections import OrderedDict
|
||||||
|
import multiprocessing
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, as_completed, Future
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import pickle
|
||||||
|
from dataclasses import asdict
|
||||||
|
|
||||||
|
from .ereader_layout import RenderingPosition, BidirectionalLayouter
|
||||||
|
from pyWebLayout.concrete.page import Page
|
||||||
|
from pyWebLayout.abstract.block import Block
|
||||||
|
from pyWebLayout.style.page_style import PageStyle
|
||||||
|
|
||||||
|
|
||||||
|
def _render_page_worker(args: Tuple[List[Block], PageStyle, RenderingPosition, float, bool]) -> Tuple[RenderingPosition, bytes, RenderingPosition]:
    """
    Render one page inside a worker process.

    Takes a single tuple so it can be submitted to an executor as one
    argument.

    Args:
        args: Tuple of (blocks, page_style, position, font_scale, is_backward)

    Returns:
        Tuple of (original_position, pickled_page, next_position), where
        next_position is the second value returned by the layouter call
    """
    blocks, page_style, position, font_scale, is_backward = args

    # NOTE(review): no page size is passed to the layouter here - confirm
    # that its default matches the size used for foreground rendering.
    layouter = BidirectionalLayouter(blocks, page_style)

    render = layouter.render_page_backward if is_backward else layouter.render_page_forward
    page, next_pos = render(position, font_scale)

    # The page is returned as pickled bytes for inter-process communication
    return position, pickle.dumps(page), next_pos
|
||||||
|
|
||||||
|
|
||||||
|
class PageBuffer:
    """
    Intelligent page caching system with LRU eviction and background rendering.
    Maintains separate forward and backward buffers for optimal navigation performance.
    """

    def __init__(self, buffer_size: int = 5, max_workers: int = 4):
        """
        Initialize the page buffer.

        No worker processes are started here; the pool is created by
        ``initialize``.

        Args:
            buffer_size: Number of pages to cache in each direction
            max_workers: Maximum number of worker processes for background rendering
        """
        self.buffer_size = buffer_size
        self.max_workers = max_workers

        # LRU caches for forward and backward pages (oldest entry first)
        self.forward_buffer: OrderedDict[RenderingPosition, Page] = OrderedDict()
        self.backward_buffer: OrderedDict[RenderingPosition, Page] = OrderedDict()

        # Position tracking for next/previous positions
        self.position_map: Dict[RenderingPosition, RenderingPosition] = {}  # current -> next
        self.reverse_position_map: Dict[RenderingPosition, RenderingPosition] = {}  # current -> previous

        # Background rendering
        self.executor: Optional[ProcessPoolExecutor] = None
        self.pending_renders: Dict[RenderingPosition, Future] = {}
        self.render_lock = threading.Lock()

        # Document state
        self.blocks: Optional[List[Block]] = None
        self.page_style: Optional[PageStyle] = None
        self.current_font_scale: float = 1.0

    def initialize(self, blocks: List[Block], page_style: PageStyle, font_scale: float = 1.0):
        """
        Initialize the buffer with document blocks and page style.

        Args:
            blocks: Document blocks to render
            page_style: Page styling configuration
            font_scale: Current font scaling factor
        """
        self.blocks = blocks
        self.page_style = page_style
        self.current_font_scale = font_scale

        # Start the process pool (only once; repeated calls reuse it)
        if self.executor is None:
            self.executor = ProcessPoolExecutor(max_workers=self.max_workers)

    def get_page(self, position: RenderingPosition) -> Optional[Page]:
        """
        Get a cached page if available.

        The forward buffer is consulted before the backward buffer. A hit
        marks the entry as most recently used.

        Args:
            position: Position to get page for

        Returns:
            Cached page or None if not available
        """
        for buffer in (self.forward_buffer, self.backward_buffer):
            if position in buffer:
                # Move to end (most recently used)
                buffer.move_to_end(position)
                return buffer[position]

        return None

    def cache_page(self, position: RenderingPosition, page: Page, next_position: Optional[RenderingPosition] = None, is_backward: bool = False):
        """
        Cache a rendered page with LRU eviction.

        Args:
            position: Position of the page
            page: Rendered page to cache
            next_position: Position of the next page (for forward navigation)
            is_backward: Whether this is a backward-rendered page
        """
        target_buffer = self.backward_buffer if is_backward else self.forward_buffer

        # Add to cache. Assigning to an existing key keeps its old slot in an
        # OrderedDict, so refresh recency explicitly; otherwise a page that
        # was just re-cached could be evicted as the "oldest" entry.
        target_buffer[position] = page
        target_buffer.move_to_end(position)

        # Track position relationships
        if next_position:
            if is_backward:
                self.reverse_position_map[next_position] = position
            else:
                self.position_map[position] = next_position

        # Evict oldest if buffer is full
        if len(target_buffer) > self.buffer_size:
            oldest_pos, _ = target_buffer.popitem(last=False)
            # Clean up position maps
            self.position_map.pop(oldest_pos, None)
            self.reverse_position_map.pop(oldest_pos, None)

    def start_background_rendering(self, current_position: RenderingPosition, direction: str = 'forward'):
        """
        Start background rendering of upcoming pages.

        Does nothing until ``initialize`` has supplied blocks, a page style
        and a process pool.

        Args:
            current_position: Current reading position
            direction: 'forward', 'backward', or 'both'
        """
        if not self.blocks or not self.page_style or not self.executor:
            return

        with self.render_lock:
            if direction in ('forward', 'both'):
                self._queue_forward_renders(current_position)

            if direction in ('backward', 'both'):
                self._queue_backward_renders(current_position)

    def _queue_forward_renders(self, start_position: RenderingPosition):
        """
        Queue a forward page render starting from the given position.

        Walks along already known pages (at most ``buffer_size`` steps) and
        submits a job for the first position that is neither cached nor
        pending. At most one job is submitted per call.
        """
        current_pos = start_position

        for _ in range(self.buffer_size):
            # Skip if already cached or being rendered
            if current_pos in self.forward_buffer or current_pos in self.pending_renders:
                # Try to get next position from cache
                current_pos = self.position_map.get(current_pos)
                if not current_pos:
                    break
                continue

            # Queue render job
            args = (self.blocks, self.page_style, current_pos, self.current_font_scale, False)
            self.pending_renders[current_pos] = self.executor.submit(_render_page_worker, args)

            # We don't know the next position yet, so we'll update it when the render completes
            break

    def _queue_backward_renders(self, start_position: RenderingPosition):
        """
        Queue a backward page render ending at the given position.

        Mirrors ``_queue_forward_renders`` using the backward buffer and the
        reverse position map. At most one job is submitted per call.
        """
        current_pos = start_position

        for _ in range(self.buffer_size):
            # Skip if already cached or being rendered
            if current_pos in self.backward_buffer or current_pos in self.pending_renders:
                # Try to get previous position from cache
                current_pos = self.reverse_position_map.get(current_pos)
                if not current_pos:
                    break
                continue

            # Queue render job
            args = (self.blocks, self.page_style, current_pos, self.current_font_scale, True)
            self.pending_renders[current_pos] = self.executor.submit(_render_page_worker, args)

            # We don't know the previous position yet, so we'll update it when the render completes
            break

    def check_completed_renders(self):
        """
        Check for completed background renders and cache the results.

        Finished jobs are removed from ``pending_renders`` whether they
        succeeded or failed; failures are printed.
        """
        if not self.pending_renders:
            return

        completed = []

        with self.render_lock:
            for position, future in self.pending_renders.items():
                if not future.done():
                    continue

                try:
                    original_pos, pickled_page, next_pos = future.result()

                    # Deserialize the page (the bytes come from our own worker process)
                    page = pickle.loads(pickled_page)

                    # NOTE(review): results are always cached as forward
                    # pages, including jobs queued by _queue_backward_renders
                    # - confirm how backward results are meant to be keyed.
                    self.cache_page(original_pos, page, next_pos, is_backward=False)

                except Exception as e:
                    print(f"Background render failed for position {position}: {e}")

                completed.append(position)

            # Remove completed renders
            for pos in completed:
                self.pending_renders.pop(pos, None)

    def invalidate_all(self):
        """Clear all cached pages and cancel pending renders"""
        with self.render_lock:
            # Cancel pending renders
            for future in self.pending_renders.values():
                future.cancel()
            self.pending_renders.clear()

            # Clear caches
            self.forward_buffer.clear()
            self.backward_buffer.clear()
            self.position_map.clear()
            self.reverse_position_map.clear()

    def set_font_scale(self, font_scale: float):
        """
        Update font scale and invalidate cache.

        Nothing happens when the scale is unchanged.

        Args:
            font_scale: New font scaling factor
        """
        if font_scale != self.current_font_scale:
            self.current_font_scale = font_scale
            self.invalidate_all()

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics for debugging/monitoring"""
        return {
            'forward_buffer_size': len(self.forward_buffer),
            'backward_buffer_size': len(self.backward_buffer),
            'pending_renders': len(self.pending_renders),
            'position_mappings': len(self.position_map),
            'reverse_position_mappings': len(self.reverse_position_map),
            'current_font_scale': self.current_font_scale
        }

    def shutdown(self):
        """
        Shutdown the page buffer and clean up resources.

        Safe to call more than once: the executor is only shut down while
        one exists.
        """
        if self.executor:
            # Cancel pending renders
            with self.render_lock:
                for future in self.pending_renders.values():
                    future.cancel()

            # Shutdown executor
            self.executor.shutdown(wait=True)
            self.executor = None

        # Clear all caches
        self.invalidate_all()

    def __del__(self):
        """Cleanup on destruction"""
        try:
            self.shutdown()
        except Exception:
            # The instance may be partially constructed or the interpreter
            # may be tearing down; a destructor must not raise.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
class BufferedPageRenderer:
    """
    High-level interface for buffered page rendering with automatic background caching.

    Pairs a BidirectionalLayouter, which renders a page on demand, with a
    PageBuffer, which is consulted first and is asked to start background
    rendering after every request.
    """

    def __init__(self, blocks: List[Block], page_style: PageStyle, buffer_size: int = 5, page_size: Tuple[int, int] = (800, 600)):
        """
        Initialize the buffered renderer.

        Args:
            blocks: Document blocks to render
            page_style: Page styling configuration
            buffer_size: Number of pages to cache in each direction
            page_size: Page size (width, height) in pixels
        """
        self.layouter = BidirectionalLayouter(blocks, page_style, page_size)
        self.buffer = PageBuffer(buffer_size)
        self.buffer.initialize(blocks, page_style)

        self.current_position = RenderingPosition()
        self.font_scale = 1.0

    def _apply_font_scale(self, font_scale: float) -> None:
        """Remember a changed font scale and forward it to the page buffer."""
        if font_scale == self.font_scale:
            return
        self.font_scale = font_scale
        self.buffer.set_font_scale(font_scale)

    def render_page(self, position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]:
        """
        Render the page starting at ``position``, serving it from cache when possible.

        Args:
            position: Position to render from
            font_scale: Font scaling factor

        Returns:
            Tuple of (rendered_page, next_position)
        """
        self._apply_font_scale(font_scale)

        page_buffer = self.buffer
        hit = page_buffer.get_page(position)

        if not hit:
            # Cache miss: lay the page out now and remember the result.
            page, following = self.layouter.render_page_forward(position, font_scale)
            page_buffer.cache_page(position, page, following)

            # Warm the cache in both directions, then harvest finished renders.
            page_buffer.start_background_rendering(position, 'both')
            page_buffer.check_completed_renders()
            return page, following

        # Cache hit: the position map supplies the next position; when no
        # entry exists the requested position itself is returned.
        following = page_buffer.position_map.get(position, position)
        page_buffer.start_background_rendering(position, 'forward')
        return hit, following

    def render_page_backward(self, end_position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]:
        """
        Render the page ending at ``end_position``, serving it from cache when possible.

        Args:
            end_position: Position where page should end
            font_scale: Font scaling factor

        Returns:
            Tuple of (rendered_page, start_position)
        """
        self._apply_font_scale(font_scale)

        page_buffer = self.buffer
        # NOTE(review): the cache is queried with end_position, while a direct
        # render below is stored under its start position (is_backward=True).
        # Confirm get_page resolves backward lookups as intended.
        hit = page_buffer.get_page(end_position)

        if not hit:
            # Cache miss: lay the page out backwards and remember the result.
            page, start_pos = self.layouter.render_page_backward(end_position, font_scale)
            page_buffer.cache_page(start_pos, page, end_position, is_backward=True)

            # Warm the cache in both directions, then harvest finished renders.
            page_buffer.start_background_rendering(end_position, 'both')
            page_buffer.check_completed_renders()
            return page, start_pos

        # Cache hit: the reverse map supplies the page's start position; when
        # no entry exists the requested end position itself is returned.
        preceding = page_buffer.reverse_position_map.get(end_position, end_position)
        page_buffer.start_background_rendering(end_position, 'backward')
        return hit, preceding

    def get_cache_stats(self) -> Dict[str, Any]:
        """Return the underlying page buffer's cache statistics."""
        return self.buffer.get_cache_stats()

    def shutdown(self):
        """Shut down the page buffer, releasing its resources."""
        self.buffer.shutdown()
|
||||||
481
pyWebLayout/layout/recursive_position.py
Normal file
481
pyWebLayout/layout/recursive_position.py
Normal file
@ -0,0 +1,481 @@
|
|||||||
|
"""
|
||||||
|
Recursive location index system for dynamic content positioning.
|
||||||
|
|
||||||
|
This module provides a flexible, hierarchical position tracking system that can
|
||||||
|
reference any type of content (words, images, table cells, list items, etc.)
|
||||||
|
in a nested document structure.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import List, Dict, Any, Optional, Union, Tuple
|
||||||
|
from enum import Enum
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
import shelve
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class ContentType(Enum):
    """
    Kinds of content a position-path node can address.

    Each member's value is the lowercase string written by the dictionary/JSON
    serialization of a node.
    """
    # Structural containers
    DOCUMENT = "document"
    CHAPTER = "chapter"
    BLOCK = "block"

    # Block-level content
    PARAGRAPH = "paragraph"
    HEADING = "heading"
    TABLE = "table"
    TABLE_ROW = "table_row"
    TABLE_CELL = "table_cell"
    LIST = "list"
    LIST_ITEM = "list_item"

    # Inline and interactive content
    WORD = "word"
    IMAGE = "image"
    LINK = "link"
    BUTTON = "button"
    FORM_FIELD = "form_field"

    # Rendering artefacts
    LINE = "line"  # Rendered line of text
    PAGE = "page"  # Rendered page
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LocationNode:
    """
    A single node in the recursive location index.

    Each node represents a position within a specific content type: which
    kind of content is addressed, which item of that kind, and an optional
    offset inside that item.
    """
    content_type: ContentType
    index: int = 0  # Position within this content type
    offset: int = 0  # Offset within the indexed item (e.g., character offset in word)
    metadata: Dict[str, Any] = field(default_factory=dict)  # Additional context

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize node to dictionary.

        Returns:
            A dictionary with ``content_type`` (the enum's string value),
            ``index``, ``offset`` and a copy of ``metadata``, so editing the
            result does not modify this node.
        """
        return {
            'content_type': self.content_type.value,
            'index': self.index,
            'offset': self.offset,
            'metadata': dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'LocationNode':
        """
        Deserialize node from dictionary.

        Args:
            data: Mapping as produced by ``to_dict``. ``index``, ``offset``
                and ``metadata`` are optional and fall back to the field
                defaults.

        Returns:
            A new node owning its own metadata dictionary.

        Raises:
            KeyError: If ``content_type`` is missing.
            ValueError: If ``content_type`` is not a known ContentType value.
        """
        return cls(
            content_type=ContentType(data['content_type']),
            index=data.get('index', 0),
            offset=data.get('offset', 0),
            # Copy so the node never shares state with the caller's data.
            metadata=dict(data.get('metadata') or {}),
        )

    def __str__(self) -> str:
        """Human-readable form: ``type[index]``, with ``+offset`` when the offset is positive."""
        label = f"{self.content_type.value}[{self.index}]"
        if self.offset > 0:
            return f"{label}+{self.offset}"
        return label
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RecursivePosition:
    """
    Hierarchical position that can reference any nested content structure.

    The path represents a traversal from document root to the specific location:
    - Document -> Chapter[2] -> Block[5] -> Paragraph -> Word[12]+3 (offset 3 inside the word)
    - Document -> Chapter[1] -> Block[3] -> Table -> Row[2] -> Cell[1] -> Word[0]
    - Document -> Chapter[0] -> Block[1] -> Image

    Instances are mutable yet hashable; the hash is derived from the path, so
    a position should not be modified while it is used as a dict key or set
    member.
    """
    path: List[LocationNode] = field(default_factory=list)
    rendering_metadata: Dict[str, Any] = field(default_factory=dict)  # Font scale, page size, etc.

    def __post_init__(self):
        """Ensure we always have at least a document root"""
        if not self.path:
            self.path = [LocationNode(ContentType.DOCUMENT)]

    @staticmethod
    def _clone_node(node: LocationNode) -> LocationNode:
        """Return an independent copy of ``node``, including nested metadata."""
        from copy import deepcopy
        return LocationNode(node.content_type, node.index, node.offset, deepcopy(node.metadata))

    @staticmethod
    def _same_slot(first: LocationNode, second: LocationNode) -> bool:
        """Return True when both nodes address the same content type and index (offset ignored)."""
        return first.content_type == second.content_type and first.index == second.index

    def copy(self) -> 'RecursivePosition':
        """
        Create a deep copy of this position.

        Nodes, node metadata and rendering metadata are all duplicated, so the
        copy can be modified without affecting this position.
        """
        from copy import deepcopy
        return RecursivePosition(
            path=[self._clone_node(node) for node in self.path],
            rendering_metadata=deepcopy(self.rendering_metadata)
        )

    def get_node(self, content_type: ContentType) -> Optional[LocationNode]:
        """Get the first node of a specific content type in the path, or None if absent"""
        for node in self.path:
            if node.content_type == content_type:
                return node
        return None

    def get_nodes(self, content_type: ContentType) -> List[LocationNode]:
        """Get all nodes of a specific content type in the path"""
        return [node for node in self.path if node.content_type == content_type]

    def add_node(self, node: LocationNode) -> 'RecursivePosition':
        """Add a node to the path (returns self for chaining)"""
        self.path.append(node)
        return self

    def pop_node(self) -> Optional[LocationNode]:
        """
        Remove and return the last node in the path.

        Returns:
            The removed node, or None when only one node is left (the last
            remaining node is never removed).
        """
        if len(self.path) > 1:  # Keep at least document root
            return self.path.pop()
        return None

    def get_depth(self) -> int:
        """Get the depth of the position (number of nodes)"""
        return len(self.path)

    def get_leaf_node(self) -> LocationNode:
        """Get the deepest (most specific) node in the path"""
        # The fallback only matters if the path was emptied from outside.
        return self.path[-1] if self.path else LocationNode(ContentType.DOCUMENT)

    def truncate_to_type(self, content_type: ContentType) -> 'RecursivePosition':
        """
        Truncate path to end at the first occurrence of the given content type.

        Modifies this position in place and returns it for chaining. The path
        is left untouched when no node of that type exists.
        """
        for i, node in enumerate(self.path):
            if node.content_type == content_type:
                self.path = self.path[:i + 1]
                break
        return self

    def is_ancestor_of(self, other: 'RecursivePosition') -> bool:
        """
        Check if this position is an ancestor of another position.

        This position is an ancestor when its path is strictly shorter than
        the other path and matches it node by node on content type and index
        (offsets and metadata are not compared).
        """
        if len(self.path) >= len(other.path):
            return False

        # zip stops at the shorter path, which the length check guarantees is ours.
        return all(self._same_slot(mine, theirs)
                   for mine, theirs in zip(self.path, other.path))

    def is_descendant_of(self, other: 'RecursivePosition') -> bool:
        """Check if this position is a descendant of another position"""
        return other.is_ancestor_of(self)

    def get_common_ancestor(self, other: 'RecursivePosition') -> 'RecursivePosition':
        """
        Find the deepest common ancestor with another position.

        Returns:
            A new position holding copies of the leading nodes both paths
            share (matched on content type and index). When not even the first
            nodes match, the result is a bare document root. Rendering
            metadata is not carried over.
        """
        shared: List[LocationNode] = []
        for mine, theirs in zip(self.path, other.path):
            if not self._same_slot(mine, theirs):
                break
            # Copy so editing the ancestor cannot alter this position's nodes.
            shared.append(self._clone_node(mine))

        return RecursivePosition(path=shared)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize position to dictionary for JSON storage"""
        return {
            'path': [node.to_dict() for node in self.path],
            # Copy so edits to the result do not leak back into this position.
            'rendering_metadata': dict(self.rendering_metadata)
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'RecursivePosition':
        """
        Deserialize position from dictionary.

        Args:
            data: Mapping as produced by ``to_dict``; ``rendering_metadata``
                is optional.

        Raises:
            KeyError: If ``path`` is missing.
        """
        return cls(
            path=[LocationNode.from_dict(node_data) for node_data in data['path']],
            rendering_metadata=dict(data.get('rendering_metadata') or {})
        )

    def to_json(self) -> str:
        """Serialize to JSON string"""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> 'RecursivePosition':
        """Deserialize from JSON string"""
        return cls.from_dict(json.loads(json_str))

    def __str__(self) -> str:
        """Human-readable path representation"""
        return " -> ".join(str(node) for node in self.path)

    def __eq__(self, other) -> bool:
        """Check equality with another position (path and rendering metadata)"""
        if not isinstance(other, RecursivePosition):
            # Let Python try the reflected comparison; "==" still yields False.
            return NotImplemented
        return (self.path == other.path and
                self.rendering_metadata == other.rendering_metadata)

    def __hash__(self) -> int:
        """
        Make position hashable for use as dict key.

        Only content type, index and offset of each node contribute; node and
        rendering metadata are ignored, which stays consistent with ``__eq__``
        because equal positions necessarily have equal paths.
        """
        path_tuple = tuple((node.content_type, node.index, node.offset) for node in self.path)
        return hash(path_tuple)
|
||||||
|
|
||||||
|
|
||||||
|
class PositionBuilder:
    """
    Builder class for constructing RecursivePosition objects fluently.

    A new builder starts from a default RecursivePosition, which already
    contains a document root node; every node method appends one further node
    and returns the builder for chaining.

    Example usage:
        position = (PositionBuilder()
                    .chapter(2)
                    .block(5)
                    .paragraph()
                    .word(12, offset=3)
                    .build())
    """

    def __init__(self):
        self._position = RecursivePosition()

    def _add(self, content_type: ContentType, index: int, offset: int = 0,
             metadata: Optional[Dict[str, Any]] = None) -> 'PositionBuilder':
        """Append one node of ``content_type`` to the path and return the builder."""
        node = LocationNode(content_type, index, offset, metadata={} if metadata is None else metadata)
        self._position.add_node(node)
        return self

    def document(self, index: int = 0, **metadata) -> 'PositionBuilder':
        """Add document node (appended after the root the builder starts with)"""
        return self._add(ContentType.DOCUMENT, index, metadata=metadata)

    def chapter(self, index: int, **metadata) -> 'PositionBuilder':
        """Add chapter node"""
        return self._add(ContentType.CHAPTER, index, metadata=metadata)

    def block(self, index: int, **metadata) -> 'PositionBuilder':
        """Add block node"""
        return self._add(ContentType.BLOCK, index, metadata=metadata)

    def paragraph(self, index: int = 0, **metadata) -> 'PositionBuilder':
        """Add paragraph node"""
        return self._add(ContentType.PARAGRAPH, index, metadata=metadata)

    def heading(self, index: int = 0, **metadata) -> 'PositionBuilder':
        """Add heading node"""
        return self._add(ContentType.HEADING, index, metadata=metadata)

    def table(self, index: int = 0, **metadata) -> 'PositionBuilder':
        """Add table node"""
        return self._add(ContentType.TABLE, index, metadata=metadata)

    def table_row(self, index: int, **metadata) -> 'PositionBuilder':
        """Add table row node"""
        return self._add(ContentType.TABLE_ROW, index, metadata=metadata)

    def table_cell(self, index: int, **metadata) -> 'PositionBuilder':
        """Add table cell node"""
        return self._add(ContentType.TABLE_CELL, index, metadata=metadata)

    def list(self, index: int = 0, **metadata) -> 'PositionBuilder':
        """Add list node"""
        return self._add(ContentType.LIST, index, metadata=metadata)

    def list_item(self, index: int, **metadata) -> 'PositionBuilder':
        """Add list item node"""
        return self._add(ContentType.LIST_ITEM, index, metadata=metadata)

    def word(self, index: int, offset: int = 0, **metadata) -> 'PositionBuilder':
        """Add word node; ``offset`` is the offset inside the word"""
        return self._add(ContentType.WORD, index, offset, metadata=metadata)

    def image(self, index: int = 0, **metadata) -> 'PositionBuilder':
        """Add image node"""
        return self._add(ContentType.IMAGE, index, metadata=metadata)

    def link(self, index: int, **metadata) -> 'PositionBuilder':
        """Add link node"""
        return self._add(ContentType.LINK, index, metadata=metadata)

    def button(self, index: int, **metadata) -> 'PositionBuilder':
        """Add button node"""
        return self._add(ContentType.BUTTON, index, metadata=metadata)

    def form_field(self, index: int, **metadata) -> 'PositionBuilder':
        """Add form field node"""
        return self._add(ContentType.FORM_FIELD, index, metadata=metadata)

    def line(self, index: int, **metadata) -> 'PositionBuilder':
        """Add rendered line node"""
        return self._add(ContentType.LINE, index, metadata=metadata)

    def page(self, index: int, **metadata) -> 'PositionBuilder':
        """Add page node"""
        return self._add(ContentType.PAGE, index, metadata=metadata)

    def with_rendering_metadata(self, **metadata) -> 'PositionBuilder':
        """Add rendering metadata (font scale, page size, etc.)"""
        self._position.rendering_metadata.update(metadata)
        return self

    def build(self) -> RecursivePosition:
        """
        Build and return the final position.

        Returns:
            A copy of the position assembled so far, so continuing to use the
            builder afterwards does not modify positions already handed out.
        """
        return self._position.copy()
|
||||||
|
|
||||||
|
|
||||||
|
class PositionStorage:
    """
    Storage manager for recursive positions supporting both JSON and shelf formats.

    JSON mode writes one ``{document_id}_{position_name}.json`` file per
    position; shelf mode keeps one ``{document_id}.shelf`` database per
    document, keyed by position name. Identifiers are used verbatim in file
    names, so they should not contain path separators, and a document id that
    is a ``_``-delimited prefix of another id is ambiguous when listing JSON
    positions.

    Shelf databases are pickle-based: only open shelf files from trusted
    sources, since loading a crafted file can execute arbitrary code.
    """

    def __init__(self, storage_dir: str = "positions", use_shelf: bool = False):
        """
        Initialize position storage.

        Args:
            storage_dir: Directory to store position files (created, including
                missing parent directories, if it does not exist)
            use_shelf: If True, use Python shelf format; if False, use JSON
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.use_shelf = use_shelf

    def _json_path(self, document_id: str, position_name: str) -> Path:
        """Return the JSON file path for one stored position."""
        return self.storage_dir / f"{document_id}_{position_name}.json"

    def _shelf_path(self, document_id: str) -> str:
        """Return the shelf database path for one document."""
        return str(self.storage_dir / f"{document_id}.shelf")

    def save_position(self, document_id: str, position_name: str, position: RecursivePosition):
        """Save a position to storage"""
        if self.use_shelf:
            self._save_to_shelf(document_id, position_name, position)
        else:
            self._save_to_json(document_id, position_name, position)

    def load_position(self, document_id: str, position_name: str) -> Optional[RecursivePosition]:
        """Load a position from storage; returns None when it is missing or unreadable"""
        if self.use_shelf:
            return self._load_from_shelf(document_id, position_name)
        else:
            return self._load_from_json(document_id, position_name)

    def list_positions(self, document_id: str) -> List[str]:
        """List all saved positions for a document"""
        if self.use_shelf:
            return self._list_shelf_positions(document_id)
        else:
            return self._list_json_positions(document_id)

    def delete_position(self, document_id: str, position_name: str) -> bool:
        """Delete a position from storage; returns True only if something was deleted"""
        if self.use_shelf:
            return self._delete_from_shelf(document_id, position_name)
        else:
            return self._delete_from_json(document_id, position_name)

    def _save_to_json(self, document_id: str, position_name: str, position: RecursivePosition):
        """Save position as JSON file"""
        with open(self._json_path(document_id, position_name), 'w', encoding='utf-8') as f:
            json.dump(position.to_dict(), f, indent=2)

    def _load_from_json(self, document_id: str, position_name: str) -> Optional[RecursivePosition]:
        """
        Load position from JSON file.

        Returns:
            The stored position, or None when the file does not exist, cannot
            be read, or does not contain a valid serialized position.
        """
        try:
            with open(self._json_path(document_id, position_name), 'r', encoding='utf-8') as f:
                data = json.load(f)
            return RecursivePosition.from_dict(data)
        except (OSError, ValueError, KeyError, TypeError, AttributeError):
            # Missing/unreadable file, malformed JSON, or a payload with the
            # wrong shape: treated as "no stored position".
            return None

    def _list_json_positions(self, document_id: str) -> List[str]:
        """
        List JSON position files for a document.

        Returns:
            Sorted position names, i.e. the part of each matching file stem
            after the leading ``{document_id}_``.
        """
        prefix = f"{document_id}_"
        # Match the prefix literally (no glob metacharacter surprises) and
        # strip it only from the front, leaving the rest of the name intact.
        return sorted(
            entry.stem[len(prefix):]
            for entry in self.storage_dir.glob("*.json")
            if entry.stem.startswith(prefix)
        )

    def _delete_from_json(self, document_id: str, position_name: str) -> bool:
        """Delete JSON position file; returns False when it did not exist"""
        try:
            self._json_path(document_id, position_name).unlink()
        except FileNotFoundError:
            return False
        return True

    def _save_to_shelf(self, document_id: str, position_name: str, position: RecursivePosition):
        """Save position to shelf database (created on first use)"""
        with shelve.open(self._shelf_path(document_id)) as shelf:
            shelf[position_name] = position

    def _load_from_shelf(self, document_id: str, position_name: str) -> Optional[RecursivePosition]:
        """
        Load position from shelf database.

        The database is opened read-only, so looking up a position never
        creates a new database file.
        """
        try:
            with shelve.open(self._shelf_path(document_id), flag='r') as shelf:
                return shelf.get(position_name)
        except Exception:
            # dbm backends raise backend-specific error types (missing or
            # corrupt database, unpicklable entry); all mean "not available".
            return None

    def _list_shelf_positions(self, document_id: str) -> List[str]:
        """List positions in shelf database (empty when the database is missing or unreadable)"""
        try:
            with shelve.open(self._shelf_path(document_id), flag='r') as shelf:
                return list(shelf.keys())
        except Exception:
            # See _load_from_shelf: backend-specific errors mean "nothing stored".
            return []

    def _delete_from_shelf(self, document_id: str, position_name: str) -> bool:
        """Delete position from shelf database; returns False when nothing was deleted"""
        try:
            # 'w' opens an existing database for writing without creating one.
            with shelve.open(self._shelf_path(document_id), flag='w') as shelf:
                if position_name in shelf:
                    del shelf[position_name]
                    return True
        except Exception:
            # Missing or unusable database: nothing to delete.
            pass
        return False
|
||||||
|
|
||||||
|
|
||||||
|
# Convenience functions for common position patterns
|
||||||
|
def create_word_position(chapter: int, block: int, word: int, char_offset: int = 0) -> RecursivePosition:
    """
    Build a position addressing a word inside a paragraph.

    The path appended by the builder is chapter -> block -> paragraph -> word,
    with ``char_offset`` stored as the word node's offset.
    """
    builder = PositionBuilder()
    builder = builder.chapter(chapter)
    builder = builder.block(block)
    builder = builder.paragraph()
    builder = builder.word(word, offset=char_offset)
    return builder.build()
|
||||||
|
|
||||||
|
|
||||||
|
def create_image_position(chapter: int, block: int, image_index: int = 0) -> RecursivePosition:
    """
    Build a position addressing an image.

    The path appended by the builder is chapter -> block -> image.
    """
    builder = PositionBuilder()
    builder = builder.chapter(chapter)
    builder = builder.block(block)
    builder = builder.image(image_index)
    return builder.build()
|
||||||
|
|
||||||
|
|
||||||
|
def create_table_cell_position(chapter: int, block: int, row: int, col: int, word: int = 0) -> RecursivePosition:
    """
    Build a position addressing a word inside a table cell.

    The path appended by the builder is
    chapter -> block -> table -> table_row -> table_cell -> word.
    """
    builder = PositionBuilder()
    builder = builder.chapter(chapter)
    builder = builder.block(block)
    builder = builder.table()
    builder = builder.table_row(row)
    builder = builder.table_cell(col)
    builder = builder.word(word)
    return builder.build()
|
||||||
|
|
||||||
|
|
||||||
|
def create_list_item_position(chapter: int, block: int, item: int, word: int = 0) -> RecursivePosition:
    """
    Build a position addressing a word inside a list item.

    The path appended by the builder is
    chapter -> block -> list -> list_item -> word.
    """
    builder = PositionBuilder()
    builder = builder.chapter(chapter)
    builder = builder.block(block)
    builder = builder.list()
    builder = builder.list_item(item)
    builder = builder.word(word)
    return builder.build()
|
||||||
@ -1,28 +1,20 @@
|
|||||||
"""
|
"""
|
||||||
Styling module for the pyWebLayout library.
|
Style system for the pyWebLayout library.
|
||||||
|
|
||||||
This package contains styling-related components including:
|
This module provides the core styling components used throughout the library.
|
||||||
- Font handling and text styling
|
|
||||||
- Color management
|
|
||||||
- Text decoration and formatting
|
|
||||||
- Alignment and positioning properties
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Import alignment options
|
from enum import Enum
|
||||||
from pyWebLayout.style.layout import Alignment
|
from .fonts import Font, FontWeight, FontStyle, TextDecoration
|
||||||
|
from .abstract_style import (
|
||||||
# Import font-related classes
|
AbstractStyle, AbstractStyleRegistry, FontFamily, FontSize
|
||||||
from pyWebLayout.style.fonts import (
|
|
||||||
Font, FontWeight, FontStyle, TextDecoration
|
|
||||||
)
|
)
|
||||||
|
from .concrete_style import ConcreteStyle
|
||||||
|
from .page_style import PageStyle
|
||||||
|
from .alignment import Alignment
|
||||||
|
|
||||||
# Import new style system
|
__all__ = [
|
||||||
from pyWebLayout.style.abstract_style import (
|
"Font", "FontWeight", "FontStyle", "TextDecoration",
|
||||||
AbstractStyle, AbstractStyleRegistry, FontFamily, FontSize, TextAlign
|
"AbstractStyle", "AbstractStyleRegistry", "FontFamily", "FontSize", "TextAlign",
|
||||||
)
|
"ConcreteStyle", "PageStyle", "Alignment"
|
||||||
from pyWebLayout.style.concrete_style import (
|
]
|
||||||
ConcreteStyle, ConcreteStyleRegistry, RenderingContext, StyleResolver
|
|
||||||
)
|
|
||||||
|
|
||||||
# Import page styling
|
|
||||||
from pyWebLayout.style.page_style import PageStyle
|
|
||||||
|
|||||||
@ -49,12 +49,11 @@ class FontSize(Enum):
|
|||||||
return cls.MEDIUM
|
return cls.MEDIUM
|
||||||
|
|
||||||
|
|
||||||
class TextAlign(Enum):
|
# Import Alignment from the centralized location
|
||||||
"""Text alignment options"""
|
from .alignment import Alignment
|
||||||
LEFT = "left"
|
|
||||||
CENTER = "center"
|
# Use Alignment for text alignment
|
||||||
RIGHT = "right"
|
TextAlign = Alignment
|
||||||
JUSTIFY = "justify"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|||||||
18
pyWebLayout/style/alignment.py
Normal file
18
pyWebLayout/style/alignment.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
"""
|
||||||
|
Alignment options for the pyWebLayout library.
|
||||||
|
|
||||||
|
This module provides alignment-related functionality.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
class Alignment(Enum):
    """
    Alignment options.

    Covers horizontal/text alignment (LEFT, RIGHT, CENTER, JUSTIFY) as well
    as vertical alignment (TOP, BOTTOM), so code that positions content along
    either axis can use this single enum. Each member's value is its
    lowercase name, which is also what ``str()`` returns.
    """
    # Horizontal / text alignment
    LEFT = "left"
    RIGHT = "right"
    CENTER = "center"
    JUSTIFY = "justify"

    # Vertical alignment
    TOP = "top"
    BOTTOM = "bottom"

    def __str__(self):
        """Return the string value of the alignment."""
        return self.value
|
||||||
@ -7,7 +7,8 @@ user preferences, device capabilities, and rendering context.
|
|||||||
|
|
||||||
from typing import Dict, Optional, Tuple, Union, Any
|
from typing import Dict, Optional, Tuple, Union, Any
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from .abstract_style import AbstractStyle, FontFamily, FontSize, TextAlign
|
from .abstract_style import AbstractStyle, FontFamily, FontSize
|
||||||
|
from pyWebLayout.style.alignment import Alignment as TextAlign
|
||||||
from .fonts import Font, FontWeight, FontStyle, TextDecoration
|
from .fonts import Font, FontWeight, FontStyle, TextDecoration
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|||||||
@ -1,17 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Layout and alignment options for the pyWebLayout library.
|
Layout options for the pyWebLayout library.
|
||||||
|
|
||||||
|
This module provides layout-related functionality.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
|
|
||||||
class Alignment(Enum):
|
|
||||||
"""
|
|
||||||
Enum for alignment options used in layout and rendering.
|
|
||||||
"""
|
|
||||||
LEFT = 1
|
|
||||||
CENTER = 2
|
|
||||||
RIGHT = 3
|
|
||||||
TOP = 4
|
|
||||||
BOTTOM = 5
|
|
||||||
JUSTIFY = 6
|
|
||||||
|
|||||||
@ -1,53 +1,54 @@
|
|||||||
from typing import Tuple, Optional
|
from typing import Tuple, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from .abstract_style import AbstractStyle, FontFamily, FontSize
|
||||||
|
from pyWebLayout.style.alignment import Alignment as TextAlign
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class PageStyle:
|
class PageStyle:
|
||||||
"""
|
"""
|
||||||
Defines the styling properties for a page including borders, spacing, and layout.
|
Defines the styling properties for a page including borders, spacing, and layout.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Border properties
|
# Border properties
|
||||||
border_width: int = 0
|
border_width: int = 0
|
||||||
border_color: Tuple[int, int, int] = (0, 0, 0)
|
border_color: Tuple[int, int, int] = (0, 0, 0)
|
||||||
|
|
||||||
# Spacing properties
|
# Spacing properties
|
||||||
line_spacing: int = 5
|
line_spacing: int = 5
|
||||||
inter_block_spacing: int = 15
|
inter_block_spacing: int = 15
|
||||||
|
|
||||||
# Padding (top, right, bottom, left)
|
# Padding (top, right, bottom, left)
|
||||||
padding: Tuple[int, int, int, int] = (20, 20, 20, 20)
|
padding: Tuple[int, int, int, int] = (20, 20, 20, 20)
|
||||||
|
|
||||||
# Background color
|
# Background color
|
||||||
background_color: Tuple[int, int, int] = (255, 255, 255)
|
background_color: Tuple[int, int, int] = (255, 255, 255)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def padding_top(self) -> int:
|
def padding_top(self) -> int:
|
||||||
return self.padding[0]
|
return self.padding[0]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def padding_right(self) -> int:
|
def padding_right(self) -> int:
|
||||||
return self.padding[1]
|
return self.padding[1]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def padding_bottom(self) -> int:
|
def padding_bottom(self) -> int:
|
||||||
return self.padding[2]
|
return self.padding[2]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def padding_left(self) -> int:
|
def padding_left(self) -> int:
|
||||||
return self.padding[3]
|
return self.padding[3]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def total_horizontal_padding(self) -> int:
|
def total_horizontal_padding(self) -> int:
|
||||||
"""Get total horizontal padding (left + right)"""
|
"""Get total horizontal padding (left + right)"""
|
||||||
return self.padding_left + self.padding_right
|
return self.padding_left + self.padding_right
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def total_vertical_padding(self) -> int:
|
def total_vertical_padding(self) -> int:
|
||||||
"""Get total vertical padding (top + bottom)"""
|
"""Get total vertical padding (top + bottom)"""
|
||||||
return self.padding_top + self.padding_bottom
|
return self.padding_top + self.padding_bottom
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def total_border_width(self) -> int:
|
def total_border_width(self) -> int:
|
||||||
"""Get total border width (both sides)"""
|
"""Get total border width (both sides)"""
|
||||||
|
|||||||
@ -9,7 +9,7 @@ import numpy as np
|
|||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
from pyWebLayout.concrete.text import Line, Text, LeftAlignmentHandler, CenterRightAlignmentHandler, JustifyAlignmentHandler
|
from pyWebLayout.concrete.text import Line, Text, LeftAlignmentHandler, CenterRightAlignmentHandler, JustifyAlignmentHandler
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
from pyWebLayout.style import Font
|
from pyWebLayout.style import Font
|
||||||
from pyWebLayout.abstract import Word
|
from pyWebLayout.abstract import Word
|
||||||
from PIL import Image, ImageFont, ImageDraw
|
from PIL import Image, ImageFont, ImageDraw
|
||||||
|
|||||||
@ -9,7 +9,7 @@ from PIL import Image
|
|||||||
from unittest.mock import Mock, patch
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
from pyWebLayout.concrete.box import Box
|
from pyWebLayout.concrete.box import Box
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
|
|
||||||
class TestBox(unittest.TestCase):
|
class TestBox(unittest.TestCase):
|
||||||
|
|||||||
@ -16,7 +16,7 @@ from pyWebLayout.abstract.functional import (
|
|||||||
Link, Button, Form, FormField, LinkType, FormFieldType
|
Link, Button, Form, FormField, LinkType, FormFieldType
|
||||||
)
|
)
|
||||||
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
|
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
|
|
||||||
class TestLinkText(unittest.TestCase):
|
class TestLinkText(unittest.TestCase):
|
||||||
|
|||||||
@ -12,7 +12,7 @@ from unittest.mock import Mock, patch, MagicMock
|
|||||||
|
|
||||||
from pyWebLayout.concrete.image import RenderableImage
|
from pyWebLayout.concrete.image import RenderableImage
|
||||||
from pyWebLayout.abstract.block import Image as AbstractImage
|
from pyWebLayout.abstract.block import Image as AbstractImage
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
|
|
||||||
class TestRenderableImage(unittest.TestCase):
|
class TestRenderableImage(unittest.TestCase):
|
||||||
|
|||||||
@ -12,7 +12,7 @@ from unittest.mock import Mock, patch, MagicMock
|
|||||||
from pyWebLayout.concrete.text import Text, Line
|
from pyWebLayout.concrete.text import Text, Line
|
||||||
from pyWebLayout.abstract.inline import Word
|
from pyWebLayout.abstract.inline import Word
|
||||||
from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
|
from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
class TestText(unittest.TestCase):
|
class TestText(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@ -247,14 +247,14 @@ class TestLine(unittest.TestCase):
|
|||||||
# Create a word to add
|
# Create a word to add
|
||||||
|
|
||||||
for i in range(100):
|
for i in range(100):
|
||||||
word = Word(text="AAAAAAAA", style=self.style)
|
word = Word(text="AAAAAAA", style=self.style)
|
||||||
|
|
||||||
# This test may need adjustment based on the actual implementation
|
# This test may need adjustment based on the actual implementation
|
||||||
|
|
||||||
success, overflow_part = line.add_word(word)
|
success, overflow_part = line.add_word(word)
|
||||||
# If successful, the word should be added
|
# If successful, the word should be added
|
||||||
if overflow_part:
|
if overflow_part:
|
||||||
self.assertEqual(overflow_part.text , "AA")
|
self.assertEqual(overflow_part.text , "A")
|
||||||
return
|
return
|
||||||
|
|
||||||
self.assertFalse(True)
|
self.assertFalse(True)
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Test the new Page implementation to verify it meets the requirements:
|
Unit tests for the new Page implementation to verify it meets the requirements:
|
||||||
1. Accepts a PageStyle that defines borders, line spacing and inter-block spacing
|
1. Accepts a PageStyle that defines borders, line spacing and inter-block spacing
|
||||||
2. Makes an image canvas
|
2. Makes an image canvas
|
||||||
3. Provides a method for accepting child objects
|
3. Provides a method for accepting child objects
|
||||||
@ -7,8 +7,7 @@ Test the new Page implementation to verify it meets the requirements:
|
|||||||
5. Has a method that calls render on all children
|
5. Has a method that calls render on all children
|
||||||
6. Has a method to query a point and determine which child it belongs to
|
6. Has a method to query a point and determine which child it belongs to
|
||||||
"""
|
"""
|
||||||
|
import unittest
|
||||||
import pytest
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from pyWebLayout.concrete.page import Page
|
from pyWebLayout.concrete.page import Page
|
||||||
@ -28,162 +27,224 @@ class SimpleTestRenderable(Renderable, Queriable):
|
|||||||
def render(self):
|
def render(self):
|
||||||
"""Render returns None - drawing is done via the page's draw object"""
|
"""Render returns None - drawing is done via the page's draw object"""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class TestPageImplementation(unittest.TestCase):
|
||||||
|
"""Test cases for the Page class implementation"""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
"""Set up test fixtures"""
|
||||||
|
self.basic_style = PageStyle(
|
||||||
|
border_width=2,
|
||||||
|
border_color=(255, 0, 0),
|
||||||
|
line_spacing=8,
|
||||||
|
inter_block_spacing=20,
|
||||||
|
padding=(15, 15, 15, 15),
|
||||||
|
background_color=(240, 240, 240)
|
||||||
|
)
|
||||||
|
|
||||||
|
self.page_size = (800, 600)
|
||||||
|
|
||||||
|
def test_page_creation_with_style(self):
|
||||||
|
"""Test creating a page with a PageStyle"""
|
||||||
|
page = Page(size=self.page_size, style=self.basic_style)
|
||||||
|
|
||||||
|
self.assertEqual(page.size, self.page_size)
|
||||||
|
self.assertEqual(page.style, self.basic_style)
|
||||||
|
self.assertEqual(page.border_size, 2)
|
||||||
|
|
||||||
|
def test_page_creation_without_style(self):
|
||||||
|
"""Test creating a page without a PageStyle (should use defaults)"""
|
||||||
|
page = Page(size=self.page_size)
|
||||||
|
|
||||||
|
self.assertEqual(page.size, self.page_size)
|
||||||
|
self.assertIsNotNone(page.style)
|
||||||
|
|
||||||
|
def test_page_canvas_and_content_sizes(self):
|
||||||
|
"""Test that page correctly calculates canvas and content sizes"""
|
||||||
|
style = PageStyle(
|
||||||
|
border_width=5,
|
||||||
|
padding=(10, 20, 30, 40) # top, right, bottom, left
|
||||||
|
)
|
||||||
|
|
||||||
|
page = Page(size=self.page_size, style=style)
|
||||||
|
|
||||||
|
# Canvas size should be page size minus borders
|
||||||
|
expected_canvas_size = (790, 590) # 800-10, 600-10 (border on both sides)
|
||||||
|
self.assertEqual(page.canvas_size, expected_canvas_size)
|
||||||
|
|
||||||
|
# Content size should be canvas minus padding
|
||||||
|
expected_content_size = (730, 550) # 790-60, 590-40 (padding left+right, top+bottom)
|
||||||
|
self.assertEqual(page.content_size, expected_content_size)
|
||||||
|
|
||||||
|
def test_page_add_remove_children(self):
|
||||||
|
"""Test adding and removing children from the page"""
|
||||||
|
page = Page(size=self.page_size)
|
||||||
|
|
||||||
|
# Initially no children
|
||||||
|
self.assertEqual(len(page.children), 0)
|
||||||
|
|
||||||
|
# Add children
|
||||||
|
child1 = SimpleTestRenderable("Child 1")
|
||||||
|
child2 = SimpleTestRenderable("Child 2")
|
||||||
|
|
||||||
|
page.add_child(child1)
|
||||||
|
self.assertEqual(len(page.children), 1)
|
||||||
|
self.assertIn(child1, page.children)
|
||||||
|
|
||||||
|
page.add_child(child2)
|
||||||
|
self.assertEqual(len(page.children), 2)
|
||||||
|
self.assertIn(child2, page.children)
|
||||||
|
|
||||||
|
# Test method chaining
|
||||||
|
child3 = SimpleTestRenderable("Child 3")
|
||||||
|
result = page.add_child(child3)
|
||||||
|
self.assertIs(result, page) # Should return self for chaining
|
||||||
|
self.assertEqual(len(page.children), 3)
|
||||||
|
self.assertIn(child3, page.children)
|
||||||
|
|
||||||
|
# Remove child
|
||||||
|
removed = page.remove_child(child2)
|
||||||
|
self.assertTrue(removed)
|
||||||
|
self.assertEqual(len(page.children), 2)
|
||||||
|
self.assertNotIn(child2, page.children)
|
||||||
|
|
||||||
|
# Try to remove non-existent child
|
||||||
|
removed = page.remove_child(child2)
|
||||||
|
self.assertFalse(removed)
|
||||||
|
|
||||||
|
# Clear all children
|
||||||
|
page.clear_children()
|
||||||
|
self.assertEqual(len(page.children), 0)
|
||||||
|
|
||||||
|
def test_page_render(self):
|
||||||
|
"""Test that page renders and creates a canvas"""
|
||||||
|
style = PageStyle(
|
||||||
|
border_width=2,
|
||||||
|
border_color=(255, 0, 0),
|
||||||
|
background_color=(255, 255, 255)
|
||||||
|
)
|
||||||
|
|
||||||
|
page = Page(size=(200, 150), style=style)
|
||||||
|
|
||||||
|
# Add a child
|
||||||
|
child = SimpleTestRenderable("Test child")
|
||||||
|
page.add_child(child)
|
||||||
|
|
||||||
|
# Render the page
|
||||||
|
image = page.render()
|
||||||
|
|
||||||
|
# Check that we got an image
|
||||||
|
self.assertIsInstance(image, Image.Image)
|
||||||
|
self.assertEqual(image.size, (200, 150))
|
||||||
|
self.assertEqual(image.mode, 'RGBA')
|
||||||
|
|
||||||
|
# Check that draw object is available
|
||||||
|
self.assertIsNotNone(page.draw)
|
||||||
|
|
||||||
|
def test_page_query_point(self):
|
||||||
|
"""Test querying points to find children"""
|
||||||
|
page = Page(size=(400, 300))
|
||||||
|
|
||||||
|
# Add children with known positions and sizes
|
||||||
|
child1 = SimpleTestRenderable("Child 1", (100, 50))
|
||||||
|
child2 = SimpleTestRenderable("Child 2", (80, 40))
|
||||||
|
|
||||||
|
page.add_child(child1).add_child(child2)
|
||||||
|
|
||||||
def test_page_creation_with_style():
|
# Query points
|
||||||
"""Test creating a page with a PageStyle"""
|
# Point within first child
|
||||||
style = PageStyle(
|
found_child = page.query_point((90, 30))
|
||||||
border_width=2,
|
self.assertEqual(found_child, child1)
|
||||||
border_color=(255, 0, 0),
|
|
||||||
line_spacing=8,
|
# Point within second child
|
||||||
inter_block_spacing=20,
|
found_child = page.query_point((30, 30))
|
||||||
padding=(15, 15, 15, 15),
|
self.assertEqual(found_child, child2)
|
||||||
background_color=(240, 240, 240)
|
|
||||||
)
|
# Point outside any child
|
||||||
|
found_child = page.query_point((300, 250))
|
||||||
|
self.assertIsNone(found_child)
|
||||||
|
|
||||||
page = Page(size=(800, 600), style=style)
|
def test_page_in_object(self):
|
||||||
|
"""Test that page correctly implements in_object"""
|
||||||
|
page = Page(size=(400, 300))
|
||||||
|
|
||||||
|
# Points within page bounds
|
||||||
|
self.assertTrue(page.in_object((0, 0)))
|
||||||
|
self.assertTrue(page.in_object((200, 150)))
|
||||||
|
self.assertTrue(page.in_object((399, 299)))
|
||||||
|
|
||||||
|
# Points outside page bounds
|
||||||
|
self.assertFalse(page.in_object((-1, 0)))
|
||||||
|
self.assertFalse(page.in_object((0, -1)))
|
||||||
|
self.assertFalse(page.in_object((400, 299)))
|
||||||
|
self.assertFalse(page.in_object((399, 300)))
|
||||||
|
|
||||||
assert page.size == (800, 600)
|
def test_page_with_borders(self):
|
||||||
assert page.style == style
|
"""Test page rendering with borders"""
|
||||||
assert page.border_size == 2
|
style = PageStyle(
|
||||||
|
border_width=3,
|
||||||
|
border_color=(128, 128, 128),
|
||||||
def test_page_canvas_and_content_sizes():
|
background_color=(255, 255, 255)
|
||||||
"""Test that page correctly calculates canvas and content sizes"""
|
)
|
||||||
style = PageStyle(
|
|
||||||
border_width=5,
|
page = Page(size=(100, 100), style=style)
|
||||||
padding=(10, 20, 30, 40) # top, right, bottom, left
|
image = page.render()
|
||||||
)
|
|
||||||
|
# Check that image was created
|
||||||
page = Page(size=(800, 600), style=style)
|
self.assertIsInstance(image, Image.Image)
|
||||||
|
self.assertEqual(image.size, (100, 100))
|
||||||
# Canvas size should be page size minus borders
|
|
||||||
assert page.canvas_size == (790, 590) # 800-10, 600-10 (border on both sides)
|
# The border should be drawn but we can't easily test pixel values
|
||||||
|
# Just verify the image exists and has the right properties
|
||||||
# Content size should be canvas minus padding
|
|
||||||
assert page.content_size == (730, 550) # 790-60, 590-40 (padding left+right, top+bottom)
|
def test_page_border_size_property(self):
|
||||||
|
"""Test that border_size property returns correct value"""
|
||||||
|
# Test with border
|
||||||
def test_page_add_remove_children():
|
style_with_border = PageStyle(border_width=5)
|
||||||
"""Test adding and removing children from the page"""
|
page_with_border = Page(size=self.page_size, style=style_with_border)
|
||||||
page = Page(size=(800, 600))
|
self.assertEqual(page_with_border.border_size, 5)
|
||||||
|
|
||||||
# Initially no children
|
# Test without border
|
||||||
assert len(page.children) == 0
|
style_no_border = PageStyle(border_width=0)
|
||||||
|
page_no_border = Page(size=self.page_size, style=style_no_border)
|
||||||
# Add children
|
self.assertEqual(page_no_border.border_size, 0)
|
||||||
child1 = SimpleTestRenderable("Child 1")
|
|
||||||
child2 = SimpleTestRenderable("Child 2")
|
def test_page_style_properties(self):
|
||||||
|
"""Test that page correctly exposes style properties"""
|
||||||
page.add_child(child1)
|
page = Page(size=self.page_size, style=self.basic_style)
|
||||||
assert len(page.children) == 1
|
|
||||||
|
# Test that style properties are accessible
|
||||||
page.add_child(child2)
|
self.assertEqual(page.style.border_width, 2)
|
||||||
assert len(page.children) == 2
|
self.assertEqual(page.style.border_color, (255, 0, 0))
|
||||||
|
self.assertEqual(page.style.line_spacing, 8)
|
||||||
# Test method chaining
|
self.assertEqual(page.style.inter_block_spacing, 20)
|
||||||
child3 = SimpleTestRenderable("Child 3")
|
self.assertEqual(page.style.padding, (15, 15, 15, 15))
|
||||||
result = page.add_child(child3)
|
self.assertEqual(page.style.background_color, (240, 240, 240))
|
||||||
assert result is page # Should return self for chaining
|
|
||||||
assert len(page.children) == 3
|
def test_page_children_list_operations(self):
|
||||||
|
"""Test that children list behaves correctly"""
|
||||||
# Remove child
|
page = Page(size=self.page_size)
|
||||||
removed = page.remove_child(child2)
|
|
||||||
assert removed is True
|
# Test that children is initially empty list
|
||||||
assert len(page.children) == 2
|
self.assertIsInstance(page.children, list)
|
||||||
assert child2 not in page.children
|
self.assertEqual(len(page.children), 0)
|
||||||
|
|
||||||
# Try to remove non-existent child
|
# Test adding multiple children
|
||||||
removed = page.remove_child(child2)
|
children = [
|
||||||
assert removed is False
|
SimpleTestRenderable(f"Child {i}")
|
||||||
|
for i in range(5)
|
||||||
# Clear all children
|
]
|
||||||
page.clear_children()
|
|
||||||
assert len(page.children) == 0
|
for child in children:
|
||||||
|
page.add_child(child)
|
||||||
|
|
||||||
def test_page_render():
|
self.assertEqual(len(page.children), 5)
|
||||||
"""Test that page renders and creates a canvas"""
|
|
||||||
style = PageStyle(
|
# Test that children are in the correct order
|
||||||
border_width=2,
|
for i, child in enumerate(page.children):
|
||||||
border_color=(255, 0, 0),
|
self.assertEqual(child._text, f"Child {i}")
|
||||||
background_color=(255, 255, 255)
|
|
||||||
)
|
|
||||||
|
|
||||||
page = Page(size=(200, 150), style=style)
|
|
||||||
|
|
||||||
# Add a child
|
|
||||||
child = SimpleTestRenderable("Test child")
|
|
||||||
page.add_child(child)
|
|
||||||
|
|
||||||
# Render the page
|
|
||||||
image = page.render()
|
|
||||||
|
|
||||||
# Check that we got an image
|
|
||||||
assert isinstance(image, Image.Image)
|
|
||||||
assert image.size == (200, 150)
|
|
||||||
assert image.mode == 'RGBA'
|
|
||||||
|
|
||||||
# Check that draw object is available
|
|
||||||
assert page.draw is not None
|
|
||||||
|
|
||||||
|
|
||||||
def test_page_query_point():
|
|
||||||
"""Test querying points to find children"""
|
|
||||||
page = Page(size=(400, 300))
|
|
||||||
|
|
||||||
# Add children with known positions and sizes
|
|
||||||
child1 = SimpleTestRenderable("Child 1", (100, 50))
|
|
||||||
child2 = SimpleTestRenderable("Child 2", (80, 40))
|
|
||||||
|
|
||||||
page.add_child(child1).add_child(child2)
|
|
||||||
|
|
||||||
# Query points
|
|
||||||
# Point within first child
|
|
||||||
found_child = page.query_point((90, 30))
|
|
||||||
assert found_child == child1
|
|
||||||
|
|
||||||
# Point within second child
|
|
||||||
found_child = page.query_point((30, 30))
|
|
||||||
assert found_child == child2
|
|
||||||
|
|
||||||
# Point outside any child
|
|
||||||
found_child = page.query_point((300, 250))
|
|
||||||
assert found_child is None
|
|
||||||
|
|
||||||
|
|
||||||
def test_page_in_object():
|
|
||||||
"""Test that page correctly implements in_object"""
|
|
||||||
page = Page(size=(400, 300))
|
|
||||||
|
|
||||||
# Points within page bounds
|
|
||||||
assert page.in_object((0, 0)) is True
|
|
||||||
assert page.in_object((200, 150)) is True
|
|
||||||
assert page.in_object((399, 299)) is True
|
|
||||||
|
|
||||||
# Points outside page bounds
|
|
||||||
assert page.in_object((-1, 0)) is False
|
|
||||||
assert page.in_object((0, -1)) is False
|
|
||||||
assert page.in_object((400, 299)) is False
|
|
||||||
assert page.in_object((399, 300)) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_page_with_borders():
|
|
||||||
"""Test page rendering with borders"""
|
|
||||||
style = PageStyle(
|
|
||||||
border_width=3,
|
|
||||||
border_color=(128, 128, 128),
|
|
||||||
background_color=(255, 255, 255)
|
|
||||||
)
|
|
||||||
|
|
||||||
page = Page(size=(100, 100), style=style)
|
|
||||||
image = page.render()
|
|
||||||
|
|
||||||
# Check that image was created
|
|
||||||
assert isinstance(image, Image.Image)
|
|
||||||
assert image.size == (100, 100)
|
|
||||||
|
|
||||||
# The border should be drawn but we can't easily test pixel values
|
|
||||||
# Just verify the image exists and has the right properties
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
|
|||||||
0
tests/io_tests/__init__.py
Normal file
0
tests/io_tests/__init__.py
Normal file
@ -7,7 +7,8 @@ reusing test patterns from test_html_extraction.py that are known to pass.
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
from pyWebLayout.io.readers.html_extraction import (
|
from pyWebLayout.io.readers.html_extraction import (
|
||||||
create_base_context,
|
create_base_context,
|
||||||
apply_element_styling,
|
apply_element_styling,
|
||||||
parse_inline_styles,
|
parse_inline_styles,
|
||||||
9
tests/layout/__init__.py
Normal file
9
tests/layout/__init__.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
"""
|
||||||
|
Tests for the layout module.
|
||||||
|
|
||||||
|
This package contains tests for the layout system including:
|
||||||
|
- Document layouter tests
|
||||||
|
- Ereader layout system tests
|
||||||
|
- Page buffer tests
|
||||||
|
- Position tracking tests
|
||||||
|
"""
|
||||||
456
tests/layout/test_ereader_system.py
Normal file
456
tests/layout/test_ereader_system.py
Normal file
@ -0,0 +1,456 @@
|
|||||||
|
"""
|
||||||
|
Comprehensive tests for the ereader layout system.
|
||||||
|
|
||||||
|
Tests the complete ereader functionality including position tracking,
|
||||||
|
font scaling, chapter navigation, and page buffering.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pyWebLayout.abstract.block import Paragraph, Heading, HeadingLevel
|
||||||
|
from pyWebLayout.abstract.inline import Word
|
||||||
|
from pyWebLayout.style import Font
|
||||||
|
from pyWebLayout.style.page_style import PageStyle
|
||||||
|
from pyWebLayout.layout.ereader_layout import RenderingPosition, ChapterNavigator, FontScaler, BidirectionalLayouter
|
||||||
|
from pyWebLayout.layout.ereader_manager import EreaderLayoutManager, BookmarkManager, create_ereader_manager
|
||||||
|
|
||||||
|
|
||||||
|
class TestRenderingPosition(unittest.TestCase):
|
||||||
|
"""Test the RenderingPosition class"""
|
||||||
|
|
||||||
|
def test_position_creation(self):
|
||||||
|
"""Test creating a rendering position"""
|
||||||
|
pos = RenderingPosition(
|
||||||
|
chapter_index=1,
|
||||||
|
block_index=5,
|
||||||
|
word_index=10,
|
||||||
|
table_row=2,
|
||||||
|
table_col=3
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(pos.chapter_index, 1)
|
||||||
|
self.assertEqual(pos.block_index, 5)
|
||||||
|
self.assertEqual(pos.word_index, 10)
|
||||||
|
self.assertEqual(pos.table_row, 2)
|
||||||
|
self.assertEqual(pos.table_col, 3)
|
||||||
|
|
||||||
|
def test_position_serialization(self):
|
||||||
|
"""Test position serialization and deserialization"""
|
||||||
|
pos = RenderingPosition(
|
||||||
|
chapter_index=1,
|
||||||
|
block_index=5,
|
||||||
|
word_index=10,
|
||||||
|
remaining_pretext="test"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Serialize to dict
|
||||||
|
pos_dict = pos.to_dict()
|
||||||
|
self.assertIsInstance(pos_dict, dict)
|
||||||
|
self.assertEqual(pos_dict['chapter_index'], 1)
|
||||||
|
self.assertEqual(pos_dict['remaining_pretext'], "test")
|
||||||
|
|
||||||
|
# Deserialize from dict
|
||||||
|
pos2 = RenderingPosition.from_dict(pos_dict)
|
||||||
|
self.assertEqual(pos, pos2)
|
||||||
|
|
||||||
|
def test_position_copy(self):
|
||||||
|
"""Test position copying"""
|
||||||
|
pos = RenderingPosition(chapter_index=1, block_index=5)
|
||||||
|
pos_copy = pos.copy()
|
||||||
|
|
||||||
|
self.assertEqual(pos, pos_copy)
|
||||||
|
self.assertIsNot(pos, pos_copy) # Different objects
|
||||||
|
|
||||||
|
# Modify copy
|
||||||
|
pos_copy.word_index = 10
|
||||||
|
self.assertNotEqual(pos, pos_copy)
|
||||||
|
|
||||||
|
def test_position_equality_and_hashing(self):
|
||||||
|
"""Test position equality and hashing"""
|
||||||
|
pos1 = RenderingPosition(chapter_index=1, block_index=5)
|
||||||
|
pos2 = RenderingPosition(chapter_index=1, block_index=5)
|
||||||
|
pos3 = RenderingPosition(chapter_index=1, block_index=6)
|
||||||
|
|
||||||
|
self.assertEqual(pos1, pos2)
|
||||||
|
self.assertNotEqual(pos1, pos3)
|
||||||
|
|
||||||
|
# Test hashing (for use as dict keys)
|
||||||
|
pos_dict = {pos1: "test"}
|
||||||
|
self.assertEqual(pos_dict[pos2], "test") # Should work due to equality
|
||||||
|
|
||||||
|
|
||||||
|
class TestChapterNavigator(unittest.TestCase):
|
||||||
|
"""Test the ChapterNavigator class"""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
"""Set up test data"""
|
||||||
|
self.font = Font()
|
||||||
|
|
||||||
|
# Create test blocks with headings
|
||||||
|
self.blocks = [
|
||||||
|
Paragraph(self.font), # Block 0
|
||||||
|
Heading(HeadingLevel.H1, self.font), # Block 1 - Chapter 1
|
||||||
|
Paragraph(self.font), # Block 2
|
||||||
|
Heading(HeadingLevel.H2, self.font), # Block 3 - Subsection
|
||||||
|
Paragraph(self.font), # Block 4
|
||||||
|
Heading(HeadingLevel.H1, self.font), # Block 5 - Chapter 2
|
||||||
|
Paragraph(self.font), # Block 6
|
||||||
|
]
|
||||||
|
|
||||||
|
# Add text to headings
|
||||||
|
self.blocks[1].add_word(Word("Chapter", self.font))
|
||||||
|
self.blocks[1].add_word(Word("One", self.font))
|
||||||
|
|
||||||
|
self.blocks[3].add_word(Word("Subsection", self.font))
|
||||||
|
self.blocks[3].add_word(Word("A", self.font))
|
||||||
|
|
||||||
|
self.blocks[5].add_word(Word("Chapter", self.font))
|
||||||
|
self.blocks[5].add_word(Word("Two", self.font))
|
||||||
|
|
||||||
|
def test_chapter_detection(self):
|
||||||
|
"""Test that chapters are detected correctly"""
|
||||||
|
navigator = ChapterNavigator(self.blocks)
|
||||||
|
|
||||||
|
self.assertEqual(len(navigator.chapters), 3) # 2 H1s + 1 H2
|
||||||
|
|
||||||
|
# Check chapter titles
|
||||||
|
titles = [chapter.title for chapter in navigator.chapters]
|
||||||
|
self.assertIn("Chapter One", titles)
|
||||||
|
self.assertIn("Subsection A", titles)
|
||||||
|
self.assertIn("Chapter Two", titles)
|
||||||
|
|
||||||
|
def test_table_of_contents(self):
|
||||||
|
"""Test table of contents generation"""
|
||||||
|
navigator = ChapterNavigator(self.blocks)
|
||||||
|
toc = navigator.get_table_of_contents()
|
||||||
|
|
||||||
|
self.assertEqual(len(toc), 3)
|
||||||
|
|
||||||
|
# Check first entry
|
||||||
|
title, level, position = toc[0]
|
||||||
|
self.assertEqual(title, "Chapter One")
|
||||||
|
self.assertEqual(level, HeadingLevel.H1)
|
||||||
|
self.assertIsInstance(position, RenderingPosition)
|
||||||
|
|
||||||
|
def test_chapter_position_lookup(self):
|
||||||
|
"""Test looking up chapter positions"""
|
||||||
|
navigator = ChapterNavigator(self.blocks)
|
||||||
|
|
||||||
|
pos = navigator.get_chapter_position("Chapter One")
|
||||||
|
self.assertIsNotNone(pos)
|
||||||
|
self.assertEqual(pos.chapter_index, 0)
|
||||||
|
|
||||||
|
pos = navigator.get_chapter_position("Nonexistent Chapter")
|
||||||
|
self.assertIsNone(pos)
|
||||||
|
|
||||||
|
def test_current_chapter_detection(self):
|
||||||
|
"""Test detecting current chapter from position"""
|
||||||
|
navigator = ChapterNavigator(self.blocks)
|
||||||
|
|
||||||
|
# Position in first chapter
|
||||||
|
pos = RenderingPosition(chapter_index=0, block_index=2)
|
||||||
|
chapter = navigator.get_current_chapter(pos)
|
||||||
|
self.assertIsNotNone(chapter)
|
||||||
|
self.assertEqual(chapter.title, "Chapter One")
|
||||||
|
|
||||||
|
|
||||||
|
class TestFontScaler(unittest.TestCase):
|
||||||
|
"""Test the FontScaler class"""
|
||||||
|
|
||||||
|
def test_font_scaling(self):
|
||||||
|
"""Test font scaling functionality"""
|
||||||
|
original_font = Font(font_size=12)
|
||||||
|
|
||||||
|
# Test no scaling
|
||||||
|
scaled_font = FontScaler.scale_font(original_font, 1.0)
|
||||||
|
self.assertEqual(scaled_font.font_size, 12)
|
||||||
|
|
||||||
|
# Test 2x scaling
|
||||||
|
scaled_font = FontScaler.scale_font(original_font, 2.0)
|
||||||
|
self.assertEqual(scaled_font.font_size, 24)
|
||||||
|
|
||||||
|
# Test 0.5x scaling
|
||||||
|
scaled_font = FontScaler.scale_font(original_font, 0.5)
|
||||||
|
self.assertEqual(scaled_font.font_size, 6)
|
||||||
|
|
||||||
|
# Test minimum size constraint
|
||||||
|
scaled_font = FontScaler.scale_font(original_font, 0.01)
|
||||||
|
self.assertGreaterEqual(scaled_font.font_size, 1)
|
||||||
|
|
||||||
|
def test_word_spacing_scaling(self):
|
||||||
|
"""Test word spacing scaling"""
|
||||||
|
original_spacing = (5, 15)
|
||||||
|
|
||||||
|
# Test no scaling
|
||||||
|
scaled_spacing = FontScaler.scale_word_spacing(original_spacing, 1.0)
|
||||||
|
self.assertEqual(scaled_spacing, (5, 15))
|
||||||
|
|
||||||
|
# Test 2x scaling
|
||||||
|
scaled_spacing = FontScaler.scale_word_spacing(original_spacing, 2.0)
|
||||||
|
self.assertEqual(scaled_spacing, (10, 30))
|
||||||
|
|
||||||
|
# Test minimum constraints
|
||||||
|
scaled_spacing = FontScaler.scale_word_spacing(original_spacing, 0.1)
|
||||||
|
self.assertGreaterEqual(scaled_spacing[0], 1)
|
||||||
|
self.assertGreaterEqual(scaled_spacing[1], 2)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBookmarkManager(unittest.TestCase):
|
||||||
|
"""Test the BookmarkManager class"""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
"""Set up test environment"""
|
||||||
|
self.temp_dir = tempfile.mkdtemp()
|
||||||
|
self.document_id = "test_document"
|
||||||
|
self.bookmark_manager = BookmarkManager(self.document_id, self.temp_dir)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
"""Clean up test environment"""
|
||||||
|
shutil.rmtree(self.temp_dir)
|
||||||
|
|
||||||
|
def test_bookmark_operations(self):
|
||||||
|
"""Test bookmark add/remove/get operations"""
|
||||||
|
pos = RenderingPosition(chapter_index=1, block_index=5)
|
||||||
|
|
||||||
|
# Add bookmark
|
||||||
|
self.bookmark_manager.add_bookmark("test_bookmark", pos)
|
||||||
|
|
||||||
|
# Get bookmark
|
||||||
|
retrieved_pos = self.bookmark_manager.get_bookmark("test_bookmark")
|
||||||
|
self.assertEqual(retrieved_pos, pos)
|
||||||
|
|
||||||
|
# List bookmarks
|
||||||
|
bookmarks = self.bookmark_manager.list_bookmarks()
|
||||||
|
self.assertEqual(len(bookmarks), 1)
|
||||||
|
self.assertEqual(bookmarks[0][0], "test_bookmark")
|
||||||
|
self.assertEqual(bookmarks[0][1], pos)
|
||||||
|
|
||||||
|
# Remove bookmark
|
||||||
|
success = self.bookmark_manager.remove_bookmark("test_bookmark")
|
||||||
|
self.assertTrue(success)
|
||||||
|
|
||||||
|
# Verify removal
|
||||||
|
retrieved_pos = self.bookmark_manager.get_bookmark("test_bookmark")
|
||||||
|
self.assertIsNone(retrieved_pos)
|
||||||
|
|
||||||
|
def test_reading_position_persistence(self):
|
||||||
|
"""Test saving and loading reading position"""
|
||||||
|
pos = RenderingPosition(chapter_index=2, block_index=10, word_index=5)
|
||||||
|
|
||||||
|
# Save position
|
||||||
|
self.bookmark_manager.save_reading_position(pos)
|
||||||
|
|
||||||
|
# Create new manager instance (simulates app restart)
|
||||||
|
new_manager = BookmarkManager(self.document_id, self.temp_dir)
|
||||||
|
|
||||||
|
# Load position
|
||||||
|
loaded_pos = new_manager.load_reading_position()
|
||||||
|
self.assertEqual(loaded_pos, pos)
|
||||||
|
|
||||||
|
def test_bookmark_persistence(self):
|
||||||
|
"""Test that bookmarks persist across manager instances"""
|
||||||
|
pos = RenderingPosition(chapter_index=1, block_index=5)
|
||||||
|
|
||||||
|
# Add bookmark
|
||||||
|
self.bookmark_manager.add_bookmark("persistent_bookmark", pos)
|
||||||
|
|
||||||
|
# Create new manager instance
|
||||||
|
new_manager = BookmarkManager(self.document_id, self.temp_dir)
|
||||||
|
|
||||||
|
# Verify bookmark exists
|
||||||
|
retrieved_pos = new_manager.get_bookmark("persistent_bookmark")
|
||||||
|
self.assertEqual(retrieved_pos, pos)
|
||||||
|
|
||||||
|
|
||||||
|
class TestEreaderLayoutManager(unittest.TestCase):
    """Test the complete EreaderLayoutManager.

    Every test runs with the current working directory switched to a
    per-test temporary directory (the original tests did this so that
    bookmark files land there).  All cleanup is registered with
    ``addCleanup`` so it also runs when an assertion fails; cleanups run
    in reverse registration order: manager shutdown, restore cwd, delete
    the temporary directory.
    """

    def setUp(self):
        """Create the temp working directory and a small test document."""
        import os  # imported locally, as the original tests did

        self.temp_dir = tempfile.mkdtemp()
        # Registered first so it runs last, after cwd has been restored.
        self.addCleanup(shutil.rmtree, self.temp_dir)

        # Change to temp directory for bookmarks
        original_cwd = Path.cwd()
        os.chdir(self.temp_dir)
        self.addCleanup(os.chdir, original_cwd)

        self.font = Font()

        # Create test document with multiple paragraphs and headings
        self.blocks = []

        # Add a heading
        heading = Heading(HeadingLevel.H1, self.font)
        heading.add_word(Word("Test", self.font))
        heading.add_word(Word("Chapter", self.font))
        self.blocks.append(heading)

        # Add several paragraphs with multiple words
        for i in range(3):
            paragraph = Paragraph(self.font)
            for j in range(20):  # 20 words per paragraph
                paragraph.add_word(Word(f"Word{i}_{j}", self.font))
            self.blocks.append(paragraph)

        self.page_size = (400, 600)
        self.document_id = "test_document"

    def _create_manager(self, factory=None):
        """Build a manager for the test document and schedule its shutdown.

        Args:
            factory: Callable taking ``(blocks, page_size, document_id)``;
                defaults to ``EreaderLayoutManager``.

        Returns:
            The manager returned by ``factory``.  ``shutdown()`` is
            registered as a cleanup so it runs even if the test fails.
        """
        if factory is None:
            factory = EreaderLayoutManager
        manager = factory(self.blocks, self.page_size, self.document_id)
        self.addCleanup(manager.shutdown)
        return manager

    def test_manager_initialization(self):
        """Test ereader manager initialization"""
        manager = self._create_manager()

        self.assertEqual(manager.page_size, self.page_size)
        self.assertEqual(manager.document_id, self.document_id)
        self.assertEqual(manager.font_scale, 1.0)
        self.assertIsInstance(manager.current_position, RenderingPosition)

    def test_font_scaling(self):
        """Test font scaling functionality"""
        manager = self._create_manager()

        # Test initial scale
        self.assertEqual(manager.get_font_scale(), 1.0)

        # Test scaling
        page = manager.set_font_scale(1.5)
        self.assertEqual(manager.get_font_scale(), 1.5)
        self.assertIsNotNone(page)

    def test_table_of_contents(self):
        """Test table of contents functionality"""
        manager = self._create_manager()

        toc = manager.get_table_of_contents()
        self.assertGreater(len(toc), 0)

        # Check first entry; the position element is not asserted on.
        title, level, _position = toc[0]
        self.assertEqual(title, "Test Chapter")
        self.assertEqual(level, HeadingLevel.H1)

    def test_bookmark_functionality(self):
        """Test bookmark functionality"""
        manager = self._create_manager()

        # Add bookmark
        success = manager.add_bookmark("test_bookmark")
        self.assertTrue(success)

        # List bookmarks
        bookmarks = manager.list_bookmarks()
        self.assertEqual(len(bookmarks), 1)
        self.assertEqual(bookmarks[0][0], "test_bookmark")

        # Jump to bookmark (should work even though it's the same position)
        page = manager.jump_to_bookmark("test_bookmark")
        self.assertIsNotNone(page)

        # Remove bookmark
        success = manager.remove_bookmark("test_bookmark")
        self.assertTrue(success)

    def test_progress_tracking(self):
        """Test reading progress tracking"""
        manager = self._create_manager()

        # Progress is only checked to be a fraction within [0.0, 1.0].
        progress = manager.get_reading_progress()
        self.assertGreaterEqual(progress, 0.0)
        self.assertLessEqual(progress, 1.0)

        # Get position info
        info = manager.get_position_info()
        self.assertIn('position', info)
        self.assertIn('progress', info)
        self.assertIn('font_scale', info)

    def test_convenience_function(self):
        """Test the convenience function"""
        manager = self._create_manager(create_ereader_manager)

        self.assertIsInstance(manager, EreaderLayoutManager)
        self.assertEqual(manager.page_size, self.page_size)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||||
578
tests/layout/test_recursive_position.py
Normal file
578
tests/layout/test_recursive_position.py
Normal file
@ -0,0 +1,578 @@
|
|||||||
|
"""
|
||||||
|
Unit tests for the recursive position system.
|
||||||
|
Tests the hierarchical position tracking that can reference any nested content structure.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pyWebLayout.layout.recursive_position import (
|
||||||
|
ContentType, LocationNode, RecursivePosition, PositionBuilder, PositionStorage,
|
||||||
|
create_word_position, create_image_position, create_table_cell_position, create_list_item_position
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestLocationNode(unittest.TestCase):
    """Test cases for LocationNode"""

    def test_node_creation(self):
        """Constructor arguments are exposed as attributes."""
        word_node = LocationNode(ContentType.WORD, 5, 3, {"text": "hello"})

        self.assertEqual(word_node.content_type, ContentType.WORD)
        self.assertEqual(word_node.index, 5)
        self.assertEqual(word_node.offset, 3)
        self.assertEqual(word_node.metadata["text"], "hello")

    def test_node_serialization(self):
        """A node converts to a plain dict and back again."""
        cell_node = LocationNode(ContentType.TABLE_CELL, 2, 0, {"colspan": 2})

        # Node -> dict
        payload = cell_node.to_dict()
        self.assertEqual(
            payload,
            {
                'content_type': 'table_cell',
                'index': 2,
                'offset': 0,
                'metadata': {'colspan': 2},
            },
        )

        # dict -> node
        rebuilt = LocationNode.from_dict(payload)
        self.assertEqual(rebuilt.content_type, ContentType.TABLE_CELL)
        self.assertEqual(rebuilt.index, 2)
        self.assertEqual(rebuilt.offset, 0)
        self.assertEqual(rebuilt.metadata, {'colspan': 2})

    def test_node_string_representation(self):
        """str() of a node, without and with an explicit offset."""
        without_offset = LocationNode(ContentType.PARAGRAPH, 3)
        self.assertEqual(str(without_offset), "paragraph[3]")

        with_offset = LocationNode(ContentType.WORD, 5, 2)
        self.assertEqual(str(with_offset), "word[5]+2")
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecursivePosition(unittest.TestCase):
    """Test cases for RecursivePosition"""

    def test_position_creation(self):
        """A new position starts with a single document node in its path."""
        fresh = RecursivePosition()

        self.assertEqual(len(fresh.path), 1)
        self.assertEqual(fresh.path[0].content_type, ContentType.DOCUMENT)

    def test_position_building(self):
        """Nodes added one by one extend the path in order."""
        position = RecursivePosition()
        for node in (
            LocationNode(ContentType.CHAPTER, 2),
            LocationNode(ContentType.BLOCK, 5),
            LocationNode(ContentType.PARAGRAPH, 0),
            LocationNode(ContentType.WORD, 12, 3),
        ):
            position.add_node(node)

        # Four added nodes plus the document root.
        self.assertEqual(len(position.path), 5)

        chapter_entry = position.path[1]
        self.assertEqual(chapter_entry.content_type, ContentType.CHAPTER)
        self.assertEqual(chapter_entry.index, 2)

        leaf_entry = position.path[-1]
        self.assertEqual(leaf_entry.content_type, ContentType.WORD)
        self.assertEqual(leaf_entry.index, 12)
        self.assertEqual(leaf_entry.offset, 3)

    def test_position_copy(self):
        """copy() yields an equal position that shares no containers."""
        source = RecursivePosition()
        source.add_node(LocationNode(ContentType.CHAPTER, 1))
        source.add_node(LocationNode(ContentType.WORD, 5, 2, {"text": "test"}))
        source.rendering_metadata = {"font_scale": 1.5}

        clone = source.copy()

        # Equal by value, distinct by identity at every checked level.
        self.assertEqual(source, clone)
        self.assertIsNot(source, clone)
        self.assertIsNot(source.path, clone.path)
        self.assertIsNot(source.rendering_metadata, clone.rendering_metadata)

        # Growing the clone must leave the source path untouched.
        clone.add_node(LocationNode(ContentType.IMAGE, 0))
        self.assertNotEqual(len(source.path), len(clone.path))

    def test_node_queries(self):
        """get_node / get_nodes look nodes up by content type."""
        position = RecursivePosition()
        for content_type, index in (
            (ContentType.CHAPTER, 2),
            (ContentType.BLOCK, 5),
            (ContentType.TABLE, 0),
            (ContentType.TABLE_ROW, 1),
            (ContentType.TABLE_CELL, 2),
        ):
            position.add_node(LocationNode(content_type, index))

        # A type that is present in the path.
        chapter = position.get_node(ContentType.CHAPTER)
        self.assertIsNotNone(chapter)
        self.assertEqual(chapter.index, 2)

        # A type that is absent from the path.
        missing_word = position.get_node(ContentType.WORD)
        self.assertIsNone(missing_word)

        # The list-returning variant; only one row node exists here.
        row_nodes = position.get_nodes(ContentType.TABLE_ROW)
        self.assertEqual(len(row_nodes), 1)
        self.assertEqual(row_nodes[0].index, 1)

    def test_position_hierarchy_operations(self):
        """Ancestor, descendant and common-ancestor queries."""
        # document -> chapter[1] -> block[2]
        upper = RecursivePosition()
        upper.add_node(LocationNode(ContentType.CHAPTER, 1))
        upper.add_node(LocationNode(ContentType.BLOCK, 2))

        # document -> chapter[1] -> block[2] -> paragraph -> word[5]
        lower = upper.copy()
        lower.add_node(LocationNode(ContentType.PARAGRAPH, 0))
        lower.add_node(LocationNode(ContentType.WORD, 5))

        # document -> chapter[2] -> block[1]
        elsewhere = RecursivePosition()
        elsewhere.add_node(LocationNode(ContentType.CHAPTER, 2))
        elsewhere.add_node(LocationNode(ContentType.BLOCK, 1))

        # Relationship predicates
        self.assertTrue(upper.is_ancestor_of(lower))
        self.assertTrue(lower.is_descendant_of(upper))
        self.assertFalse(upper.is_ancestor_of(elsewhere))
        self.assertFalse(elsewhere.is_descendant_of(upper))

        # Shared prefix with the descendant: document + chapter + block
        shared = upper.get_common_ancestor(lower)
        self.assertEqual(len(shared.path), 3)

        # Shared prefix with the unrelated position: document root only
        shared_elsewhere = upper.get_common_ancestor(elsewhere)
        self.assertEqual(len(shared_elsewhere.path), 1)

    def test_position_truncation(self):
        """truncate_to_type cuts the path back to the requested type."""
        position = RecursivePosition()
        for content_type, index in (
            (ContentType.CHAPTER, 1),
            (ContentType.BLOCK, 2),
            (ContentType.PARAGRAPH, 0),
            (ContentType.WORD, 5),
        ):
            position.add_node(LocationNode(content_type, index))

        # Work on a copy and cut back to block level.
        block_level = position.copy().truncate_to_type(ContentType.BLOCK)

        # document + chapter + block
        self.assertEqual(len(block_level.path), 3)
        self.assertEqual(block_level.path[-1].content_type, ContentType.BLOCK)

    def test_position_serialization(self):
        """Round trips through dict and through JSON keep equality."""
        position = RecursivePosition()
        position.add_node(LocationNode(ContentType.CHAPTER, 2))
        position.add_node(LocationNode(ContentType.WORD, 5, 3, {"text": "hello"}))
        position.rendering_metadata = {"font_scale": 1.5, "page_size": [800, 600]}

        # dict round trip
        from_mapping = RecursivePosition.from_dict(position.to_dict())
        self.assertEqual(position, from_mapping)

        # JSON round trip
        from_text = RecursivePosition.from_json(position.to_json())
        self.assertEqual(position, from_text)

    def test_position_equality_and_hashing(self):
        """Equal positions compare equal and work as the same dict key."""

        def chapter_one_word(word_index):
            # document -> chapter[1] -> word[word_index]
            built = RecursivePosition()
            built.add_node(LocationNode(ContentType.CHAPTER, 1))
            built.add_node(LocationNode(ContentType.WORD, word_index))
            return built

        first = chapter_one_word(5)
        twin = chapter_one_word(5)
        other_word = chapter_one_word(6)  # Different word

        # Equality
        self.assertEqual(first, twin)
        self.assertNotEqual(first, other_word)

        # Hashing: the twin must find the entry stored under `first`.
        by_position = {first: "value1", other_word: "value2"}
        self.assertEqual(by_position[twin], "value1")

    def test_string_representation(self):
        """str() renders the path as an arrow-separated chain."""
        position = RecursivePosition()
        position.add_node(LocationNode(ContentType.CHAPTER, 2))
        position.add_node(LocationNode(ContentType.BLOCK, 5))
        position.add_node(LocationNode(ContentType.WORD, 12, 3))

        self.assertEqual(
            str(position),
            "document[0] -> chapter[2] -> block[5] -> word[12]+3",
        )
|
||||||
|
|
||||||
|
|
||||||
|
class TestPositionBuilder(unittest.TestCase):
    """Test cases for PositionBuilder"""

    def test_fluent_building(self):
        """A chained builder produces the expected path and metadata."""
        builder = PositionBuilder().chapter(2).block(5).paragraph()
        builder = builder.word(12, offset=3)
        builder = builder.with_rendering_metadata(font_scale=1.5, page_size=[800, 600])
        built = builder.build()

        # Path: document + chapter + block + paragraph + word
        self.assertEqual(len(built.path), 5)

        chapter_entry = built.path[1]
        self.assertEqual(chapter_entry.content_type, ContentType.CHAPTER)
        self.assertEqual(chapter_entry.index, 2)

        leaf_entry = built.path[-1]
        self.assertEqual(leaf_entry.content_type, ContentType.WORD)
        self.assertEqual(leaf_entry.index, 12)
        self.assertEqual(leaf_entry.offset, 3)

        # Rendering metadata
        metadata = built.rendering_metadata
        self.assertEqual(metadata["font_scale"], 1.5)
        self.assertEqual(metadata["page_size"], [800, 600])

    def test_table_building(self):
        """Table, row and cell nodes can be chained into a position."""
        builder = PositionBuilder().chapter(1).block(3)
        builder = builder.table().table_row(2).table_cell(1)
        built = builder.word(0).build()

        # Look up each table-related node.
        table = built.get_node(ContentType.TABLE)
        row = built.get_node(ContentType.TABLE_ROW)
        cell = built.get_node(ContentType.TABLE_CELL)

        self.assertIsNotNone(table)
        self.assertIsNotNone(row)
        self.assertIsNotNone(cell)
        self.assertEqual(row.index, 2)
        self.assertEqual(cell.index, 1)

    def test_list_building(self):
        """List and list-item nodes can be chained into a position."""
        builder = PositionBuilder().chapter(0).block(2)
        built = builder.list().list_item(3).word(1).build()

        # Look up each list-related node.
        list_entry = built.get_node(ContentType.LIST)
        item_entry = built.get_node(ContentType.LIST_ITEM)

        self.assertIsNotNone(list_entry)
        self.assertIsNotNone(item_entry)
        self.assertEqual(item_entry.index, 3)

    def test_image_building(self):
        """Keyword arguments given to image() end up in node metadata."""
        builder = PositionBuilder().chapter(1).block(4)
        built = builder.image(0, alt_text="Test image", width=300, height=200).build()

        image = built.get_node(ContentType.IMAGE)
        self.assertIsNotNone(image)
        self.assertEqual(image.metadata["alt_text"], "Test image")
        self.assertEqual(image.metadata["width"], 300)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPositionStorage(unittest.TestCase):
    """Test cases for PositionStorage"""

    def setUp(self):
        """Create a temp directory and one storage per backend over it."""
        self.temp_dir = tempfile.mkdtemp()
        self.storage_json = PositionStorage(self.temp_dir, use_shelf=False)
        self.storage_shelf = PositionStorage(self.temp_dir, use_shelf=True)

    def tearDown(self):
        """Remove the temp directory and everything stored in it."""
        shutil.rmtree(self.temp_dir)

    def test_json_storage(self):
        """Save, load, list and delete through the JSON backend."""
        store = self.storage_json
        doc_id, name = "test_doc", "bookmark1"

        builder = PositionBuilder().chapter(2).block(5).word(12, offset=3)
        original = builder.with_rendering_metadata(font_scale=1.5).build()

        # Save
        store.save_position(doc_id, name, original)

        # Load
        fetched = store.load_position(doc_id, name)
        self.assertIsNotNone(fetched)
        self.assertEqual(original, fetched)

        # List
        names = store.list_positions(doc_id)
        self.assertIn(name, names)

        # Delete
        deleted = store.delete_position(doc_id, name)
        self.assertTrue(deleted)

        # After deletion the name no longer resolves.
        fetched_again = store.load_position(doc_id, name)
        self.assertIsNone(fetched_again)

    def test_shelf_storage(self):
        """Save, load, list and delete through the shelf backend."""
        store = self.storage_shelf
        doc_id, name = "test_doc", "table_pos"

        builder = PositionBuilder().chapter(1).block(3)
        original = builder.table().table_row(2).table_cell(1).build()

        # Save
        store.save_position(doc_id, name, original)

        # Load
        fetched = store.load_position(doc_id, name)
        self.assertIsNotNone(fetched)
        self.assertEqual(original, fetched)

        # List
        names = store.list_positions(doc_id)
        self.assertIn(name, names)

        # Delete
        deleted = store.delete_position(doc_id, name)
        self.assertTrue(deleted)

    def test_multiple_positions(self):
        """Several named positions can coexist under one document id."""
        store = self.storage_json
        doc_id = "multi_doc"
        originals = {
            "pos1": create_word_position(0, 1, 5),
            "pos2": create_image_position(1, 2),
            "pos3": create_table_cell_position(2, 3, 1, 2, 0),
        }

        # Save every position (dict order is insertion order).
        for name, position in originals.items():
            store.save_position(doc_id, name, position)

        # All three names are listed, and nothing else.
        names = store.list_positions(doc_id)
        self.assertEqual(len(names), 3)
        for name in originals:
            self.assertIn(name, names)

        # Load everything first, then compare against what was saved.
        fetched = {name: store.load_position(doc_id, name) for name in originals}
        for name, position in originals.items():
            self.assertEqual(position, fetched[name])
|
||||||
|
|
||||||
|
|
||||||
|
class TestConvenienceFunctions(unittest.TestCase):
    """Test cases for convenience functions"""

    def test_create_word_position(self):
        """create_word_position fills chapter, block, word and offset."""
        built = create_word_position(2, 5, 12, 3)

        chapter = built.get_node(ContentType.CHAPTER)
        block = built.get_node(ContentType.BLOCK)
        word = built.get_node(ContentType.WORD)

        self.assertEqual(chapter.index, 2)
        self.assertEqual(block.index, 5)
        self.assertEqual(word.index, 12)
        self.assertEqual(word.offset, 3)

    def test_create_image_position(self):
        """create_image_position fills chapter, block and image indices."""
        built = create_image_position(1, 3, 0)

        chapter = built.get_node(ContentType.CHAPTER)
        block = built.get_node(ContentType.BLOCK)
        image = built.get_node(ContentType.IMAGE)

        self.assertEqual(chapter.index, 1)
        self.assertEqual(block.index, 3)
        self.assertEqual(image.index, 0)

    def test_create_table_cell_position(self):
        """create_table_cell_position fills chapter/block/row/cell/word."""
        built = create_table_cell_position(0, 2, 1, 3, 5)

        chapter = built.get_node(ContentType.CHAPTER)
        block = built.get_node(ContentType.BLOCK)
        # The table node is looked up but nothing is asserted about it.
        _table = built.get_node(ContentType.TABLE)
        row = built.get_node(ContentType.TABLE_ROW)
        cell = built.get_node(ContentType.TABLE_CELL)
        word = built.get_node(ContentType.WORD)

        self.assertEqual(chapter.index, 0)
        self.assertEqual(block.index, 2)
        self.assertEqual(row.index, 1)
        self.assertEqual(cell.index, 3)
        self.assertEqual(word.index, 5)

    def test_create_list_item_position(self):
        """create_list_item_position fills chapter/block/item/word."""
        built = create_list_item_position(1, 4, 2, 7)

        chapter = built.get_node(ContentType.CHAPTER)
        block = built.get_node(ContentType.BLOCK)
        # The list node is looked up but nothing is asserted about it.
        _list_entry = built.get_node(ContentType.LIST)
        item = built.get_node(ContentType.LIST_ITEM)
        word = built.get_node(ContentType.WORD)

        self.assertEqual(chapter.index, 1)
        self.assertEqual(block.index, 4)
        self.assertEqual(item.index, 2)
        self.assertEqual(word.index, 7)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRealWorldScenarios(unittest.TestCase):
    """Test cases for real-world usage scenarios"""

    def test_ereader_bookmark_scenario(self):
        """Test typical ereader bookmark usage"""
        # User is reading chapter 3, paragraph 2, word 15, character 5
        reading_pos = (PositionBuilder()
                       .chapter(3)
                       .block(8)  # Block 8 in chapter 3
                       .paragraph()
                       .word(15, offset=5)
                       .with_rendering_metadata(
                           font_scale=1.2,
                           page_size=[600, 800],
                           theme="dark"
                       )
                       .build())

        # Store into a throwaway directory so the test leaves no files
        # behind; the directory is removed even if an assertion fails.
        storage_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, storage_dir)

        # Save as bookmark
        storage = PositionStorage(storage_dir, use_shelf=False)
        storage.save_position("my_novel", "chapter3_climax", reading_pos)

        # Later, load bookmark
        loaded_pos = storage.load_position("my_novel", "chapter3_climax")
        self.assertEqual(reading_pos, loaded_pos)

        # Verify we can extract the reading context
        chapter_node = loaded_pos.get_node(ContentType.CHAPTER)
        word_node = loaded_pos.get_node(ContentType.WORD)

        self.assertEqual(chapter_node.index, 3)
        self.assertEqual(word_node.index, 15)
        self.assertEqual(word_node.offset, 5)
        self.assertEqual(loaded_pos.rendering_metadata["font_scale"], 1.2)

    def test_table_navigation_scenario(self):
        """Test navigating within a complex table"""
        # User is in a table: chapter 2, table block 5, row 3, cell 2, word 1
        table_pos = (PositionBuilder()
                     .chapter(2)
                     .block(5)
                     .table(0, table_type="data", columns=4, rows=10)
                     .table_row(3, row_type="data")
                     .table_cell(2, cell_type="data", colspan=1)
                     .word(1)
                     .build())

        # Navigate to next cell (same row, next column).  The nodes are
        # edited in place on the copy; the assertNotEqual below only holds
        # if copy() did not share node objects with the original.
        next_cell_pos = table_pos.copy()
        cell_node = next_cell_pos.get_node(ContentType.TABLE_CELL)
        cell_node.index = 3  # Move to next column
        word_node = next_cell_pos.get_node(ContentType.WORD)
        word_node.index = 0  # Reset to first word in new cell

        # Verify positions are different but related
        self.assertNotEqual(table_pos, next_cell_pos)

        # They should share common ancestor up to table row level
        common = table_pos.get_common_ancestor(next_cell_pos)
        row_node = common.get_node(ContentType.TABLE_ROW)
        self.assertIsNotNone(row_node)
        self.assertEqual(row_node.index, 3)

    def test_multi_level_list_scenario(self):
        """Test navigating nested lists"""
        # Position in nested list: chapter 1, list block 3, item 2,
        # sub-list, sub-item 1, word 3
        nested_pos = (PositionBuilder()
                      .chapter(1)
                      .block(3)
                      .list(0, list_type="ordered")
                      .list_item(2)
                      .list(1, list_type="unordered")  # Nested list
                      .list_item(1)
                      .word(3)
                      .build())

        # Verify we can distinguish between the two list levels
        list_nodes = nested_pos.get_nodes(ContentType.LIST)
        self.assertEqual(len(list_nodes), 2)
        self.assertEqual(list_nodes[0].index, 0)  # Outer list
        self.assertEqual(list_nodes[1].index, 1)  # Inner list

        # Verify list item hierarchy
        item_nodes = nested_pos.get_nodes(ContentType.LIST_ITEM)
        self.assertEqual(len(item_nodes), 2)
        self.assertEqual(item_nodes[0].index, 2)  # Outer item
        self.assertEqual(item_nodes[1].index, 1)  # Inner item

    def test_position_comparison_and_sorting(self):
        """Test comparing positions for sorting/ordering"""
        # Create positions at different locations
        pos1 = create_word_position(1, 2, 5)   # Chapter 1, block 2, word 5
        pos2 = create_word_position(1, 2, 10)  # Chapter 1, block 2, word 10
        pos3 = create_word_position(1, 3, 1)   # Chapter 1, block 3, word 1
        pos4 = create_word_position(2, 1, 1)   # Chapter 2, block 1, word 1

        # For proper sorting, we'd need to implement comparison operators
        # For now, we can test that positions are distinguishable
        unique_positions = {pos4, pos2, pos1, pos3}
        self.assertEqual(len(unique_positions), 4)

        # Test that we can find common ancestors
        common_12 = pos1.get_common_ancestor(pos2)
        common_13 = pos1.get_common_ancestor(pos3)
        common_14 = pos1.get_common_ancestor(pos4)

        # pos1 and pos2 share paragraph-level ancestor (same chapter, block, paragraph)
        self.assertEqual(len(common_12.path), 4)  # document + chapter + block + paragraph

        # pos1 and pos3 share chapter-level ancestor (same chapter, different blocks)
        self.assertEqual(len(common_13.path), 2)  # document + chapter

        # pos1 and pos4 share only document-level ancestor (different chapters)
        self.assertEqual(len(common_14.path), 1)  # document only
|
||||||
|
|
||||||
|
|
||||||
|
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||||
@ -5,7 +5,8 @@ Tests the Font class and style enums for proper functionality and immutability.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration, Alignment
|
from pyWebLayout.style import Font, FontStyle, FontWeight, TextDecoration
|
||||||
|
from pyWebLayout.style import Alignment
|
||||||
|
|
||||||
|
|
||||||
class TestStyleObjects(unittest.TestCase):
|
class TestStyleObjects(unittest.TestCase):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user