pyWebLayout/tests/layout/test_recursive_position.py
Duncan Tourolle 65ab46556f
Some checks failed
Python CI / test (push) Failing after 3m55s
big update with ok rendering
2025-08-27 22:22:54 +02:00

579 lines
23 KiB
Python

"""
Unit tests for the recursive position system.
Tests the hierarchical position tracking that can reference any nested content structure.
"""
import unittest
import tempfile
import shutil
import json
from pathlib import Path
from pyWebLayout.layout.recursive_position import (
ContentType, LocationNode, RecursivePosition, PositionBuilder, PositionStorage,
create_word_position, create_image_position, create_table_cell_position, create_list_item_position
)
class TestLocationNode(unittest.TestCase):
    """Checks construction, dict round-tripping and formatting of LocationNode."""

    def test_node_creation(self):
        """A node exposes the type, index, offset and metadata it was given."""
        word = LocationNode(ContentType.WORD, 5, 3, {"text": "hello"})

        self.assertEqual(word.content_type, ContentType.WORD)
        self.assertEqual(word.index, 5)
        self.assertEqual(word.offset, 3)
        self.assertEqual(word.metadata["text"], "hello")

    def test_node_serialization(self):
        """to_dict() yields the expected mapping and from_dict() reverses it."""
        cell = LocationNode(ContentType.TABLE_CELL, 2, 0, {"colspan": 2})

        # Dump the node to a plain dict and compare against the literal form.
        payload = cell.to_dict()
        wanted = {
            'content_type': 'table_cell',
            'index': 2,
            'offset': 0,
            'metadata': {'colspan': 2},
        }
        self.assertEqual(payload, wanted)

        # Rebuild a node from that dict and check each field survived.
        rebuilt = LocationNode.from_dict(payload)
        self.assertEqual(rebuilt.content_type, ContentType.TABLE_CELL)
        self.assertEqual(rebuilt.index, 2)
        self.assertEqual(rebuilt.offset, 0)
        self.assertEqual(rebuilt.metadata, {'colspan': 2})

    def test_node_string_representation(self):
        """str() renders 'type[index]' for a node built without an offset
        and 'type[index]+offset' for one built with offset 2."""
        without_offset = LocationNode(ContentType.PARAGRAPH, 3)
        self.assertEqual(str(without_offset), "paragraph[3]")

        with_offset = LocationNode(ContentType.WORD, 5, 2)
        self.assertEqual(str(with_offset), "word[5]+2")
class TestRecursivePosition(unittest.TestCase):
    """Checks path building, copying, querying, hierarchy helpers,
    serialization, equality/hashing and formatting of RecursivePosition."""

    def test_position_creation(self):
        """A freshly constructed position holds a single DOCUMENT node."""
        fresh = RecursivePosition()

        # The document root is expected to be present without adding anything.
        self.assertEqual(len(fresh.path), 1)
        self.assertEqual(fresh.path[0].content_type, ContentType.DOCUMENT)

    def test_position_building(self):
        """Nodes added one by one appear in the path in insertion order."""
        position = RecursivePosition()
        for kind, idx in (
            (ContentType.CHAPTER, 2),
            (ContentType.BLOCK, 5),
            (ContentType.PARAGRAPH, 0),
        ):
            position.add_node(LocationNode(kind, idx))
        position.add_node(LocationNode(ContentType.WORD, 12, 3))

        self.assertEqual(len(position.path), 5)  # four added nodes + document root
        self.assertEqual(position.path[1].content_type, ContentType.CHAPTER)
        self.assertEqual(position.path[1].index, 2)
        self.assertEqual(position.path[-1].content_type, ContentType.WORD)
        self.assertEqual(position.path[-1].index, 12)
        self.assertEqual(position.path[-1].offset, 3)

    def test_position_copy(self):
        """copy() gives an equal position that shares no containers with the source."""
        source = RecursivePosition()
        source.add_node(LocationNode(ContentType.CHAPTER, 1))
        source.add_node(LocationNode(ContentType.WORD, 5, 2, {"text": "test"}))
        source.rendering_metadata = {"font_scale": 1.5}

        clone = source.copy()

        # Equal by value, distinct by identity (object, path, metadata).
        self.assertEqual(source, clone)
        self.assertIsNot(source, clone)
        self.assertIsNot(source.path, clone.path)
        self.assertIsNot(source.rendering_metadata, clone.rendering_metadata)

        # Growing the clone must leave the source path untouched.
        clone.add_node(LocationNode(ContentType.IMAGE, 0))
        self.assertNotEqual(len(source.path), len(clone.path))

    def test_node_queries(self):
        """get_node()/get_nodes() look up path entries by content type."""
        position = RecursivePosition()
        for kind, idx in (
            (ContentType.CHAPTER, 2),
            (ContentType.BLOCK, 5),
            (ContentType.TABLE, 0),
            (ContentType.TABLE_ROW, 1),
            (ContentType.TABLE_CELL, 2),
        ):
            position.add_node(LocationNode(kind, idx))

        # A type that is present resolves to its node.
        chapter = position.get_node(ContentType.CHAPTER)
        self.assertIsNotNone(chapter)
        self.assertEqual(chapter.index, 2)

        # A type that is absent resolves to None.
        missing_word = position.get_node(ContentType.WORD)
        self.assertIsNone(missing_word)

        # The plural query returns a list; only one row exists here.
        rows = position.get_nodes(ContentType.TABLE_ROW)
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0].index, 1)

    def test_position_hierarchy_operations(self):
        """Ancestor/descendant predicates and get_common_ancestor()."""
        # document -> chapter[1] -> block[2]
        upper = RecursivePosition()
        upper.add_node(LocationNode(ContentType.CHAPTER, 1))
        upper.add_node(LocationNode(ContentType.BLOCK, 2))

        # document -> chapter[1] -> block[2] -> paragraph[0] -> word[5]
        lower = upper.copy()
        lower.add_node(LocationNode(ContentType.PARAGRAPH, 0))
        lower.add_node(LocationNode(ContentType.WORD, 5))

        # document -> chapter[2] -> block[1] (diverges right after the root)
        elsewhere = RecursivePosition()
        elsewhere.add_node(LocationNode(ContentType.CHAPTER, 2))
        elsewhere.add_node(LocationNode(ContentType.BLOCK, 1))

        # Relationship predicates
        self.assertTrue(upper.is_ancestor_of(lower))
        self.assertTrue(lower.is_descendant_of(upper))
        self.assertFalse(upper.is_ancestor_of(elsewhere))
        self.assertFalse(elsewhere.is_descendant_of(upper))

        # Shared prefix with the descendant: document + chapter + block
        shared = upper.get_common_ancestor(lower)
        self.assertEqual(len(shared.path), 3)

        # Shared prefix with the unrelated position: document root only
        shared_elsewhere = upper.get_common_ancestor(elsewhere)
        self.assertEqual(len(shared_elsewhere.path), 1)

    def test_position_truncation(self):
        """truncate_to_type() on a copy ends the path at the requested type."""
        position = RecursivePosition()
        for kind, idx in (
            (ContentType.CHAPTER, 1),
            (ContentType.BLOCK, 2),
            (ContentType.PARAGRAPH, 0),
            (ContentType.WORD, 5),
        ):
            position.add_node(LocationNode(kind, idx))

        # Cut a duplicate back to the block level.
        duplicate = position.copy()
        shortened = duplicate.truncate_to_type(ContentType.BLOCK)

        self.assertEqual(len(shortened.path), 3)  # document + chapter + block
        self.assertEqual(shortened.path[-1].content_type, ContentType.BLOCK)

    def test_position_serialization(self):
        """Dict and JSON round trips both reproduce an equal position."""
        position = RecursivePosition()
        position.add_node(LocationNode(ContentType.CHAPTER, 2))
        position.add_node(LocationNode(ContentType.WORD, 5, 3, {"text": "hello"}))
        position.rendering_metadata = {"font_scale": 1.5, "page_size": [800, 600]}

        # dict round trip
        as_dict = position.to_dict()
        from_dict = RecursivePosition.from_dict(as_dict)
        self.assertEqual(position, from_dict)

        # JSON round trip
        as_json = position.to_json()
        from_json = RecursivePosition.from_json(as_json)
        self.assertEqual(position, from_json)

    def test_position_equality_and_hashing(self):
        """Equal paths compare equal and can stand in for each other as dict keys."""
        first = RecursivePosition()
        first.add_node(LocationNode(ContentType.CHAPTER, 1))
        first.add_node(LocationNode(ContentType.WORD, 5))

        twin = RecursivePosition()
        twin.add_node(LocationNode(ContentType.CHAPTER, 1))
        twin.add_node(LocationNode(ContentType.WORD, 5))

        other_word = RecursivePosition()
        other_word.add_node(LocationNode(ContentType.CHAPTER, 1))
        other_word.add_node(LocationNode(ContentType.WORD, 6))  # differs in word index

        # Equality
        self.assertEqual(first, twin)
        self.assertNotEqual(first, other_word)

        # Hashing: looking up with the twin must hit the entry stored under `first`.
        by_position = {first: "value1", other_word: "value2"}
        self.assertEqual(by_position[twin], "value1")

    def test_string_representation(self):
        """str() joins the rendered nodes with ' -> ', root first."""
        position = RecursivePosition()
        position.add_node(LocationNode(ContentType.CHAPTER, 2))
        position.add_node(LocationNode(ContentType.BLOCK, 5))
        position.add_node(LocationNode(ContentType.WORD, 12, 3))

        rendered = str(position)
        self.assertEqual(rendered, "document[0] -> chapter[2] -> block[5] -> word[12]+3")
class TestPositionBuilder(unittest.TestCase):
    """Checks that the fluent PositionBuilder produces the expected paths."""

    def test_fluent_building(self):
        """Chained builder calls yield a five-node path plus rendering metadata."""
        builder = PositionBuilder().chapter(2).block(5).paragraph().word(12, offset=3)
        built = builder.with_rendering_metadata(
            font_scale=1.5, page_size=[800, 600]
        ).build()

        # Path: document + chapter + block + paragraph + word
        self.assertEqual(len(built.path), 5)
        self.assertEqual(built.path[1].content_type, ContentType.CHAPTER)
        self.assertEqual(built.path[1].index, 2)
        self.assertEqual(built.path[-1].content_type, ContentType.WORD)
        self.assertEqual(built.path[-1].index, 12)
        self.assertEqual(built.path[-1].offset, 3)

        # Rendering metadata passed as keyword arguments is stored verbatim.
        self.assertEqual(built.rendering_metadata["font_scale"], 1.5)
        self.assertEqual(built.rendering_metadata["page_size"], [800, 600])

    def test_table_building(self):
        """table()/table_row()/table_cell() each contribute a queryable node."""
        builder = PositionBuilder().chapter(1).block(3)
        builder = builder.table().table_row(2).table_cell(1).word(0)
        built = builder.build()

        # Look up the three table-related levels.
        table = built.get_node(ContentType.TABLE)
        row = built.get_node(ContentType.TABLE_ROW)
        cell = built.get_node(ContentType.TABLE_CELL)

        self.assertIsNotNone(table)
        self.assertIsNotNone(row)
        self.assertIsNotNone(cell)
        self.assertEqual(row.index, 2)
        self.assertEqual(cell.index, 1)

    def test_list_building(self):
        """list()/list_item() each contribute a queryable node."""
        builder = PositionBuilder().chapter(0).block(2)
        built = builder.list().list_item(3).word(1).build()

        # Look up the list container and the item inside it.
        container = built.get_node(ContentType.LIST)
        item = built.get_node(ContentType.LIST_ITEM)

        self.assertIsNotNone(container)
        self.assertIsNotNone(item)
        self.assertEqual(item.index, 3)

    def test_image_building(self):
        """Keyword arguments given to image() end up in the node metadata."""
        builder = PositionBuilder().chapter(1).block(4)
        built = builder.image(0, alt_text="Test image", width=300, height=200).build()

        picture = built.get_node(ContentType.IMAGE)
        self.assertIsNotNone(picture)
        self.assertEqual(picture.metadata["alt_text"], "Test image")
        self.assertEqual(picture.metadata["width"], 300)
class TestPositionStorage(unittest.TestCase):
    """Save/load/list/delete round trips for PositionStorage.

    Two storage objects are created per test over the same scratch directory,
    one with ``use_shelf=False`` and one with ``use_shelf=True``.
    """

    def setUp(self):
        """Create a scratch directory and one storage object per mode.

        The directory removal is registered with ``addCleanup`` immediately
        after creation. unittest skips ``tearDown`` when ``setUp`` raises, but
        still runs registered cleanups, so the directory is removed even if
        constructing a ``PositionStorage`` fails.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.temp_dir)
        self.storage_json = PositionStorage(self.temp_dir, use_shelf=False)
        self.storage_shelf = PositionStorage(self.temp_dir, use_shelf=True)

    def test_json_storage(self):
        """Save, load, list and delete a position with ``use_shelf=False``."""
        # Create test position
        pos = (PositionBuilder()
               .chapter(2)
               .block(5)
               .word(12, offset=3)
               .with_rendering_metadata(font_scale=1.5)
               .build())

        # Save position
        self.storage_json.save_position("test_doc", "bookmark1", pos)

        # Load position: must come back equal to what was saved
        loaded = self.storage_json.load_position("test_doc", "bookmark1")
        self.assertIsNotNone(loaded)
        self.assertEqual(pos, loaded)

        # List positions
        positions = self.storage_json.list_positions("test_doc")
        self.assertIn("bookmark1", positions)

        # Delete position
        success = self.storage_json.delete_position("test_doc", "bookmark1")
        self.assertTrue(success)

        # Verify deletion: loading a deleted name yields None
        loaded_after_delete = self.storage_json.load_position("test_doc", "bookmark1")
        self.assertIsNone(loaded_after_delete)

    def test_shelf_storage(self):
        """Save, load, list and delete a position with ``use_shelf=True``."""
        # Create test position
        pos = (PositionBuilder()
               .chapter(1)
               .block(3)
               .table()
               .table_row(2)
               .table_cell(1)
               .build())

        # Save position
        self.storage_shelf.save_position("test_doc", "table_pos", pos)

        # Load position: must come back equal to what was saved
        loaded = self.storage_shelf.load_position("test_doc", "table_pos")
        self.assertIsNotNone(loaded)
        self.assertEqual(pos, loaded)

        # List positions
        positions = self.storage_shelf.list_positions("test_doc")
        self.assertIn("table_pos", positions)

        # Delete position
        success = self.storage_shelf.delete_position("test_doc", "table_pos")
        self.assertTrue(success)

    def test_multiple_positions(self):
        """Three positions saved under one document id are all listed and reloadable."""
        pos1 = create_word_position(0, 1, 5)
        pos2 = create_image_position(1, 2)
        pos3 = create_table_cell_position(2, 3, 1, 2, 0)

        # Save multiple positions
        self.storage_json.save_position("multi_doc", "pos1", pos1)
        self.storage_json.save_position("multi_doc", "pos2", pos2)
        self.storage_json.save_position("multi_doc", "pos3", pos3)

        # List all positions
        positions = self.storage_json.list_positions("multi_doc")
        self.assertEqual(len(positions), 3)
        self.assertIn("pos1", positions)
        self.assertIn("pos2", positions)
        self.assertIn("pos3", positions)

        # Load and verify each position
        loaded1 = self.storage_json.load_position("multi_doc", "pos1")
        loaded2 = self.storage_json.load_position("multi_doc", "pos2")
        loaded3 = self.storage_json.load_position("multi_doc", "pos3")
        self.assertEqual(pos1, loaded1)
        self.assertEqual(pos2, loaded2)
        self.assertEqual(pos3, loaded3)
class TestConvenienceFunctions(unittest.TestCase):
    """Checks the module-level ``create_*_position`` helper functions."""

    def test_create_word_position(self):
        """create_word_position(2, 5, 12, 3) yields chapter 2, block 5, word 12, offset 3."""
        pos = create_word_position(2, 5, 12, 3)

        chapter_node = pos.get_node(ContentType.CHAPTER)
        block_node = pos.get_node(ContentType.BLOCK)
        word_node = pos.get_node(ContentType.WORD)

        self.assertEqual(chapter_node.index, 2)
        self.assertEqual(block_node.index, 5)
        self.assertEqual(word_node.index, 12)
        self.assertEqual(word_node.offset, 3)

    def test_create_image_position(self):
        """create_image_position(1, 3, 0) yields chapter 1, block 3, image 0."""
        pos = create_image_position(1, 3, 0)

        chapter_node = pos.get_node(ContentType.CHAPTER)
        block_node = pos.get_node(ContentType.BLOCK)
        image_node = pos.get_node(ContentType.IMAGE)

        self.assertEqual(chapter_node.index, 1)
        self.assertEqual(block_node.index, 3)
        self.assertEqual(image_node.index, 0)

    def test_create_table_cell_position(self):
        """create_table_cell_position(0, 2, 1, 3, 5) yields chapter 0, block 2,
        row 1, cell 3, word 5, inside a table node."""
        pos = create_table_cell_position(0, 2, 1, 3, 5)

        chapter_node = pos.get_node(ContentType.CHAPTER)
        block_node = pos.get_node(ContentType.BLOCK)
        table_node = pos.get_node(ContentType.TABLE)
        row_node = pos.get_node(ContentType.TABLE_ROW)
        cell_node = pos.get_node(ContentType.TABLE_CELL)
        word_node = pos.get_node(ContentType.WORD)

        # The table node carries no index worth pinning here, but it was
        # previously fetched and never checked; assert that it is present.
        # NOTE(review): assumes the helper adds a TABLE node, as the builder
        # chain in the table-building tests does - confirm.
        self.assertIsNotNone(table_node)
        self.assertEqual(chapter_node.index, 0)
        self.assertEqual(block_node.index, 2)
        self.assertEqual(row_node.index, 1)
        self.assertEqual(cell_node.index, 3)
        self.assertEqual(word_node.index, 5)

    def test_create_list_item_position(self):
        """create_list_item_position(1, 4, 2, 7) yields chapter 1, block 4,
        item 2, word 7, inside a list node."""
        pos = create_list_item_position(1, 4, 2, 7)

        chapter_node = pos.get_node(ContentType.CHAPTER)
        block_node = pos.get_node(ContentType.BLOCK)
        list_node = pos.get_node(ContentType.LIST)
        item_node = pos.get_node(ContentType.LIST_ITEM)
        word_node = pos.get_node(ContentType.WORD)

        # As with the table case: the list container was looked up but never
        # verified, so check that it exists.
        # NOTE(review): assumes the helper adds a LIST node - confirm.
        self.assertIsNotNone(list_node)
        self.assertEqual(chapter_node.index, 1)
        self.assertEqual(block_node.index, 4)
        self.assertEqual(item_node.index, 2)
        self.assertEqual(word_node.index, 7)
class TestRealWorldScenarios(unittest.TestCase):
    """End-to-end style scenarios combining the builder, storage and queries."""

    def test_ereader_bookmark_scenario(self):
        """Save a reading position as a bookmark, reload it and read it back.

        The storage is rooted in a temporary directory that is removed when
        the ``with`` block exits, so the test neither writes into the default
        storage location nor leaves bookmark files behind.
        """
        # User is reading chapter 3, block 8, word 15, character 5
        reading_pos = (PositionBuilder()
                       .chapter(3)
                       .block(8)  # Block 8 in chapter 3
                       .paragraph()
                       .word(15, offset=5)
                       .with_rendering_metadata(
                           font_scale=1.2,
                           page_size=[600, 800],
                           theme="dark"
                       )
                       .build())

        with tempfile.TemporaryDirectory() as storage_dir:
            # Save as bookmark
            storage = PositionStorage(storage_dir, use_shelf=False)
            storage.save_position("my_novel", "chapter3_climax", reading_pos)

            # Later, load bookmark
            loaded_pos = storage.load_position("my_novel", "chapter3_climax")

        self.assertEqual(reading_pos, loaded_pos)

        # Verify we can extract the reading context
        chapter_node = loaded_pos.get_node(ContentType.CHAPTER)
        word_node = loaded_pos.get_node(ContentType.WORD)

        self.assertEqual(chapter_node.index, 3)
        self.assertEqual(word_node.index, 15)
        self.assertEqual(word_node.offset, 5)
        self.assertEqual(loaded_pos.rendering_metadata["font_scale"], 1.2)

    def test_table_navigation_scenario(self):
        """Move to the neighbouring cell on a copy and compare with the original."""
        # User is in a table: chapter 2, table block 5, row 3, cell 2, word 1
        table_pos = (PositionBuilder()
                     .chapter(2)
                     .block(5)
                     .table(0, table_type="data", columns=4, rows=10)
                     .table_row(3, row_type="data")
                     .table_cell(2, cell_type="data", colspan=1)
                     .word(1)
                     .build())

        # Navigate to next cell (same row, next column) by editing a copy
        next_cell_pos = table_pos.copy()
        cell_node = next_cell_pos.get_node(ContentType.TABLE_CELL)
        cell_node.index = 3  # Move to next column
        word_node = next_cell_pos.get_node(ContentType.WORD)
        word_node.index = 0  # Reset to first word in new cell

        # Verify positions are different but related
        self.assertNotEqual(table_pos, next_cell_pos)

        # They should share common ancestor up to table row level
        common = table_pos.get_common_ancestor(next_cell_pos)
        row_node = common.get_node(ContentType.TABLE_ROW)
        self.assertIsNotNone(row_node)
        self.assertEqual(row_node.index, 3)

    def test_multi_level_list_scenario(self):
        """A list nested in a list item yields two LIST and two LIST_ITEM nodes,
        returned outer-first by get_nodes()."""
        # Position in nested list: chapter 1, list block 3, item 2, sub-list, sub-item 1, word 3
        nested_pos = (PositionBuilder()
                      .chapter(1)
                      .block(3)
                      .list(0, list_type="ordered")
                      .list_item(2)
                      .list(1, list_type="unordered")  # Nested list
                      .list_item(1)
                      .word(3)
                      .build())

        # Verify we can distinguish between the two list levels
        list_nodes = nested_pos.get_nodes(ContentType.LIST)
        self.assertEqual(len(list_nodes), 2)
        self.assertEqual(list_nodes[0].index, 0)  # Outer list
        self.assertEqual(list_nodes[1].index, 1)  # Inner list

        # Verify list item hierarchy
        item_nodes = nested_pos.get_nodes(ContentType.LIST_ITEM)
        self.assertEqual(len(item_nodes), 2)
        self.assertEqual(item_nodes[0].index, 2)  # Outer item
        self.assertEqual(item_nodes[1].index, 1)  # Inner item

    def test_position_comparison_and_sorting(self):
        """Distinct positions hash as distinct and share ancestors of the expected depth."""
        # Create positions at different locations
        pos1 = create_word_position(1, 2, 5)   # Chapter 1, block 2, word 5
        pos2 = create_word_position(1, 2, 10)  # Chapter 1, block 2, word 10
        pos3 = create_word_position(1, 3, 1)   # Chapter 1, block 3, word 1
        pos4 = create_word_position(2, 1, 1)   # Chapter 2, block 1, word 1

        # For proper sorting, we'd need to implement comparison operators.
        # For now, we can test that positions are distinguishable: putting
        # them (deliberately out of order) in a set must keep all four.
        unique_positions = {pos4, pos2, pos1, pos3}
        self.assertEqual(len(unique_positions), 4)

        # Test that we can find common ancestors
        common_12 = pos1.get_common_ancestor(pos2)
        common_13 = pos1.get_common_ancestor(pos3)
        common_14 = pos1.get_common_ancestor(pos4)

        # pos1 and pos2 share paragraph-level ancestor (same chapter, block, paragraph)
        self.assertEqual(len(common_12.path), 4)  # document + chapter + block + paragraph

        # pos1 and pos3 share chapter-level ancestor (same chapter, different blocks)
        self.assertEqual(len(common_13.path), 2)  # document + chapter

        # pos1 and pos4 share only document-level ancestor (different chapters)
        self.assertEqual(len(common_14.path), 1)  # document only
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()