pyWebLayout/tests/io_tests/test_base_reader.py
Duncan Tourolle 84229ad4da
All checks were successful
Python CI / test (push) Successful in 10m1s
update tests
2025-11-05 22:47:49 +01:00

303 lines
10 KiB
Python

"""
Tests for pyWebLayout.io.readers.base module.
Tests the base reader classes and their functionality.
"""
import pytest
from pyWebLayout.io.readers.base import (
BaseReader,
MetadataReader,
StructureReader,
ContentReader,
ResourceReader,
CompositeReader
)
from pyWebLayout.abstract.document import Document
# Concrete implementations for testing
class ConcreteBaseReader(BaseReader):
    """Minimal BaseReader subclass used as a test fixture."""

    def can_read(self, source):
        """Return True only for string sources whose name ends in '.test'."""
        if not isinstance(source, str):
            return False
        return source.endswith('.test')

    def read(self, source, **options):
        """Build a new Document and record ``source`` in its metadata.

        The keyword ``options`` are accepted but not used by this fixture.
        """
        document = Document()
        document.set_metadata('source', source)
        return document
class ConcreteMetadataReader(MetadataReader):
    """MetadataReader fixture that reports a fixed title and author."""

    def extract_metadata(self, source, document):
        """Store the fixed metadata on ``document`` and return it as a dict.

        ``source`` is ignored; the same values are produced for every input.
        """
        metadata = dict(title='Test Title', author='Test Author')
        # Write title first, then author, onto the document.
        for field in ('title', 'author'):
            document.set_metadata(field, metadata[field])
        return metadata
class ConcreteStructureReader(StructureReader):
    """StructureReader fixture that reports two fixed heading names."""

    def extract_structure(self, source, document):
        """Return a fresh two-item list; both arguments are ignored."""
        headings = ['heading1', 'heading2']
        return headings
class ConcreteContentReader(ContentReader):
    """ContentReader fixture that always yields the same text."""

    def extract_content(self, source, document):
        """Return a fixed content string; both arguments are ignored."""
        content = "Test content"
        return content
class ConcreteResourceReader(ResourceReader):
    """ResourceReader fixture that supplies two fixed fake resources."""

    def extract_resources(self, source, document):
        """Add the fixed resources to ``document`` and return them as a dict.

        ``source`` is ignored. One resource is bytes and the other is str.
        """
        resources = {}
        resources['image1.png'] = b'fake image data'
        resources['style.css'] = 'fake css'
        # Register each entry on the document in insertion order.
        for resource_name in resources:
            document.add_resource(resource_name, resources[resource_name])
        return resources
class ConcreteCompositeReader(CompositeReader):
    """CompositeReader fixture that accepts every source."""

    def can_read(self, source):
        """Report every source as readable, whatever its value."""
        return True
# Test Cases
class TestBaseReaderOptions:
    """Checks for the option get/set helpers on a BaseReader subclass."""

    def test_set_and_get_option(self):
        """A stored option is returned unchanged."""
        base_reader = ConcreteBaseReader()
        base_reader.set_option('font_size', 12)
        stored = base_reader.get_option('font_size')
        assert stored == 12

    def test_get_option_with_default(self):
        """An unknown option falls back to the supplied default."""
        base_reader = ConcreteBaseReader()
        fallback = base_reader.get_option('nonexistent', 'default_value')
        assert fallback == 'default_value'

    def test_get_option_without_default(self):
        """An unknown option with no default is expected to yield None."""
        base_reader = ConcreteBaseReader()
        missing = base_reader.get_option('nonexistent')
        assert missing is None

    def test_multiple_options(self):
        """Several options can be stored and read back independently."""
        base_reader = ConcreteBaseReader()
        expected = {'font_size': 12, 'line_height': 1.5, 'color': 'black'}
        # Store everything first, then verify, so later sets cannot hide
        # a clobbered earlier value.
        for option_name, option_value in expected.items():
            base_reader.set_option(option_name, option_value)
        for option_name, option_value in expected.items():
            assert base_reader.get_option(option_name) == option_value
class TestBaseReaderConcrete:
    """Checks for the ConcreteBaseReader fixture's can_read/read methods."""

    def test_can_read_valid_source(self):
        """A name ending in '.test' is accepted."""
        accepted = ConcreteBaseReader().can_read('document.test')
        assert accepted is True

    def test_can_read_invalid_source(self):
        """A name with another extension is rejected."""
        accepted = ConcreteBaseReader().can_read('document.html')
        assert accepted is False

    def test_read_creates_document(self):
        """read() returns a Document carrying the source in its metadata."""
        source_name = 'test.test'
        document = ConcreteBaseReader().read(source_name)
        assert isinstance(document, Document)
        assert document.get_metadata('source') == 'test.test'
class TestMetadataReaderConcrete:
    """Checks for the ConcreteMetadataReader fixture."""

    def test_extract_metadata(self):
        """Metadata is both returned and written onto the document."""
        document = Document()
        extracted = ConcreteMetadataReader().extract_metadata('source', document)

        # Returned mapping.
        assert extracted['title'] == 'Test Title'
        assert extracted['author'] == 'Test Author'

        # Side effect on the document.
        assert document.get_metadata('title') == 'Test Title'
        assert document.get_metadata('author') == 'Test Author'
class TestStructureReaderConcrete:
    """Checks for the ConcreteStructureReader fixture."""

    def test_extract_structure(self):
        """The structure is a two-item list of heading names."""
        document = Document()
        headings = ConcreteStructureReader().extract_structure('source', document)

        assert isinstance(headings, list)
        assert len(headings) == 2
        # Length was verified above, so unpacking cannot fail here.
        first, second = headings
        assert first == 'heading1'
        assert second == 'heading2'
class TestContentReaderConcrete:
    """Checks for the ConcreteContentReader fixture."""

    def test_extract_content(self):
        """The fixture's fixed content string is returned."""
        document = Document()
        extracted = ConcreteContentReader().extract_content('source', document)
        assert extracted == "Test content"
class TestResourceReaderConcrete:
    """Checks for the ConcreteResourceReader fixture."""

    def test_extract_resources(self):
        """Resources are returned as a dict and registered on the document."""
        document = Document()
        extracted = ConcreteResourceReader().extract_resources('source', document)

        assert isinstance(extracted, dict)
        for resource_name in ('image1.png', 'style.css'):
            assert resource_name in extracted

        # The same payloads must be retrievable from the document.
        assert document.get_resource('image1.png') == b'fake image data'
        assert document.get_resource('style.css') == 'fake css'
class TestCompositeReader:
    """Checks for CompositeReader wiring and its read() orchestration."""

    def test_initialization(self):
        """A new composite reader starts with no sub-readers attached."""
        reader = ConcreteCompositeReader()
        assert reader._metadata_reader is None
        assert reader._structure_reader is None
        assert reader._content_reader is None
        assert reader._resource_reader is None

    def test_set_metadata_reader(self):
        """set_metadata_reader stores the exact instance it was given."""
        reader = ConcreteCompositeReader()
        metadata_reader = ConcreteMetadataReader()
        reader.set_metadata_reader(metadata_reader)
        assert reader._metadata_reader is metadata_reader

    def test_set_structure_reader(self):
        """set_structure_reader stores the exact instance it was given."""
        reader = ConcreteCompositeReader()
        structure_reader = ConcreteStructureReader()
        reader.set_structure_reader(structure_reader)
        assert reader._structure_reader is structure_reader

    def test_set_content_reader(self):
        """set_content_reader stores the exact instance it was given."""
        reader = ConcreteCompositeReader()
        content_reader = ConcreteContentReader()
        reader.set_content_reader(content_reader)
        assert reader._content_reader is content_reader

    def test_set_resource_reader(self):
        """set_resource_reader stores the exact instance it was given."""
        reader = ConcreteCompositeReader()
        resource_reader = ConcreteResourceReader()
        reader.set_resource_reader(resource_reader)
        assert reader._resource_reader is resource_reader

    def test_read_with_all_readers(self):
        """read() with every sub-reader set populates metadata and resources."""
        reader = ConcreteCompositeReader()
        reader.set_metadata_reader(ConcreteMetadataReader())
        reader.set_structure_reader(ConcreteStructureReader())
        reader.set_content_reader(ConcreteContentReader())
        reader.set_resource_reader(ConcreteResourceReader())

        doc = reader.read('test_source')

        # Verify metadata was extracted
        assert doc.get_metadata('title') == 'Test Title'
        assert doc.get_metadata('author') == 'Test Author'

        # Verify resources were extracted
        assert doc.get_resource('image1.png') == b'fake image data'
        assert doc.get_resource('style.css') == 'fake css'

    def test_read_with_no_readers(self):
        """read() with no sub-readers still returns a Document."""
        reader = ConcreteCompositeReader()
        doc = reader.read('test_source')

        # Should create an empty document
        assert isinstance(doc, Document)

    def test_read_with_only_metadata_reader(self):
        """read() works when only the metadata sub-reader is configured."""
        reader = ConcreteCompositeReader()
        reader.set_metadata_reader(ConcreteMetadataReader())

        doc = reader.read('test_source')

        assert doc.get_metadata('title') == 'Test Title'

    def test_read_with_options(self):
        """Keyword options passed to read() are retrievable via get_option."""
        reader = ConcreteCompositeReader()
        reader.set_metadata_reader(ConcreteMetadataReader())

        doc = reader.read('test_source', font_size=14, encoding='utf-8')

        # The result used to be bound and then ignored; check that passing
        # options still yields a Document.
        assert isinstance(doc, Document)

        # Verify options were stored
        assert reader.get_option('font_size') == 14
        assert reader.get_option('encoding') == 'utf-8'

    def test_can_read_implemented(self):
        """ConcreteCompositeReader provides a working can_read."""
        reader = ConcreteCompositeReader()
        assert reader.can_read('test_source') is True
class TestCompositeReaderIntegration:
    """End-to-end check of a fully configured CompositeReader."""

    def test_full_document_reading_workflow(self):
        """All four sub-readers and the read options cooperate in one read()."""
        # Assemble a composite reader with every sub-reader attached.
        composite = ConcreteCompositeReader()
        composite.set_metadata_reader(ConcreteMetadataReader())
        composite.set_structure_reader(ConcreteStructureReader())
        composite.set_content_reader(ConcreteContentReader())
        composite.set_resource_reader(ConcreteResourceReader())

        # Perform the read, passing options as keyword arguments.
        read_options = {'font_size': 16, 'page_width': 800}
        document = composite.read('complex_document.test', **read_options)

        # Metadata and resources must have reached the document.
        assert document.get_metadata('title') == 'Test Title'
        assert document.get_metadata('author') == 'Test Author'
        assert document.get_resource('image1.png') is not None

        # Options must be retrievable from the reader afterwards.
        assert composite.get_option('font_size') == 16
        assert composite.get_option('page_width') == 800