"""
Tests for pyWebLayout.io.readers.base module.

Tests the base reader classes and their functionality.
"""

import pytest

from pyWebLayout.io.readers.base import (
    BaseReader,
    MetadataReader,
    StructureReader,
    ContentReader,
    ResourceReader,
    CompositeReader,
)
from pyWebLayout.abstract.document import Document


# Concrete implementations for testing
class ConcreteBaseReader(BaseReader):
    """Test implementation of BaseReader.

    Accepts string sources ending in ``.test`` and produces a Document
    whose ``source`` metadata entry records the source it was read from.
    """

    def can_read(self, source):
        """Return True only for string sources with a ``.test`` suffix."""
        return isinstance(source, str) and source.endswith('.test')

    def read(self, source, **options):
        """Return a new Document with ``source`` stored as metadata.

        ``options`` is accepted for signature compatibility and is not
        used by this test implementation.
        """
        doc = Document()
        doc.set_metadata('source', source)
        return doc


class ConcreteMetadataReader(MetadataReader):
    """Test implementation of MetadataReader."""

    def extract_metadata(self, source, document):
        """Store fixed title/author metadata on ``document`` and return it.

        ``source`` is ignored; the same metadata is produced for any input.
        """
        metadata = {
            'title': 'Test Title',
            'author': 'Test Author',
        }
        # Same approach as ConcreteResourceReader: push every entry of the
        # fixture dict onto the document instead of repeating each key.
        for key, value in metadata.items():
            document.set_metadata(key, value)
        return metadata


class ConcreteStructureReader(StructureReader):
    """Test implementation of StructureReader."""

    def extract_structure(self, source, document):
        """Return a fixed two-item list; ``source``/``document`` are unused."""
        return ['heading1', 'heading2']


class ConcreteContentReader(ContentReader):
    """Test implementation of ContentReader."""

    def extract_content(self, source, document):
        """Return a fixed content string; ``source``/``document`` are unused."""
        return "Test content"


class ConcreteResourceReader(ResourceReader):
    """Test implementation of ResourceReader."""

    def extract_resources(self, source, document):
        """Add two fixed resources to ``document`` and return them as a dict.

        One resource is bytes and the other is str, so both payload kinds
        are exercised by the tests below.
        """
        resources = {
            'image1.png': b'fake image data',
            'style.css': 'fake css',
        }
        for name, data in resources.items():
            document.add_resource(name, data)
        return resources


class ConcreteCompositeReader(CompositeReader):
    """Test implementation of CompositeReader.

    Only ``can_read`` is supplied here; ``read`` and the ``set_*_reader``
    methods used by the tests come from CompositeReader itself.
    """

    def can_read(self, source):
        """Accept every source unconditionally."""
        return True


# Test Cases
class TestBaseReaderOptions:
    """Test BaseReader options functionality."""

    def test_set_and_get_option(self):
        """Test setting and getting options."""
        reader = ConcreteBaseReader()
        reader.set_option('font_size', 12)

        assert reader.get_option('font_size') == 12

    def test_get_option_with_default(self):
        """Test getting option with default value."""
        reader = ConcreteBaseReader()

        assert reader.get_option('nonexistent', 'default_value') == 'default_value'

    def test_get_option_without_default(self):
        """Test getting nonexistent option without default."""
        reader = ConcreteBaseReader()

        assert reader.get_option('nonexistent') is None

    def test_multiple_options(self):
        """Test setting multiple options."""
        reader = ConcreteBaseReader()
        reader.set_option('font_size', 12)
        reader.set_option('line_height', 1.5)
        reader.set_option('color', 'black')

        assert reader.get_option('font_size') == 12
        assert reader.get_option('line_height') == 1.5
        assert reader.get_option('color') == 'black'


class TestBaseReaderConcrete:
    """Test concrete BaseReader implementation."""

    def test_can_read_valid_source(self):
        """Test can_read with valid source."""
        reader = ConcreteBaseReader()

        assert reader.can_read('document.test') is True

    def test_can_read_invalid_source(self):
        """Test can_read with invalid source."""
        reader = ConcreteBaseReader()

        assert reader.can_read('document.html') is False

    def test_read_creates_document(self):
        """Test read creates a Document."""
        reader = ConcreteBaseReader()
        doc = reader.read('test.test')

        assert isinstance(doc, Document)
        assert doc.get_metadata('source') == 'test.test'


class TestMetadataReaderConcrete:
    """Test concrete MetadataReader implementation."""

    def test_extract_metadata(self):
        """Test metadata extraction."""
        reader = ConcreteMetadataReader()
        doc = Document()
        metadata = reader.extract_metadata('source', doc)

        # The returned mapping and the document must agree.
        assert metadata['title'] == 'Test Title'
        assert metadata['author'] == 'Test Author'
        assert doc.get_metadata('title') == 'Test Title'
        assert doc.get_metadata('author') == 'Test Author'


class TestStructureReaderConcrete:
    """Test concrete StructureReader implementation."""

    def test_extract_structure(self):
        """Test structure extraction."""
        reader = ConcreteStructureReader()
        doc = Document()
        structure = reader.extract_structure('source', doc)

        assert isinstance(structure, list)
        assert len(structure) == 2
        assert structure[0] == 'heading1'
        assert structure[1] == 'heading2'


class TestContentReaderConcrete:
    """Test concrete ContentReader implementation."""

    def test_extract_content(self):
        """Test content extraction."""
        reader = ConcreteContentReader()
        doc = Document()
        content = reader.extract_content('source', doc)

        assert content == "Test content"


class TestResourceReaderConcrete:
    """Test concrete ResourceReader implementation."""

    def test_extract_resources(self):
        """Test resource extraction."""
        reader = ConcreteResourceReader()
        doc = Document()
        resources = reader.extract_resources('source', doc)

        assert isinstance(resources, dict)
        assert 'image1.png' in resources
        assert 'style.css' in resources
        assert doc.get_resource('image1.png') == b'fake image data'
        assert doc.get_resource('style.css') == 'fake css'


class TestCompositeReader:
    """Test CompositeReader functionality."""

    def test_initialization(self):
        """Test composite reader initialization."""
        reader = ConcreteCompositeReader()

        # A fresh composite reader starts with no sub-readers attached.
        assert reader._metadata_reader is None
        assert reader._structure_reader is None
        assert reader._content_reader is None
        assert reader._resource_reader is None

    def test_set_metadata_reader(self):
        """Test setting metadata reader."""
        reader = ConcreteCompositeReader()
        metadata_reader = ConcreteMetadataReader()
        reader.set_metadata_reader(metadata_reader)

        assert reader._metadata_reader is metadata_reader

    def test_set_structure_reader(self):
        """Test setting structure reader."""
        reader = ConcreteCompositeReader()
        structure_reader = ConcreteStructureReader()
        reader.set_structure_reader(structure_reader)

        assert reader._structure_reader is structure_reader

    def test_set_content_reader(self):
        """Test setting content reader."""
        reader = ConcreteCompositeReader()
        content_reader = ConcreteContentReader()
        reader.set_content_reader(content_reader)

        assert reader._content_reader is content_reader

    def test_set_resource_reader(self):
        """Test setting resource reader."""
        reader = ConcreteCompositeReader()
        resource_reader = ConcreteResourceReader()
        reader.set_resource_reader(resource_reader)

        assert reader._resource_reader is resource_reader

    def test_read_with_all_readers(self):
        """Test reading with all readers configured."""
        reader = ConcreteCompositeReader()
        reader.set_metadata_reader(ConcreteMetadataReader())
        reader.set_structure_reader(ConcreteStructureReader())
        reader.set_content_reader(ConcreteContentReader())
        reader.set_resource_reader(ConcreteResourceReader())

        doc = reader.read('test_source')

        # Verify metadata was extracted
        assert doc.get_metadata('title') == 'Test Title'
        assert doc.get_metadata('author') == 'Test Author'

        # Verify resources were extracted
        assert doc.get_resource('image1.png') == b'fake image data'
        assert doc.get_resource('style.css') == 'fake css'

        # NOTE: the structure and content readers are attached but their
        # output is not asserted here; only that read() completes with them.

    def test_read_with_no_readers(self):
        """Test reading with no readers configured."""
        reader = ConcreteCompositeReader()
        doc = reader.read('test_source')

        # Only the type of the result is checked; its contents are not
        # inspected by this test.
        assert isinstance(doc, Document)

    def test_read_with_only_metadata_reader(self):
        """Test reading with only metadata reader."""
        reader = ConcreteCompositeReader()
        reader.set_metadata_reader(ConcreteMetadataReader())

        doc = reader.read('test_source')

        assert doc.get_metadata('title') == 'Test Title'

    def test_read_with_options(self):
        """Test reading with options."""
        reader = ConcreteCompositeReader()
        reader.set_metadata_reader(ConcreteMetadataReader())

        # read() is invoked for its effect on the reader's options; the
        # returned document is intentionally not bound because nothing in
        # this test inspects it.
        reader.read('test_source', font_size=14, encoding='utf-8')

        # Verify options were stored
        assert reader.get_option('font_size') == 14
        assert reader.get_option('encoding') == 'utf-8'

    def test_can_read_implemented(self):
        """Test that can_read is implemented in ConcreteCompositeReader."""
        reader = ConcreteCompositeReader()

        assert reader.can_read('test_source') is True


class TestCompositeReaderIntegration:
    """Integration tests for CompositeReader."""

    def test_full_document_reading_workflow(self):
        """Test complete document reading workflow."""
        # Create and configure composite reader
        reader = ConcreteCompositeReader()
        reader.set_metadata_reader(ConcreteMetadataReader())
        reader.set_structure_reader(ConcreteStructureReader())
        reader.set_content_reader(ConcreteContentReader())
        reader.set_resource_reader(ConcreteResourceReader())

        # Read document with options
        doc = reader.read('complex_document.test', font_size=16, page_width=800)

        # Verify all components worked together
        assert doc.get_metadata('title') == 'Test Title'
        assert doc.get_metadata('author') == 'Test Author'
        assert doc.get_resource('image1.png') is not None
        assert reader.get_option('font_size') == 16
        assert reader.get_option('page_width') == 800