This commit is contained in:
parent
84229ad4da
commit
1bd9fdb551
@ -1,12 +0,0 @@
|
|||||||
import os
import sys

# Put the parent directory first on sys.path so the package resolves when
# this file is executed directly rather than imported.
_script_dir = os.path.dirname(__file__)
_parent_dir = os.path.abspath(os.path.join(_script_dir, '..'))
sys.path.insert(0, _parent_dir)

# This import has to follow the sys.path adjustment above.
from pyWebLayout.example import save_examples

if __name__ == "__main__":
    print("Running PyWebLayout examples...")
    save_examples()
|
|
||||||
@ -3,13 +3,7 @@ Input/Output module for pyWebLayout.
|
|||||||
|
|
||||||
This package provides functionality for reading and writing various file formats,
|
This package provides functionality for reading and writing various file formats,
|
||||||
including HTML, EPUB, and other document formats.
|
including HTML, EPUB, and other document formats.
|
||||||
|
|
||||||
The module uses a decomposed architecture with specialized readers for different
|
|
||||||
aspects of document parsing (metadata, content, resources), following the same
|
|
||||||
pattern as the abstract module.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Legacy readers (for backward compatibility)
|
# Readers
|
||||||
# Legacy functions provided by new HTML reader for backward compatibility
|
|
||||||
|
|
||||||
from pyWebLayout.io.readers.epub_reader import EPUBReader
|
from pyWebLayout.io.readers.epub_reader import EPUBReader
|
||||||
|
|||||||
@ -1,30 +1,17 @@
|
|||||||
"""
|
"""
|
||||||
Readers module for pyWebLayout.
|
Readers module for pyWebLayout.
|
||||||
|
|
||||||
This module provides specialized readers for different document formats
|
This module provides specialized readers for different document formats.
|
||||||
using a decomposed architecture pattern.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Base classes for the decomposed architecture
|
|
||||||
from .base import BaseReader, MetadataReader, ContentReader, ResourceReader, CompositeReader
|
|
||||||
|
|
||||||
# HTML readers (decomposed)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# EPUB readers
|
# EPUB readers
|
||||||
from .epub_reader import read_epub # Legacy
|
from .epub_reader import read_epub # Legacy
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
# Base classes
|
|
||||||
'BaseReader', 'MetadataReader', 'ContentReader', 'ResourceReader', 'CompositeReader',
|
|
||||||
|
|
||||||
# HTML readers
|
# HTML readers
|
||||||
'HTMLReader', 'read_html', 'read_html_file', 'parse_html_string',
|
'read_html', 'read_html_file', 'parse_html_string',
|
||||||
'HTMLMetadataReader', 'HTMLResourceReader',
|
|
||||||
|
|
||||||
# EPUB readers
|
# EPUB readers
|
||||||
'read_epub', 'EPUBMetadataReader',
|
'read_epub',
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,229 +0,0 @@
|
|||||||
"""
|
|
||||||
Base classes for document readers in pyWebLayout.
|
|
||||||
|
|
||||||
This module provides the foundational classes that all readers inherit from,
|
|
||||||
similar to how the abstract module provides base classes for document elements.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from typing import Any, Dict, List, Optional, Union
|
|
||||||
from pyWebLayout.abstract.document import Document
|
|
||||||
|
|
||||||
|
|
||||||
class BaseReader(ABC):
    """
    Common interface shared by every document reader.

    Subclasses must provide ``can_read`` and ``read``; the option storage
    (``set_option`` / ``get_option``) is implemented here.
    """

    def __init__(self):
        """Start with an empty option table and no document."""
        self._options = {}
        self._document = None

    @abstractmethod
    def can_read(self, source: Union[str, bytes]) -> bool:
        """
        Report whether this reader can handle the given source.

        Args:
            source: The source to check (file path, URL, or content)

        Returns:
            True if this reader can handle the source, False otherwise
        """

    @abstractmethod
    def read(self, source: Union[str, bytes], **options) -> Document:
        """
        Parse the source and return it as a Document.

        Args:
            source: The source to read (file path, URL, or content)
            **options: Additional options for reading

        Returns:
            The parsed Document
        """

    def set_option(self, key: str, value: Any):
        """
        Store a reader option, replacing any previous value for ``key``.

        Args:
            key: The option name
            value: The option value
        """
        self._options[key] = value

    def get_option(self, key: str, default: Any = None) -> Any:
        """
        Look up a reader option.

        Args:
            key: The option name
            default: Value returned when the option has not been set

        Returns:
            The stored option value, or ``default`` if ``key`` is absent
        """
        if key in self._options:
            return self._options[key]
        return default
|
|
||||||
|
|
||||||
|
|
||||||
class MetadataReader(ABC):
    """
    Interface for components that extract document metadata.

    Implementations pull metadata such as title and author out of a source.
    """

    @abstractmethod
    def extract_metadata(self, source: Any, document: Document) -> Dict[str, Any]:
        """
        Extract metadata from the source.

        Args:
            source: The source data
            document: The document to populate with metadata

        Returns:
            Dictionary of extracted metadata
        """
|
|
||||||
|
|
||||||
|
|
||||||
class StructureReader(ABC):
    """
    Interface for components that extract document structure.

    Implementations pull structural information such as headings and
    sections out of a source.
    """

    @abstractmethod
    def extract_structure(self, source: Any, document: Document) -> List[Any]:
        """
        Extract structure information from the source.

        Args:
            source: The source data
            document: The document to populate with structure

        Returns:
            List of structural elements
        """
|
|
||||||
|
|
||||||
|
|
||||||
class ContentReader(ABC):
    """
    Interface for components that extract document content.

    Implementations pull content such as text and formatting out of a source.
    """

    @abstractmethod
    def extract_content(self, source: Any, document: Document) -> Any:
        """
        Extract content from the source.

        Args:
            source: The source data
            document: The document to populate with content

        Returns:
            The extracted content
        """
|
|
||||||
|
|
||||||
|
|
||||||
class ResourceReader(ABC):
    """
    Interface for components that extract document resources.

    Implementations pull resources such as images and stylesheets out of a
    source.
    """

    @abstractmethod
    def extract_resources(self, source: Any, document: Document) -> Dict[str, Any]:
        """
        Extract resources from the source.

        Args:
            source: The source data
            document: The document to populate with resources

        Returns:
            Dictionary of extracted resources
        """
|
|
||||||
|
|
||||||
|
|
||||||
class CompositeReader(BaseReader):
    """
    A reader that combines multiple specialized readers.

    Metadata, structure, content, and resource readers are plugged in through
    the ``set_*_reader`` methods and run in that order by ``read``. This class
    does not implement ``can_read``, so it is still abstract and must be
    subclassed before it can be instantiated.
    """

    def __init__(self):
        """Initialize the composite reader with no sub-readers configured."""
        super().__init__()
        self._metadata_reader: Optional[MetadataReader] = None
        self._structure_reader: Optional[StructureReader] = None
        self._content_reader: Optional[ContentReader] = None
        self._resource_reader: Optional[ResourceReader] = None

    def set_metadata_reader(self, reader: MetadataReader):
        """Set the metadata reader."""
        self._metadata_reader = reader

    def set_structure_reader(self, reader: StructureReader):
        """Set the structure reader."""
        self._structure_reader = reader

    def set_content_reader(self, reader: ContentReader):
        """Set the content reader."""
        self._content_reader = reader

    def set_resource_reader(self, reader: ResourceReader):
        """Set the resource reader."""
        self._resource_reader = reader

    def read(self, source: Union[str, bytes], **options) -> Document:
        """
        Read the source using all configured readers.

        A fresh Document is created on every call and handed to each
        configured sub-reader in turn (metadata, structure, content,
        resources). The sub-readers' return values are ignored; they are
        expected to populate the document they are given.

        Args:
            source: The source to read
            **options: Additional options for reading. They are merged into
                the reader's option table, so they remain visible through
                ``get_option`` after this call returns.

        Returns:
            The parsed Document
        """
        document = Document()

        # Options accumulate on the reader rather than being scoped to this call.
        self._options.update(options)

        # Compare against None explicitly: a configured reader whose class
        # defines a falsy __bool__/__len__ must still be run.
        if self._metadata_reader is not None:
            self._metadata_reader.extract_metadata(source, document)

        if self._structure_reader is not None:
            self._structure_reader.extract_structure(source, document)

        if self._content_reader is not None:
            self._content_reader.extract_content(source, document)

        if self._resource_reader is not None:
            self._resource_reader.extract_resources(source, document)

        return document
|
|
||||||
@ -1,5 +0,0 @@
|
|||||||
"""
|
|
||||||
Layout options for the pyWebLayout library.
|
|
||||||
|
|
||||||
This module provides layout-related functionality.
|
|
||||||
"""
|
|
||||||
@ -1,302 +0,0 @@
|
|||||||
"""
|
|
||||||
Tests for pyWebLayout.io.readers.base module.
|
|
||||||
|
|
||||||
Tests the base reader classes and their functionality.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from pyWebLayout.io.readers.base import (
|
|
||||||
BaseReader,
|
|
||||||
MetadataReader,
|
|
||||||
StructureReader,
|
|
||||||
ContentReader,
|
|
||||||
ResourceReader,
|
|
||||||
CompositeReader
|
|
||||||
)
|
|
||||||
from pyWebLayout.abstract.document import Document
|
|
||||||
|
|
||||||
|
|
||||||
# Concrete implementations for testing
|
|
||||||
|
|
||||||
class ConcreteBaseReader(BaseReader):
    """Minimal BaseReader subclass used as a test fixture."""

    def can_read(self, source):
        # Only string sources ending in '.test' are accepted.
        if not isinstance(source, str):
            return False
        return source.endswith('.test')

    def read(self, source, **options):
        # Record the source on the document so tests can check it round-trips.
        document = Document()
        document.set_metadata('source', source)
        return document
|
|
||||||
|
|
||||||
|
|
||||||
class ConcreteMetadataReader(MetadataReader):
    """MetadataReader fixture that always reports a fixed title and author."""

    def extract_metadata(self, source, document):
        metadata = {'title': 'Test Title', 'author': 'Test Author'}
        # Mirror the fixed values onto the document, title first.
        for field in ('title', 'author'):
            document.set_metadata(field, metadata[field])
        return metadata
|
|
||||||
|
|
||||||
|
|
||||||
class ConcreteStructureReader(StructureReader):
    """StructureReader fixture that always reports two headings."""

    def extract_structure(self, source, document):
        # The document is left untouched; only the return value is meaningful.
        headings = ['heading1', 'heading2']
        return headings
|
|
||||||
|
|
||||||
|
|
||||||
class ConcreteContentReader(ContentReader):
    """ContentReader fixture that always returns a fixed string."""

    def extract_content(self, source, document):
        # The document is left untouched; only the return value is meaningful.
        content = "Test content"
        return content
|
|
||||||
|
|
||||||
|
|
||||||
class ConcreteResourceReader(ResourceReader):
    """ResourceReader fixture that registers two fake resources."""

    def extract_resources(self, source, document):
        resources = {
            'image1.png': b'fake image data',
            'style.css': 'fake css',
        }
        # Register every fake resource on the document in insertion order.
        for entry in resources.items():
            document.add_resource(*entry)
        return resources
|
|
||||||
|
|
||||||
|
|
||||||
class ConcreteCompositeReader(CompositeReader):
    """CompositeReader fixture that accepts every source."""

    def can_read(self, source):
        # Supplying can_read is what makes the composite instantiable.
        return True
|
|
||||||
|
|
||||||
|
|
||||||
# Test Cases
|
|
||||||
|
|
||||||
class TestBaseReaderOptions:
    """Exercise the option storage provided by BaseReader."""

    def test_set_and_get_option(self):
        """A stored option is returned unchanged."""
        fixture = ConcreteBaseReader()
        fixture.set_option('font_size', 12)
        assert fixture.get_option('font_size') == 12

    def test_get_option_with_default(self):
        """An unset option falls back to the supplied default."""
        fixture = ConcreteBaseReader()
        assert fixture.get_option('nonexistent', 'default_value') == 'default_value'

    def test_get_option_without_default(self):
        """An unset option with no default yields None."""
        fixture = ConcreteBaseReader()
        assert fixture.get_option('nonexistent') is None

    def test_multiple_options(self):
        """Several options can be stored side by side."""
        fixture = ConcreteBaseReader()
        expected = {'font_size': 12, 'line_height': 1.5, 'color': 'black'}

        for name, value in expected.items():
            fixture.set_option(name, value)

        for name, value in expected.items():
            assert fixture.get_option(name) == value
|
|
||||||
|
|
||||||
|
|
||||||
class TestBaseReaderConcrete:
    """Exercise the ConcreteBaseReader fixture end to end."""

    def test_can_read_valid_source(self):
        """A '.test' source is accepted."""
        fixture = ConcreteBaseReader()
        accepted = fixture.can_read('document.test')
        assert accepted is True

    def test_can_read_invalid_source(self):
        """A source with another extension is rejected."""
        fixture = ConcreteBaseReader()
        accepted = fixture.can_read('document.html')
        assert accepted is False

    def test_read_creates_document(self):
        """read returns a Document carrying the source as metadata."""
        fixture = ConcreteBaseReader()
        result = fixture.read('test.test')
        assert isinstance(result, Document)
        assert result.get_metadata('source') == 'test.test'
|
|
||||||
|
|
||||||
|
|
||||||
class TestMetadataReaderConcrete:
    """Exercise the ConcreteMetadataReader fixture."""

    def test_extract_metadata(self):
        """Metadata is both returned and written onto the document."""
        target = Document()
        extracted = ConcreteMetadataReader().extract_metadata('source', target)

        assert extracted['title'] == 'Test Title'
        assert extracted['author'] == 'Test Author'
        assert target.get_metadata('title') == 'Test Title'
        assert target.get_metadata('author') == 'Test Author'
|
|
||||||
|
|
||||||
|
|
||||||
class TestStructureReaderConcrete:
    """Exercise the ConcreteStructureReader fixture."""

    def test_extract_structure(self):
        """The fixture returns its two headings as a list."""
        target = Document()
        headings = ConcreteStructureReader().extract_structure('source', target)

        assert isinstance(headings, list)
        assert len(headings) == 2
        assert headings[0] == 'heading1'
        assert headings[1] == 'heading2'
|
|
||||||
|
|
||||||
|
|
||||||
class TestContentReaderConcrete:
    """Exercise the ConcreteContentReader fixture."""

    def test_extract_content(self):
        """The fixture returns its fixed content string."""
        target = Document()
        extracted = ConcreteContentReader().extract_content('source', target)

        assert extracted == "Test content"
|
|
||||||
|
|
||||||
|
|
||||||
class TestResourceReaderConcrete:
    """Exercise the ConcreteResourceReader fixture."""

    def test_extract_resources(self):
        """Resources are both returned and registered on the document."""
        target = Document()
        extracted = ConcreteResourceReader().extract_resources('source', target)

        assert isinstance(extracted, dict)
        assert 'image1.png' in extracted
        assert 'style.css' in extracted
        assert target.get_resource('image1.png') == b'fake image data'
        assert target.get_resource('style.css') == 'fake css'
|
|
||||||
|
|
||||||
|
|
||||||
class TestCompositeReader:
    """Exercise CompositeReader through the ConcreteCompositeReader fixture."""

    def test_initialization(self):
        """A new composite has no sub-readers configured."""
        composite = ConcreteCompositeReader()
        assert composite._metadata_reader is None
        assert composite._structure_reader is None
        assert composite._content_reader is None
        assert composite._resource_reader is None

    def test_set_metadata_reader(self):
        """The metadata reader is stored by identity."""
        composite = ConcreteCompositeReader()
        part = ConcreteMetadataReader()
        composite.set_metadata_reader(part)
        assert composite._metadata_reader is part

    def test_set_structure_reader(self):
        """The structure reader is stored by identity."""
        composite = ConcreteCompositeReader()
        part = ConcreteStructureReader()
        composite.set_structure_reader(part)
        assert composite._structure_reader is part

    def test_set_content_reader(self):
        """The content reader is stored by identity."""
        composite = ConcreteCompositeReader()
        part = ConcreteContentReader()
        composite.set_content_reader(part)
        assert composite._content_reader is part

    def test_set_resource_reader(self):
        """The resource reader is stored by identity."""
        composite = ConcreteCompositeReader()
        part = ConcreteResourceReader()
        composite.set_resource_reader(part)
        assert composite._resource_reader is part

    def test_read_with_all_readers(self):
        """With every sub-reader set, metadata and resources reach the document."""
        composite = ConcreteCompositeReader()
        composite.set_metadata_reader(ConcreteMetadataReader())
        composite.set_structure_reader(ConcreteStructureReader())
        composite.set_content_reader(ConcreteContentReader())
        composite.set_resource_reader(ConcreteResourceReader())

        result = composite.read('test_source')

        # Metadata written by the metadata fixture
        assert result.get_metadata('title') == 'Test Title'
        assert result.get_metadata('author') == 'Test Author'

        # Resources registered by the resource fixture
        assert result.get_resource('image1.png') == b'fake image data'
        assert result.get_resource('style.css') == 'fake css'

    def test_read_with_no_readers(self):
        """With no sub-readers set, read still returns a Document."""
        composite = ConcreteCompositeReader()
        result = composite.read('test_source')

        assert isinstance(result, Document)

    def test_read_with_only_metadata_reader(self):
        """A lone metadata reader is enough to populate metadata."""
        composite = ConcreteCompositeReader()
        composite.set_metadata_reader(ConcreteMetadataReader())

        result = composite.read('test_source')
        assert result.get_metadata('title') == 'Test Title'

    def test_read_with_options(self):
        """Keyword options passed to read are kept on the reader."""
        composite = ConcreteCompositeReader()
        composite.set_metadata_reader(ConcreteMetadataReader())

        # Only the side effect on the reader's options is checked here.
        composite.read('test_source', font_size=14, encoding='utf-8')

        assert composite.get_option('font_size') == 14
        assert composite.get_option('encoding') == 'utf-8'

    def test_can_read_implemented(self):
        """ConcreteCompositeReader supplies a working can_read."""
        composite = ConcreteCompositeReader()
        assert composite.can_read('test_source') is True
|
|
||||||
|
|
||||||
|
|
||||||
class TestCompositeReaderIntegration:
    """End-to-end check of a fully configured CompositeReader."""

    def test_full_document_reading_workflow(self):
        """All four sub-readers and the option store cooperate in one read."""
        # Wire up a composite with every fixture sub-reader.
        composite = ConcreteCompositeReader()
        composite.set_metadata_reader(ConcreteMetadataReader())
        composite.set_structure_reader(ConcreteStructureReader())
        composite.set_content_reader(ConcreteContentReader())
        composite.set_resource_reader(ConcreteResourceReader())

        # Read once, passing options along with the source.
        result = composite.read('complex_document.test', font_size=16, page_width=800)

        # The document carries the fixtures' output ...
        assert result.get_metadata('title') == 'Test Title'
        assert result.get_metadata('author') == 'Test Author'
        assert result.get_resource('image1.png') is not None
        # ... and the reader retains the options it was given.
        assert composite.get_option('font_size') == 16
        assert composite.get_option('page_width') == 800
|
|
||||||
Loading…
x
Reference in New Issue
Block a user