From edac4de5b4c26e3a7280bc38f324e00474f3ff28 Mon Sep 17 00:00:00 2001 From: Duncan Tourolle Date: Sat, 21 Jun 2025 11:38:53 +0200 Subject: [PATCH] use font registry rather than make each time --- pyWebLayout/abstract/block.py | 84 +++++++- pyWebLayout/abstract/document.py | 157 ++++++++++++++- pyWebLayout/io/readers/html_extraction.py | 54 ++++-- tests/test_abstract_document.py | 221 +++++++++++++++++++++- tests/test_html_extraction.py | 178 ++++++++++++++++- 5 files changed, 673 insertions(+), 21 deletions(-) diff --git a/pyWebLayout/abstract/block.py b/pyWebLayout/abstract/block.py index 2032bbe..796ca91 100644 --- a/pyWebLayout/abstract/block.py +++ b/pyWebLayout/abstract/block.py @@ -6,6 +6,7 @@ import urllib.request import urllib.parse from PIL import Image as PILImage from .inline import Word, FormattedSpan +from ..style import Font, FontWeight, FontStyle, TextDecoration class BlockType(Enum): @@ -72,6 +73,7 @@ class Paragraph(Block): self._words: List[Word] = [] self._spans: List[FormattedSpan] = [] self._style = style + self._fonts: Dict[str, Font] = {} # Local font registry @classmethod def create_and_add_to(cls, container, style=None) -> 'Paragraph': @@ -190,8 +192,88 @@ class Paragraph(Block): return len(self._words) def __len__(self): - return self.word_count + + def get_or_create_font(self, + font_path: Optional[str] = None, + font_size: int = 16, + colour: Tuple[int, int, int] = (0, 0, 0), + weight: FontWeight = FontWeight.NORMAL, + style: FontStyle = FontStyle.NORMAL, + decoration: TextDecoration = TextDecoration.NONE, + background: Optional[Tuple[int, int, int, int]] = None, + language: str = "en_EN", + min_hyphenation_width: Optional[int] = None) -> Font: + """ + Get or create a font with the specified properties. Cascades to parent if available. + + Args: + font_path: Path to the font file (.ttf, .otf). If None, uses default font. + font_size: Size of the font in points. + colour: RGB color tuple for the text. + weight: Font weight (normal or bold). + style: Font style (normal or italic). + decoration: Text decoration (none, underline, or strikethrough). + background: RGBA background color for the text. If None, transparent background. + language: Language code for hyphenation and text processing. + min_hyphenation_width: Minimum width in pixels required for hyphenation. + + Returns: + Font object (either existing or newly created) + """ + # If we have a parent with font management, delegate to parent + if self._parent and hasattr(self._parent, 'get_or_create_font'): + return self._parent.get_or_create_font( + font_path=font_path, + font_size=font_size, + colour=colour, + weight=weight, + style=style, + decoration=decoration, + background=background, + language=language, + min_hyphenation_width=min_hyphenation_width + ) + + # Otherwise manage our own fonts + # Create a unique key for this font configuration + bg_tuple = background if background else (255, 255, 255, 0) + min_hyph_width = min_hyphenation_width if min_hyphenation_width is not None else font_size * 4 + + font_key = ( + font_path, + font_size, + colour, + weight.value if isinstance(weight, FontWeight) else weight, + style.value if isinstance(style, FontStyle) else style, + decoration.value if isinstance(decoration, TextDecoration) else decoration, + bg_tuple, + language, + min_hyph_width + ) + + # Convert tuple to string for dictionary key + key_str = str(font_key) + + # Check if we already have this font + if key_str in self._fonts: + return self._fonts[key_str] + + # Create new font and store it + new_font = Font( + font_path=font_path, + font_size=font_size, + colour=colour, + weight=weight, + style=style, + decoration=decoration, + background=background, + language=language, + min_hyphenation_width=min_hyphenation_width + ) + + self._fonts[key_str] = new_font + return new_font class HeadingLevel(Enum): diff --git a/pyWebLayout/abstract/document.py b/pyWebLayout/abstract/document.py index 6576ae7..0ff9636 100644 --- a/pyWebLayout/abstract/document.py +++ b/pyWebLayout/abstract/document.py @@ -4,6 +4,7 @@ from enum import Enum from .block import Block, BlockType, Heading, HeadingLevel, Paragraph from .functional import Link, Button, Form from .inline import Word, FormattedSpan +from ..style import Font, FontWeight, FontStyle, TextDecoration class MetadataType(Enum): @@ -43,7 +44,8 @@ class Document: self._stylesheets: List[Dict[str, Any]] = [] # CSS stylesheets self._scripts: List[str] = [] # JavaScript code self._default_style = default_style - + self._fonts: Dict[str, Font] = {} # Font registry for reusing font objects + # Set basic metadata if title: self.set_metadata(MetadataType.TITLE, title) @@ -302,6 +304,73 @@ class Document: toc.append((level, title, heading)) return toc + + def get_or_create_font(self, + font_path: Optional[str] = None, + font_size: int = 16, + colour: Tuple[int, int, int] = (0, 0, 0), + weight: FontWeight = FontWeight.NORMAL, + style: FontStyle = FontStyle.NORMAL, + decoration: TextDecoration = TextDecoration.NONE, + background: Optional[Tuple[int, int, int, int]] = None, + language: str = "en_EN", + min_hyphenation_width: Optional[int] = None) -> Font: + """ + Get or create a font with the specified properties. Reuses existing fonts + when possible to avoid creating duplicate font objects. + + Args: + font_path: Path to the font file (.ttf, .otf). If None, uses default font. + font_size: Size of the font in points. + colour: RGB color tuple for the text. + weight: Font weight (normal or bold). + style: Font style (normal or italic). + decoration: Text decoration (none, underline, or strikethrough). + background: RGBA background color for the text. If None, transparent background. + language: Language code for hyphenation and text processing. + min_hyphenation_width: Minimum width in pixels required for hyphenation. + + Returns: + Font object (either existing or newly created) + """ + # Create a unique key for this font configuration + bg_tuple = background if background else (255, 255, 255, 0) + min_hyph_width = min_hyphenation_width if min_hyphenation_width is not None else font_size * 4 + + font_key = ( + font_path, + font_size, + colour, + weight.value if isinstance(weight, FontWeight) else weight, + style.value if isinstance(style, FontStyle) else style, + decoration.value if isinstance(decoration, TextDecoration) else decoration, + bg_tuple, + language, + min_hyph_width + ) + + # Convert tuple to string for dictionary key + key_str = str(font_key) + + # Check if we already have this font + if key_str in self._fonts: + return self._fonts[key_str] + + # Create new font and store it + new_font = Font( + font_path=font_path, + font_size=font_size, + colour=colour, + weight=weight, + style=style, + decoration=decoration, + background=background, + language=language, + min_hyphenation_width=min_hyphenation_width + ) + + self._fonts[key_str] = new_font + return new_font class Chapter: @@ -310,7 +379,7 @@ class Chapter: A chapter contains a sequence of blocks and has metadata. """ - def __init__(self, title: Optional[str] = None, level: int = 1, style=None): + def __init__(self, title: Optional[str] = None, level: int = 1, style=None, parent=None): """ Initialize a new chapter. @@ -318,12 +387,15 @@ class Chapter: title: The chapter title level: The chapter level (1 = top level, 2 = subsection, etc.) style: Optional default style for child blocks + parent: Parent container (e.g., Document or Book) """ self._title = title self._level = level self._blocks: List[Block] = [] self._metadata: Dict[str, Any] = {} self._style = style + self._parent = parent + self._fonts: Dict[str, Font] = {} # Local font registry @property def title(self) -> Optional[str]: @@ -418,6 +490,87 @@ class Chapter: The metadata value, or None if not set """ return self._metadata.get(key) + + def get_or_create_font(self, + font_path: Optional[str] = None, + font_size: int = 16, + colour: Tuple[int, int, int] = (0, 0, 0), + weight: FontWeight = FontWeight.NORMAL, + style: FontStyle = FontStyle.NORMAL, + decoration: TextDecoration = TextDecoration.NONE, + background: Optional[Tuple[int, int, int, int]] = None, + language: str = "en_EN", + min_hyphenation_width: Optional[int] = None) -> Font: + """ + Get or create a font with the specified properties. Cascades to parent if available. + + Args: + font_path: Path to the font file (.ttf, .otf). If None, uses default font. + font_size: Size of the font in points. + colour: RGB color tuple for the text. + weight: Font weight (normal or bold). + style: Font style (normal or italic). + decoration: Text decoration (none, underline, or strikethrough). + background: RGBA background color for the text. If None, transparent background. + language: Language code for hyphenation and text processing. + min_hyphenation_width: Minimum width in pixels required for hyphenation. + + Returns: + Font object (either existing or newly created) + """ + # If we have a parent with font management, delegate to parent + if self._parent and hasattr(self._parent, 'get_or_create_font'): + return self._parent.get_or_create_font( + font_path=font_path, + font_size=font_size, + colour=colour, + weight=weight, + style=style, + decoration=decoration, + background=background, + language=language, + min_hyphenation_width=min_hyphenation_width + ) + + # Otherwise manage our own fonts + # Create a unique key for this font configuration + bg_tuple = background if background else (255, 255, 255, 0) + min_hyph_width = min_hyphenation_width if min_hyphenation_width is not None else font_size * 4 + + font_key = ( + font_path, + font_size, + colour, + weight.value if isinstance(weight, FontWeight) else weight, + style.value if isinstance(style, FontStyle) else style, + decoration.value if isinstance(decoration, TextDecoration) else decoration, + bg_tuple, + language, + min_hyph_width + ) + + # Convert tuple to string for dictionary key + key_str = str(font_key) + + # Check if we already have this font + if key_str in self._fonts: + return self._fonts[key_str] + + # Create new font and store it + new_font = Font( + font_path=font_path, + font_size=font_size, + colour=colour, + weight=weight, + style=style, + decoration=decoration, + background=background, + language=language, + min_hyphenation_width=min_hyphenation_width + ) + + self._fonts[key_str] = new_font + return new_font class Book(Document): diff --git a/pyWebLayout/io/readers/html_extraction.py b/pyWebLayout/io/readers/html_extraction.py index 04d9064..fb64e1b 100644 --- a/pyWebLayout/io/readers/html_extraction.py +++ b/pyWebLayout/io/readers/html_extraction.py @@ -41,6 +41,7 @@ class StyleContext(NamedTuple): css_styles: Dict[str, str] element_attributes: Dict[str, Any] parent_elements: List[str] # Stack of parent element names + document: Optional[Any] # Reference to document for font registry def with_font(self, font: Font) -> "StyleContext": """Create new context with modified font.""" @@ -69,12 +70,13 @@ class StyleContext(NamedTuple): return self._replace(parent_elements=self.parent_elements + [element_name]) -def create_base_context(base_font: Optional[Font] = None) -> StyleContext: +def create_base_context(base_font: Optional[Font] = None, document=None) -> StyleContext: """ Create a base style context with default values. Args: base_font: Base font to use, defaults to system default + document: Document instance for font registry Returns: StyleContext with default values @@ -86,6 +88,7 @@ def create_base_context(base_font: Optional[Font] = None) -> StyleContext: css_styles={}, element_attributes={}, parent_elements=[], + document=document, ) @@ -125,7 +128,7 @@ def apply_element_styling(context: StyleContext, element: Tag) -> StyleContext: new_context = new_context.with_css_styles(css_styles) # Apply element-specific default styles - font = apply_element_font_styles(new_context.font, tag_name, css_styles) + font = apply_element_font_styles(new_context.font, tag_name, css_styles, new_context) new_context = new_context.with_font(font) # Apply background from styles @@ -154,18 +157,20 @@ def parse_inline_styles(style_text: str) -> Dict[str, str]: def apply_element_font_styles( - font: Font, tag_name: str, css_styles: Dict[str, str] + font: Font, tag_name: str, css_styles: Dict[str, str], context: Optional[StyleContext] = None ) -> Font: """ Apply font styling based on HTML element and CSS styles. + Uses document's font registry when available to avoid creating duplicate fonts. Args: font: Current font tag_name: HTML tag name css_styles: CSS styles dictionary + context: Style context with document reference for font registry Returns: - New Font object with applied styling + Font object with applied styling (either existing or newly created) """ # Default element styles element_font_styles = { @@ -192,6 +197,7 @@ def apply_element_font_styles( decoration = font.decoration background = font.background language = font.language + font_path = font._font_path # Apply element default styles if tag_name in element_font_styles: @@ -264,16 +270,31 @@ def apply_element_font_styles( except ValueError: pass - return Font( - font_path=font._font_path, - font_size=font_size, - colour=colour, - weight=weight, - style=style, - decoration=decoration, - background=background, - language=language, - ) + # Use document's font registry if available to avoid creating duplicate fonts + if context and context.document and hasattr(context.document, 'get_or_create_font'): + return context.document.get_or_create_font( + font_path=font_path, + font_size=font_size, + colour=colour, + weight=weight, + style=style, + decoration=decoration, + background=background, + language=language, + min_hyphenation_width=font.min_hyphenation_width + ) + else: + # Fallback to creating new font if no document context + return Font( + font_path=font_path, + font_size=font_size, + colour=colour, + weight=weight, + style=style, + decoration=decoration, + background=background, + language=language, + ) def apply_background_styles( @@ -725,7 +746,7 @@ HANDLERS: Dict[str, Callable[[Tag, StyleContext], Union[Block, List[Block], None def parse_html_string( - html_string: str, base_font: Optional[Font] = None + html_string: str, base_font: Optional[Font] = None, document=None ) -> List[Block]: """ Parse HTML string and return list of Block objects. @@ -733,12 +754,13 @@ def parse_html_string( Args: html_string: HTML content to parse base_font: Base font for styling, defaults to system default + document: Document instance for font registry to avoid duplicate fonts Returns: List of Block objects representing the document structure """ soup = BeautifulSoup(html_string, "html.parser") - context = create_base_context(base_font) + context = create_base_context(base_font, document) blocks = [] # Process the body if it exists, otherwise process all top-level elements diff --git a/tests/test_abstract_document.py b/tests/test_abstract_document.py index e379901..fadcb04 100644 --- a/tests/test_abstract_document.py +++ b/tests/test_abstract_document.py @@ -9,7 +9,7 @@ import unittest from pyWebLayout.abstract.document import Document, Chapter, Book, MetadataType from pyWebLayout.abstract.block import Paragraph, Heading, HeadingLevel, BlockType from pyWebLayout.abstract.inline import Word, FormattedSpan -from pyWebLayout.style import Font +from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration class TestMetadataType(unittest.TestCase): @@ -464,5 +464,224 @@ class TestBook(unittest.TestCase): self.assertEqual(self.book.get_anchor("preface"), heading) +class TestDocumentFontRegistry(unittest.TestCase): + """Test cases for Document font registry functionality.""" + + def setUp(self): + """Set up test fixtures.""" + self.doc = Document("Test Document", "en-US") + + def test_get_or_create_font_creates_new_font(self): + """Test that get_or_create_font creates a new font when none exists.""" + font = self.doc.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + self.assertEqual(font.font_size, 14) + self.assertEqual(font.colour, (255, 0, 0)) + self.assertEqual(font.weight, FontWeight.BOLD) + + # Check that font is stored in registry + self.assertEqual(len(self.doc._fonts), 1) + + def test_get_or_create_font_reuses_existing_font(self): + """Test that get_or_create_font reuses existing fonts.""" + # Create first font + font1 = self.doc.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Create second font with same properties + font2 = self.doc.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Should return the same font object + self.assertIs(font1, font2) + + # Should only have one font in registry + self.assertEqual(len(self.doc._fonts), 1) + + def test_get_or_create_font_creates_different_fonts(self): + """Test that different font properties create different fonts.""" + # Create first font + font1 = self.doc.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Create font with different size + font2 = self.doc.get_or_create_font( + font_size=16, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Create font with different color + font3 = self.doc.get_or_create_font( + font_size=14, + colour=(0, 255, 0), + weight=FontWeight.BOLD + ) + + # Create font with different weight + font4 = self.doc.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.NORMAL + ) + + # All should be different objects + self.assertIsNot(font1, font2) + self.assertIsNot(font1, font3) + self.assertIsNot(font1, font4) + self.assertIsNot(font2, font3) + self.assertIsNot(font2, font4) + self.assertIsNot(font3, font4) + + # Should have four fonts in registry + self.assertEqual(len(self.doc._fonts), 4) + + def test_get_or_create_font_with_all_parameters(self): + """Test get_or_create_font with all parameters.""" + font = self.doc.get_or_create_font( + font_path="path/to/font.ttf", + font_size=18, + colour=(128, 64, 192), + weight=FontWeight.BOLD, + style=FontStyle.ITALIC, + decoration=TextDecoration.UNDERLINE, + background=(255, 255, 255, 128), + language="fr_FR", + min_hyphenation_width=80 + ) + + self.assertEqual(font._font_path, "path/to/font.ttf") + self.assertEqual(font.font_size, 18) + self.assertEqual(font.colour, (128, 64, 192)) + self.assertEqual(font.weight, FontWeight.BOLD) + self.assertEqual(font.style, FontStyle.ITALIC) + self.assertEqual(font.decoration, TextDecoration.UNDERLINE) + self.assertEqual(font.background, (255, 255, 255, 128)) + self.assertEqual(font.language, "fr_FR") + self.assertEqual(font.min_hyphenation_width, 80) + + def test_get_or_create_font_with_defaults(self): + """Test get_or_create_font with default values.""" + font = self.doc.get_or_create_font() + + # Should create font with default values + self.assertIsNotNone(font) + self.assertEqual(font.font_size, 16) # Default font size + self.assertEqual(font.colour, (0, 0, 0)) # Default black color + self.assertEqual(font.weight, FontWeight.NORMAL) + self.assertEqual(font.style, FontStyle.NORMAL) + self.assertEqual(font.decoration, TextDecoration.NONE) + + +class TestChapterFontRegistry(unittest.TestCase): + """Test cases for Chapter font registry functionality.""" + + def setUp(self): + """Set up test fixtures.""" + self.doc = Document("Test Document", "en-US") + self.chapter = Chapter("Test Chapter", 1, parent=self.doc) + + def test_chapter_uses_parent_font_registry(self): + """Test that chapter uses parent document's font registry.""" + # Create font through chapter - should delegate to parent + font1 = self.chapter.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Create same font through document - should return same object + font2 = self.doc.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Should be the same font object + self.assertIs(font1, font2) + + # Should be stored in document's registry, not chapter's + self.assertEqual(len(self.doc._fonts), 1) + self.assertEqual(len(self.chapter._fonts), 0) + + def test_chapter_without_parent_manages_own_fonts(self): + """Test that chapter without parent manages its own fonts.""" + # Create chapter without parent + standalone_chapter = Chapter("Standalone Chapter", 1) + + # Create font through chapter + font1 = standalone_chapter.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Create same font again - should reuse + font2 = standalone_chapter.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Should be the same font object + self.assertIs(font1, font2) + + # Should be stored in chapter's own registry + self.assertEqual(len(standalone_chapter._fonts), 1) + + def test_chapter_parent_assignment(self): + """Test that chapter parent assignment works correctly.""" + # Create chapter with parent + chapter_with_parent = Chapter("Chapter with Parent", 1, parent=self.doc) + self.assertEqual(chapter_with_parent._parent, self.doc) + + # Create chapter without parent + chapter_without_parent = Chapter("Chapter without Parent", 1) + self.assertIsNone(chapter_without_parent._parent) + + +class TestBookFontRegistry(unittest.TestCase): + """Test cases for Book font registry functionality.""" + + def setUp(self): + """Set up test fixtures.""" + self.book = Book("Test Book", "Author Name", "en-US") + + def test_book_inherits_document_font_registry(self): + """Test that Book inherits Document's font registry functionality.""" + # Create font through book + font1 = self.book.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Create same font again - should reuse + font2 = self.book.get_or_create_font( + font_size=14, + colour=(255, 0, 0), + weight=FontWeight.BOLD + ) + + # Should be the same font object + self.assertIs(font1, font2) + + # Should have one font in registry + self.assertEqual(len(self.book._fonts), 1) + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_html_extraction.py b/tests/test_html_extraction.py index 0cbf1cb..7b4aa13 100644 --- a/tests/test_html_extraction.py +++ b/tests/test_html_extraction.py @@ -8,7 +8,8 @@ including styled content within paragraphs and block-level elements. import unittest from pyWebLayout.io.readers.html_extraction import parse_html_string from pyWebLayout.abstract.block import Paragraph, Heading, HeadingLevel, Quote, CodeBlock, HList, ListStyle, Table -from pyWebLayout.style import FontWeight, FontStyle, TextDecoration +from pyWebLayout.abstract.document import Document +from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration class TestHTMLParagraph(unittest.TestCase): @@ -380,5 +381,180 @@ class TestHTMLComplexStructures(unittest.TestCase): self.assertIsInstance(blocks[0], Table) +class TestHTMLFontRegistryIntegration(unittest.TestCase): + """Test cases for font registry integration with HTML extraction.""" + + def setUp(self): + """Set up test fixtures.""" + self.doc = Document("Test Document", "en-US") + self.base_font = Font(font_size=16, colour=(0, 0, 0)) + + def test_font_registry_creates_fonts(self): + """Test that HTML parsing with document context creates fonts in registry.""" + html_content = """ +
+

This is bold text and italic text.

+

Main Header

+
+ """ + + # Initially empty font registry + initial_font_count = len(self.doc._fonts) + + # Parse HTML with document context + blocks = parse_html_string(html_content, self.base_font, document=self.doc) + + # Should have created fonts for different styles + final_font_count = len(self.doc._fonts) + self.assertGreater(final_font_count, initial_font_count, + "Should have created fonts in registry") + + # Should have created blocks + self.assertGreater(len(blocks), 0, "Should have created blocks") + + def test_font_registry_reuses_fonts(self): + """Test that parsing same content reuses existing fonts.""" + html_content = """ +
+

This is bold text and italic text.

+

Main Header

+
+ """ + + # First parse + blocks1 = parse_html_string(html_content, self.base_font, document=self.doc) + first_parse_font_count = len(self.doc._fonts) + + # Second parse with same content + blocks2 = parse_html_string(html_content, self.base_font, document=self.doc) + second_parse_font_count = len(self.doc._fonts) + + # Font count should not increase on second parse + self.assertEqual(first_parse_font_count, second_parse_font_count, + "Should reuse existing fonts instead of creating new ones") + + # Both parses should create same number of blocks + self.assertEqual(len(blocks1), len(blocks2), + "Should create same structure on both parses") + + def test_font_registry_different_styles_create_different_fonts(self): + """Test that different styles create different font objects.""" + # Create fonts with different properties + font1 = self.doc.get_or_create_font( + font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD + ) + font2 = self.doc.get_or_create_font( + font_size=16, colour=(255, 0, 0), weight=FontWeight.BOLD + ) + font3 = self.doc.get_or_create_font( + font_size=14, colour=(0, 255, 0), weight=FontWeight.BOLD + ) + + # Should be different objects + self.assertIsNot(font1, font2, "Different sizes should create different fonts") + self.assertIsNot(font1, font3, "Different colors should create different fonts") + self.assertIsNot(font2, font3, "All fonts should be different") + + # Should have 3 fonts in registry + self.assertEqual(len(self.doc._fonts), 3) + + def test_font_registry_integration_with_html_styles(self): + """Test that HTML parsing uses font registry for styled content.""" + html_content = """ +

Normal text with bold and italic and + red text.

+ """ + + # Parse content + blocks = parse_html_string(html_content, self.base_font, document=self.doc) + + # Extract all words from the paragraph + paragraph = blocks[0] + words = list(paragraph.words()) + + # Find words with different styles + normal_words = [w for _, w in words if w.style.weight == FontWeight.NORMAL + and w.style.style == FontStyle.NORMAL] + bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD] + italic_words = [w for _, w in words if w.style.style == FontStyle.ITALIC] + red_words = [w for _, w in words if w.style.colour == (255, 0, 0)] + + # Should have words with different styles + self.assertGreater(len(normal_words), 0, "Should have normal words") + self.assertGreater(len(bold_words), 0, "Should have bold words") + self.assertGreater(len(italic_words), 0, "Should have italic words") + self.assertGreater(len(red_words), 0, "Should have red words") + + # Font registry should contain multiple fonts for different styles + self.assertGreater(len(self.doc._fonts), 1, + "Should have multiple fonts for different styles") + + def test_font_registry_without_document_context(self): + """Test that parsing without document context works (fallback behavior).""" + html_content = "

This is bold text.

" + + # Parse without document context + blocks = parse_html_string(html_content, self.base_font) + + # Should still create blocks successfully + self.assertEqual(len(blocks), 1) + self.assertIsInstance(blocks[0], Paragraph) + + # Should not affect document's font registry + self.assertEqual(len(self.doc._fonts), 0, + "Document font registry should remain empty") + + def test_complex_html_font_reuse(self): + """Test font reuse with complex HTML containing repeated styles.""" + html_content = """ +
+

First Header

+

Paragraph with bold text.

+

Second Header

+

Another paragraph with bold text.

+
+ """ + + # Parse content + blocks = parse_html_string(html_content, self.base_font, document=self.doc) + font_count_after_parse = len(self.doc._fonts) + + # Parse same content again + blocks2 = parse_html_string(html_content, self.base_font, document=self.doc) + font_count_after_second_parse = len(self.doc._fonts) + + # Font count should not increase on second parse + self.assertEqual(font_count_after_parse, font_count_after_second_parse, + "Fonts should be reused for repeated styles") + + # Both should create same structure + self.assertEqual(len(blocks), len(blocks2)) + + def test_font_registry_with_nested_styles(self): + """Test font registry with nested HTML styles.""" + html_content = """ +

Text with bold and bold italic nested styles.

+ """ + + # Parse content + blocks = parse_html_string(html_content, self.base_font, document=self.doc) + + # Should create fonts for different style combinations + paragraph = blocks[0] + words = list(paragraph.words()) + + # Find words that are both bold and italic + bold_italic_words = [w for _, w in words + if w.style.weight == FontWeight.BOLD + and w.style.style == FontStyle.ITALIC] + + self.assertGreater(len(bold_italic_words), 0, + "Should have words with combined bold+italic style") + + # Should have multiple fonts in registry for different combinations + self.assertGreater(len(self.doc._fonts), 1, + "Should create separate fonts for style combinations") + + if __name__ == '__main__': unittest.main()