Use the font registry to reuse fonts rather than creating a new one each time
All checks were successful
Python CI / test (push) Successful in 5m6s

This commit is contained in:
Duncan Tourolle 2025-06-21 11:38:53 +02:00
parent ff5f840646
commit edac4de5b4
5 changed files with 673 additions and 21 deletions

View File

@ -6,6 +6,7 @@ import urllib.request
import urllib.parse
from PIL import Image as PILImage
from .inline import Word, FormattedSpan
from ..style import Font, FontWeight, FontStyle, TextDecoration
class BlockType(Enum):
@ -72,6 +73,7 @@ class Paragraph(Block):
self._words: List[Word] = []
self._spans: List[FormattedSpan] = []
self._style = style
self._fonts: Dict[str, Font] = {} # Local font registry
@classmethod
def create_and_add_to(cls, container, style=None) -> 'Paragraph':
@ -190,9 +192,89 @@ class Paragraph(Block):
return len(self._words)
def __len__(self):
return self.word_count
def get_or_create_font(self,
                       font_path: Optional[str] = None,
                       font_size: int = 16,
                       colour: Tuple[int, int, int] = (0, 0, 0),
                       weight: FontWeight = FontWeight.NORMAL,
                       style: FontStyle = FontStyle.NORMAL,
                       decoration: TextDecoration = TextDecoration.NONE,
                       background: Optional[Tuple[int, int, int, int]] = None,
                       language: str = "en_EN",
                       min_hyphenation_width: Optional[int] = None) -> Font:
    """
    Get or create a font with the specified properties.

    If this paragraph has a parent exposing ``get_or_create_font``, the
    request is forwarded to that parent unchanged and its result is
    returned. Otherwise the font is looked up in (or added to) this
    paragraph's own ``_fonts`` registry.

    Args:
        font_path: Path to the font file (.ttf, .otf). If None, uses default font.
        font_size: Size of the font in points.
        colour: RGB color tuple for the text.
        weight: Font weight (normal or bold).
        style: Font style (normal or italic).
        decoration: Text decoration (none, underline, or strikethrough).
        background: RGBA background color for the text. If None, transparent background.
        language: Language code for hyphenation and text processing.
        min_hyphenation_width: Minimum width in pixels required for hyphenation.

    Returns:
        Font object (either existing or newly created)
    """
    # Built once so the parent call and the Font constructor receive
    # exactly the same arguments.
    font_kwargs = dict(
        font_path=font_path,
        font_size=font_size,
        colour=colour,
        weight=weight,
        style=style,
        decoration=decoration,
        background=background,
        language=language,
        min_hyphenation_width=min_hyphenation_width,
    )

    # Compare against None instead of relying on truthiness: a parent that
    # defines __len__ (as Paragraph itself does) is falsy while empty, and
    # a plain ``if self._parent`` would then skip delegation and register
    # the font locally instead of in the shared parent registry.
    parent = self._parent
    if parent is not None and hasattr(parent, 'get_or_create_font'):
        return parent.get_or_create_font(**font_kwargs)

    # No usable parent: manage fonts locally.
    # For the key only, an unset background / hyphenation width is replaced
    # by a concrete value so that "left unset" and "set to that value"
    # share one entry. NOTE(review): presumably these mirror Font's own
    # defaults - confirm against Font.__init__.
    font_key = (
        font_path,
        font_size,
        colour,
        weight.value if isinstance(weight, FontWeight) else weight,
        style.value if isinstance(style, FontStyle) else style,
        decoration.value if isinstance(decoration, TextDecoration) else decoration,
        background if background else (255, 255, 255, 0),
        language,
        min_hyphenation_width if min_hyphenation_width is not None else font_size * 4,
    )
    # The registry is keyed by str, so the tuple is stringified.
    key_str = str(font_key)

    # Only build a Font when this configuration has not been seen before.
    if key_str not in self._fonts:
        self._fonts[key_str] = Font(**font_kwargs)
    return self._fonts[key_str]
class HeadingLevel(Enum):
"""Enumeration representing HTML heading levels (h1-h6)"""

View File

@ -4,6 +4,7 @@ from enum import Enum
from .block import Block, BlockType, Heading, HeadingLevel, Paragraph
from .functional import Link, Button, Form
from .inline import Word, FormattedSpan
from ..style import Font, FontWeight, FontStyle, TextDecoration
class MetadataType(Enum):
@ -43,6 +44,7 @@ class Document:
self._stylesheets: List[Dict[str, Any]] = [] # CSS stylesheets
self._scripts: List[str] = [] # JavaScript code
self._default_style = default_style
self._fonts: Dict[str, Font] = {} # Font registry for reusing font objects
# Set basic metadata
if title:
@ -303,6 +305,73 @@ class Document:
return toc
def get_or_create_font(self,
                       font_path: Optional[str] = None,
                       font_size: int = 16,
                       colour: Tuple[int, int, int] = (0, 0, 0),
                       weight: FontWeight = FontWeight.NORMAL,
                       style: FontStyle = FontStyle.NORMAL,
                       decoration: TextDecoration = TextDecoration.NONE,
                       background: Optional[Tuple[int, int, int, int]] = None,
                       language: str = "en_EN",
                       min_hyphenation_width: Optional[int] = None) -> Font:
    """
    Return the registry's font for the given properties, creating it on first use.

    Repeated requests with the same properties return the same Font
    object instead of constructing a duplicate.

    Args:
        font_path: Path to the font file (.ttf, .otf). If None, uses default font.
        font_size: Size of the font in points.
        colour: RGB color tuple for the text.
        weight: Font weight (normal or bold).
        style: Font style (normal or italic).
        decoration: Text decoration (none, underline, or strikethrough).
        background: RGBA background color for the text. If None, transparent background.
        language: Language code for hyphenation and text processing.
        min_hyphenation_width: Minimum width in pixels required for hyphenation.

    Returns:
        Font object (either existing or newly created)
    """
    # Enum members contribute their .value to the key; anything else is
    # used as given.
    weight_part = weight.value if isinstance(weight, FontWeight) else weight
    style_part = style.value if isinstance(style, FontStyle) else style
    decoration_part = (
        decoration.value if isinstance(decoration, TextDecoration) else decoration
    )

    # For the key only, unset values are replaced by concrete stand-ins.
    background_part = background or (255, 255, 255, 0)
    if min_hyphenation_width is None:
        hyphenation_part = font_size * 4
    else:
        hyphenation_part = min_hyphenation_width

    # The registry is keyed by the string form of the property tuple.
    registry_key = str((
        font_path,
        font_size,
        colour,
        weight_part,
        style_part,
        decoration_part,
        background_part,
        language,
        hyphenation_part,
    ))

    try:
        return self._fonts[registry_key]
    except KeyError:
        pass

    # First request for this configuration: build the font from the
    # caller's original arguments and remember it.
    created = Font(
        font_path=font_path,
        font_size=font_size,
        colour=colour,
        weight=weight,
        style=style,
        decoration=decoration,
        background=background,
        language=language,
        min_hyphenation_width=min_hyphenation_width,
    )
    self._fonts[registry_key] = created
    return created
class Chapter:
"""
@ -310,7 +379,7 @@ class Chapter:
A chapter contains a sequence of blocks and has metadata.
"""
def __init__(self, title: Optional[str] = None, level: int = 1, style=None):
def __init__(self, title: Optional[str] = None, level: int = 1, style=None, parent=None):
"""
Initialize a new chapter.
@ -318,12 +387,15 @@ class Chapter:
title: The chapter title
level: The chapter level (1 = top level, 2 = subsection, etc.)
style: Optional default style for child blocks
parent: Parent container (e.g., Document or Book)
"""
self._title = title
self._level = level
self._blocks: List[Block] = []
self._metadata: Dict[str, Any] = {}
self._style = style
self._parent = parent
self._fonts: Dict[str, Font] = {} # Local font registry
@property
def title(self) -> Optional[str]:
@ -419,6 +491,87 @@ class Chapter:
"""
return self._metadata.get(key)
def get_or_create_font(self,
                       font_path: Optional[str] = None,
                       font_size: int = 16,
                       colour: Tuple[int, int, int] = (0, 0, 0),
                       weight: FontWeight = FontWeight.NORMAL,
                       style: FontStyle = FontStyle.NORMAL,
                       decoration: TextDecoration = TextDecoration.NONE,
                       background: Optional[Tuple[int, int, int, int]] = None,
                       language: str = "en_EN",
                       min_hyphenation_width: Optional[int] = None) -> Font:
    """
    Get or create a font with the specified properties.

    If this chapter has a parent exposing ``get_or_create_font``, the
    request is forwarded to that parent unchanged and its result is
    returned. Otherwise the font is looked up in (or added to) this
    chapter's own ``_fonts`` registry.

    Args:
        font_path: Path to the font file (.ttf, .otf). If None, uses default font.
        font_size: Size of the font in points.
        colour: RGB color tuple for the text.
        weight: Font weight (normal or bold).
        style: Font style (normal or italic).
        decoration: Text decoration (none, underline, or strikethrough).
        background: RGBA background color for the text. If None, transparent background.
        language: Language code for hyphenation and text processing.
        min_hyphenation_width: Minimum width in pixels required for hyphenation.

    Returns:
        Font object (either existing or newly created)
    """
    # Built once so the parent call and the Font constructor receive
    # exactly the same arguments.
    font_kwargs = dict(
        font_path=font_path,
        font_size=font_size,
        colour=colour,
        weight=weight,
        style=style,
        decoration=decoration,
        background=background,
        language=language,
        min_hyphenation_width=min_hyphenation_width,
    )

    # Compare against None instead of relying on truthiness: a parent
    # object that defines __len__ or __bool__ can be falsy (e.g. while
    # empty), and a plain ``if self._parent`` would then skip delegation
    # and register the font locally instead of in the parent's registry.
    parent = self._parent
    if parent is not None and hasattr(parent, 'get_or_create_font'):
        return parent.get_or_create_font(**font_kwargs)

    # No usable parent: manage fonts locally.
    # For the key only, an unset background / hyphenation width is replaced
    # by a concrete value so that "left unset" and "set to that value"
    # share one entry. NOTE(review): presumably these mirror Font's own
    # defaults - confirm against Font.__init__.
    font_key = (
        font_path,
        font_size,
        colour,
        weight.value if isinstance(weight, FontWeight) else weight,
        style.value if isinstance(style, FontStyle) else style,
        decoration.value if isinstance(decoration, TextDecoration) else decoration,
        background if background else (255, 255, 255, 0),
        language,
        min_hyphenation_width if min_hyphenation_width is not None else font_size * 4,
    )
    # The registry is keyed by str, so the tuple is stringified.
    key_str = str(font_key)

    # Only build a Font when this configuration has not been seen before.
    if key_str not in self._fonts:
        self._fonts[key_str] = Font(**font_kwargs)
    return self._fonts[key_str]
class Book(Document):
"""

View File

@ -41,6 +41,7 @@ class StyleContext(NamedTuple):
css_styles: Dict[str, str]
element_attributes: Dict[str, Any]
parent_elements: List[str] # Stack of parent element names
document: Optional[Any] # Reference to document for font registry
def with_font(self, font: Font) -> "StyleContext":
"""Create new context with modified font."""
@ -69,12 +70,13 @@ class StyleContext(NamedTuple):
return self._replace(parent_elements=self.parent_elements + [element_name])
def create_base_context(base_font: Optional[Font] = None) -> StyleContext:
def create_base_context(base_font: Optional[Font] = None, document=None) -> StyleContext:
"""
Create a base style context with default values.
Args:
base_font: Base font to use, defaults to system default
document: Document instance for font registry
Returns:
StyleContext with default values
@ -86,6 +88,7 @@ def create_base_context(base_font: Optional[Font] = None) -> StyleContext:
css_styles={},
element_attributes={},
parent_elements=[],
document=document,
)
@ -125,7 +128,7 @@ def apply_element_styling(context: StyleContext, element: Tag) -> StyleContext:
new_context = new_context.with_css_styles(css_styles)
# Apply element-specific default styles
font = apply_element_font_styles(new_context.font, tag_name, css_styles)
font = apply_element_font_styles(new_context.font, tag_name, css_styles, new_context)
new_context = new_context.with_font(font)
# Apply background from styles
@ -154,18 +157,20 @@ def parse_inline_styles(style_text: str) -> Dict[str, str]:
def apply_element_font_styles(
font: Font, tag_name: str, css_styles: Dict[str, str]
font: Font, tag_name: str, css_styles: Dict[str, str], context: Optional[StyleContext] = None
) -> Font:
"""
Apply font styling based on HTML element and CSS styles.
Uses document's font registry when available to avoid creating duplicate fonts.
Args:
font: Current font
tag_name: HTML tag name
css_styles: CSS styles dictionary
context: Style context with document reference for font registry
Returns:
New Font object with applied styling
Font object with applied styling (either existing or newly created)
"""
# Default element styles
element_font_styles = {
@ -192,6 +197,7 @@ def apply_element_font_styles(
decoration = font.decoration
background = font.background
language = font.language
font_path = font._font_path
# Apply element default styles
if tag_name in element_font_styles:
@ -264,8 +270,23 @@ def apply_element_font_styles(
except ValueError:
pass
# Use document's font registry if available to avoid creating duplicate fonts
if context and context.document and hasattr(context.document, 'get_or_create_font'):
return context.document.get_or_create_font(
font_path=font_path,
font_size=font_size,
colour=colour,
weight=weight,
style=style,
decoration=decoration,
background=background,
language=language,
min_hyphenation_width=font.min_hyphenation_width
)
else:
# Fallback to creating new font if no document context
return Font(
font_path=font._font_path,
font_path=font_path,
font_size=font_size,
colour=colour,
weight=weight,
@ -725,7 +746,7 @@ HANDLERS: Dict[str, Callable[[Tag, StyleContext], Union[Block, List[Block], None
def parse_html_string(
html_string: str, base_font: Optional[Font] = None
html_string: str, base_font: Optional[Font] = None, document=None
) -> List[Block]:
"""
Parse HTML string and return list of Block objects.
@ -733,12 +754,13 @@ def parse_html_string(
Args:
html_string: HTML content to parse
base_font: Base font for styling, defaults to system default
document: Document instance for font registry to avoid duplicate fonts
Returns:
List of Block objects representing the document structure
"""
soup = BeautifulSoup(html_string, "html.parser")
context = create_base_context(base_font)
context = create_base_context(base_font, document)
blocks = []
# Process the body if it exists, otherwise process all top-level elements

View File

@ -9,7 +9,7 @@ import unittest
from pyWebLayout.abstract.document import Document, Chapter, Book, MetadataType
from pyWebLayout.abstract.block import Paragraph, Heading, HeadingLevel, BlockType
from pyWebLayout.abstract.inline import Word, FormattedSpan
from pyWebLayout.style import Font
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
class TestMetadataType(unittest.TestCase):
@ -464,5 +464,224 @@ class TestBook(unittest.TestCase):
self.assertEqual(self.book.get_anchor("preface"), heading)
class TestDocumentFontRegistry(unittest.TestCase):
    """Checks that Document.get_or_create_font stores and reuses fonts."""

    def setUp(self):
        """Give every test its own Document."""
        self.doc = Document("Test Document", "en-US")

    def test_get_or_create_font_creates_new_font(self):
        """A first request yields a font with the requested properties."""
        created = self.doc.get_or_create_font(
            font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD
        )

        self.assertEqual(created.font_size, 14)
        self.assertEqual(created.colour, (255, 0, 0))
        self.assertEqual(created.weight, FontWeight.BOLD)

        # The new font must have been recorded in the registry.
        self.assertEqual(len(self.doc._fonts), 1)

    def test_get_or_create_font_reuses_existing_font(self):
        """Two identical requests return one and the same object."""
        request = dict(font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD)

        first = self.doc.get_or_create_font(**request)
        second = self.doc.get_or_create_font(**request)

        self.assertIs(first, second)
        # No second registry entry may appear.
        self.assertEqual(len(self.doc._fonts), 1)

    def test_get_or_create_font_creates_different_fonts(self):
        """Requests differing in any one property produce distinct fonts."""
        requests = [
            # Baseline
            dict(font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD),
            # Different size
            dict(font_size=16, colour=(255, 0, 0), weight=FontWeight.BOLD),
            # Different colour
            dict(font_size=14, colour=(0, 255, 0), weight=FontWeight.BOLD),
            # Different weight
            dict(font_size=14, colour=(255, 0, 0), weight=FontWeight.NORMAL),
        ]
        fonts = [self.doc.get_or_create_font(**request) for request in requests]

        # Every pair of fonts must be a different object.
        for position, earlier in enumerate(fonts):
            for later in fonts[position + 1:]:
                self.assertIsNot(earlier, later)

        self.assertEqual(len(self.doc._fonts), 4)

    def test_get_or_create_font_with_all_parameters(self):
        """Every explicitly supplied parameter is reflected on the font."""
        requested = dict(
            font_path="path/to/font.ttf",
            font_size=18,
            colour=(128, 64, 192),
            weight=FontWeight.BOLD,
            style=FontStyle.ITALIC,
            decoration=TextDecoration.UNDERLINE,
            background=(255, 255, 255, 128),
            language="fr_FR",
            min_hyphenation_width=80,
        )
        font = self.doc.get_or_create_font(**requested)

        # The path is read through the private _font_path attribute; the
        # remaining properties are read through same-named attributes.
        self.assertEqual(font._font_path, requested.pop("font_path"))
        for attribute, expected in requested.items():
            self.assertEqual(getattr(font, attribute), expected)

    def test_get_or_create_font_with_defaults(self):
        """Calling without arguments yields a font with the default values."""
        font = self.doc.get_or_create_font()

        self.assertIsNotNone(font)
        self.assertEqual(font.font_size, 16)  # Default font size
        self.assertEqual(font.colour, (0, 0, 0))  # Default black colour
        self.assertEqual(font.weight, FontWeight.NORMAL)
        self.assertEqual(font.style, FontStyle.NORMAL)
        self.assertEqual(font.decoration, TextDecoration.NONE)
class TestChapterFontRegistry(unittest.TestCase):
    """Checks how Chapter.get_or_create_font behaves with and without a parent."""

    # Properties shared by the font requests in these tests.
    BOLD_RED = dict(font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD)

    def setUp(self):
        """Create a Document and a Chapter whose parent is that Document."""
        self.doc = Document("Test Document", "en-US")
        self.chapter = Chapter("Test Chapter", 1, parent=self.doc)

    def test_chapter_uses_parent_font_registry(self):
        """A chapter with a parent stores its fonts in the parent's registry."""
        # Requested through the chapter first, then through the document.
        via_chapter = self.chapter.get_or_create_font(**self.BOLD_RED)
        via_document = self.doc.get_or_create_font(**self.BOLD_RED)

        self.assertIs(via_chapter, via_document)

        # The document holds the font; the chapter's own registry stays empty.
        self.assertEqual(len(self.doc._fonts), 1)
        self.assertEqual(len(self.chapter._fonts), 0)

    def test_chapter_without_parent_manages_own_fonts(self):
        """A parentless chapter caches fonts in its own registry."""
        standalone_chapter = Chapter("Standalone Chapter", 1)

        first = standalone_chapter.get_or_create_font(**self.BOLD_RED)
        second = standalone_chapter.get_or_create_font(**self.BOLD_RED)

        # The second request must be served from the chapter's registry.
        self.assertIs(first, second)
        self.assertEqual(len(standalone_chapter._fonts), 1)

    def test_chapter_parent_assignment(self):
        """The parent argument is stored on the chapter, defaulting to None."""
        with_parent = Chapter("Chapter with Parent", 1, parent=self.doc)
        self.assertEqual(with_parent._parent, self.doc)

        without_parent = Chapter("Chapter without Parent", 1)
        self.assertIsNone(without_parent._parent)
class TestBookFontRegistry(unittest.TestCase):
    """Checks that Book offers the same font registry as Document."""

    def setUp(self):
        """Create the Book used by the test."""
        self.book = Book("Test Book", "Author Name", "en-US")

    def test_book_inherits_document_font_registry(self):
        """Identical requests on a Book return one cached font."""
        request = dict(font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD)

        first = self.book.get_or_create_font(**request)
        second = self.book.get_or_create_font(**request)

        # Same object both times, and only one registry entry.
        self.assertIs(first, second)
        self.assertEqual(len(self.book._fonts), 1)
if __name__ == '__main__':
unittest.main()

View File

@ -8,7 +8,8 @@ including styled content within paragraphs and block-level elements.
import unittest
from pyWebLayout.io.readers.html_extraction import parse_html_string
from pyWebLayout.abstract.block import Paragraph, Heading, HeadingLevel, Quote, CodeBlock, HList, ListStyle, Table
from pyWebLayout.style import FontWeight, FontStyle, TextDecoration
from pyWebLayout.abstract.document import Document
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
class TestHTMLParagraph(unittest.TestCase):
@ -380,5 +381,180 @@ class TestHTMLComplexStructures(unittest.TestCase):
self.assertIsInstance(blocks[0], Table)
class TestHTMLFontRegistryIntegration(unittest.TestCase):
    """Checks that parse_html_string uses a Document's font registry."""

    # Markup shared by the "creates fonts" and "reuses fonts" tests.
    STYLED_DIV_HTML = """
<div>
<p>This is <strong>bold text</strong> and <em>italic text</em>.</p>
<h1>Main Header</h1>
</div>
"""

    def setUp(self):
        """Create a Document and the base font handed to the parser."""
        self.doc = Document("Test Document", "en-US")
        self.base_font = Font(font_size=16, colour=(0, 0, 0))

    def _parse_with_registry(self, html):
        """Parse *html* with this test's base font and document."""
        return parse_html_string(html, self.base_font, document=self.doc)

    def test_font_registry_creates_fonts(self):
        """Parsing with a document adds fonts to that document's registry."""
        fonts_before = len(self.doc._fonts)

        blocks = self._parse_with_registry(self.STYLED_DIV_HTML)

        fonts_after = len(self.doc._fonts)
        self.assertGreater(fonts_after, fonts_before,
                           "Should have created fonts in registry")
        self.assertGreater(len(blocks), 0, "Should have created blocks")

    def test_font_registry_reuses_fonts(self):
        """Parsing the same markup twice adds no further fonts."""
        first_blocks = self._parse_with_registry(self.STYLED_DIV_HTML)
        fonts_after_first = len(self.doc._fonts)

        second_blocks = self._parse_with_registry(self.STYLED_DIV_HTML)
        fonts_after_second = len(self.doc._fonts)

        self.assertEqual(fonts_after_first, fonts_after_second,
                         "Should reuse existing fonts instead of creating new ones")
        self.assertEqual(len(first_blocks), len(second_blocks),
                         "Should create same structure on both parses")

    def test_font_registry_different_styles_create_different_fonts(self):
        """Fonts requested with different properties are distinct objects."""
        small_red = self.doc.get_or_create_font(
            font_size=14, colour=(255, 0, 0), weight=FontWeight.BOLD
        )
        large_red = self.doc.get_or_create_font(
            font_size=16, colour=(255, 0, 0), weight=FontWeight.BOLD
        )
        small_green = self.doc.get_or_create_font(
            font_size=14, colour=(0, 255, 0), weight=FontWeight.BOLD
        )

        self.assertIsNot(small_red, large_red,
                         "Different sizes should create different fonts")
        self.assertIsNot(small_red, small_green,
                         "Different colors should create different fonts")
        self.assertIsNot(large_red, small_green, "All fonts should be different")

        self.assertEqual(len(self.doc._fonts), 3)

    def test_font_registry_integration_with_html_styles(self):
        """Styled inline content yields differently styled words and fonts."""
        html_content = """
<p>Normal text with <strong>bold</strong> and <em>italic</em> and
<span style="color: red;">red text</span>.</p>
"""
        blocks = self._parse_with_registry(html_content)

        # paragraph.words() yields pairs; only the second item is needed.
        paragraph = blocks[0]
        words = [word for _, word in paragraph.words()]

        normal_words = [
            word for word in words
            if word.style.weight == FontWeight.NORMAL
            and word.style.style == FontStyle.NORMAL
        ]
        bold_words = [
            word for word in words if word.style.weight == FontWeight.BOLD
        ]
        italic_words = [
            word for word in words if word.style.style == FontStyle.ITALIC
        ]
        red_words = [
            word for word in words if word.style.colour == (255, 0, 0)
        ]

        self.assertGreater(len(normal_words), 0, "Should have normal words")
        self.assertGreater(len(bold_words), 0, "Should have bold words")
        self.assertGreater(len(italic_words), 0, "Should have italic words")
        self.assertGreater(len(red_words), 0, "Should have red words")

        self.assertGreater(len(self.doc._fonts), 1,
                           "Should have multiple fonts for different styles")

    def test_font_registry_without_document_context(self):
        """Parsing without a document still works and leaves the registry alone."""
        html_content = "<p>This is <strong>bold text</strong>.</p>"

        # No document is passed here, so the parser takes its fallback path.
        blocks = parse_html_string(html_content, self.base_font)

        self.assertEqual(len(blocks), 1)
        self.assertIsInstance(blocks[0], Paragraph)

        self.assertEqual(len(self.doc._fonts), 0,
                         "Document font registry should remain empty")

    def test_complex_html_font_reuse(self):
        """Repeated styles in larger markup do not grow the registry on re-parse."""
        html_content = """
<div>
<h1>First Header</h1>
<p>Paragraph with <strong>bold</strong> text.</p>
<h1>Second Header</h1>
<p>Another paragraph with <strong>bold</strong> text.</p>
</div>
"""
        first_blocks = self._parse_with_registry(html_content)
        fonts_after_first = len(self.doc._fonts)

        second_blocks = self._parse_with_registry(html_content)
        fonts_after_second = len(self.doc._fonts)

        self.assertEqual(fonts_after_first, fonts_after_second,
                         "Fonts should be reused for repeated styles")
        self.assertEqual(len(first_blocks), len(second_blocks))

    def test_font_registry_with_nested_styles(self):
        """Nested <strong>/<em> produces words that are both bold and italic."""
        html_content = """
<p>Text with <strong>bold and <em>bold italic</em> nested</strong> styles.</p>
"""
        blocks = self._parse_with_registry(html_content)

        paragraph = blocks[0]
        bold_italic_words = [
            word for _, word in paragraph.words()
            if word.style.weight == FontWeight.BOLD
            and word.style.style == FontStyle.ITALIC
        ]

        self.assertGreater(len(bold_italic_words), 0,
                           "Should have words with combined bold+italic style")
        self.assertGreater(len(self.doc._fonts), 1,
                           "Should create separate fonts for style combinations")
if __name__ == '__main__':
unittest.main()