"""
Unit tests for HTML extraction functionality.

Tests the HTML parsing and conversion to pyWebLayout abstract elements,
including styled content within paragraphs and block-level elements.
"""
|
|
|
|
import unittest
|
|
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
|
from pyWebLayout.abstract.block import Paragraph, Heading, HeadingLevel, Quote, CodeBlock, HList, ListStyle, Table
|
|
from pyWebLayout.abstract.document import Document
|
|
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration
|
|
|
|
|
|
class TestHTMLParagraph(unittest.TestCase):
    """Plain ``<p>`` elements: block count, block length and word text."""

    def _assert_word_texts(self, paragraph, sentence):
        """Check the words of ``paragraph`` against the tokens of ``sentence``.

        ``sentence`` is split on single spaces and compared pairwise with the
        entries yielded by ``paragraph.words()``.
        """
        expected_tokens = sentence.split(" ")
        for entry, token in zip(paragraph.words(), expected_tokens):
            # Each entry is indexable; the word object sits at index 1.
            self.assertEqual(entry[1].text, token)

    def test_simple(self):
        """One ``<p>`` produces one block of length 4 with the expected words."""
        parsed = parse_html_string("<p>This is a paragraph.</p>")

        self.assertEqual(len(parsed), 1)
        self.assertEqual(len(parsed[0]), 4)

        self._assert_word_texts(parsed[0], "This is a paragraph.")

    def test_multiple(self):
        """Two sibling ``<p>`` elements produce two independent blocks."""
        markup = "<p>This is a paragraph.</p><p>This is another paragraph.</p>"
        parsed = parse_html_string(markup)

        self.assertEqual(len(parsed), 2)
        first, second = parsed[0], parsed[1]
        self.assertEqual(len(first), 4)
        self.assertEqual(len(second), 4)

        self._assert_word_texts(first, "This is a paragraph.")
        self._assert_word_texts(second, "This is another paragraph.")
|
|
|
|
|
|
class TestHTMLStyledParagraphs(unittest.TestCase):
    """Inline styling inside paragraphs: tags, CSS spans and nesting."""

    def _parse_single_block(self, markup):
        """Parse ``markup``, assert exactly one block came back, return it."""
        parsed = parse_html_string(markup)
        self.assertEqual(len(parsed), 1)
        return parsed[0]

    @staticmethod
    def _select(entries, predicate):
        """Return the word objects in ``entries`` for which ``predicate`` holds.

        ``entries`` is the materialised output of ``words()``: pairs whose
        second element is the word object.
        """
        return [word for _, word in entries if predicate(word)]

    def test_bold_text(self):
        """Words inside ``<strong>`` are bold; the leading word is not."""
        paragraph = self._parse_single_block(
            "<p>This is <strong>bold text</strong> in a paragraph.</p>"
        )
        self.assertIsInstance(paragraph, Paragraph)

        entries = list(paragraph.words())
        self.assertEqual(len(entries), 7)  # "This is bold text in a paragraph."

        # Positions 2 and 3 hold the two words wrapped by <strong>.
        for position, expected_text in ((2, "bold"), (3, "text")):
            emphasized = entries[position][1]
            self.assertEqual(emphasized.text, expected_text)
            self.assertEqual(emphasized.style.weight, FontWeight.BOLD)

        # The first word sits outside <strong> and must not be bold.
        leading = entries[0][1]
        self.assertEqual(leading.text, "This")
        self.assertNotEqual(leading.style.weight, FontWeight.BOLD)

    def test_italic_text(self):
        """Words inside ``<em>`` carry the italic font style."""
        paragraph = self._parse_single_block(
            "<p>This is <em>italic text</em> in a paragraph.</p>"
        )
        self.assertIsInstance(paragraph, Paragraph)

        entries = list(paragraph.words())

        # Positions 2 and 3 hold the two words wrapped by <em>.
        for position, expected_text in ((2, "italic"), (3, "text")):
            slanted = entries[position][1]
            self.assertEqual(slanted.text, expected_text)
            self.assertEqual(slanted.style.style, FontStyle.ITALIC)

    def test_underlined_text(self):
        """The first word inside ``<u>`` has the underline decoration."""
        block = self._parse_single_block(
            "<p>This is <u>underlined text</u> here.</p>"
        )

        entries = list(block.words())
        target = entries[2][1]  # 'underlined'
        self.assertEqual(target.style.decoration, TextDecoration.UNDERLINE)

    def test_strikethrough_text(self):
        """The first word inside ``<s>`` has the strikethrough decoration."""
        block = self._parse_single_block(
            "<p>This is <s>strikethrough text</s> here.</p>"
        )

        entries = list(block.words())
        target = entries[2][1]  # 'strikethrough'
        self.assertEqual(target.style.decoration, TextDecoration.STRIKETHROUGH)

    def test_span_with_inline_styles(self):
        """A ``<span>`` with inline CSS applies both weight and colour."""
        paragraph = self._parse_single_block(
            '<p>This text is normal, but <span style="color: red; font-weight: bold;">this part is red and bold</span>.</p>'
        )
        self.assertIsInstance(paragraph, Paragraph)

        entries = list(paragraph.words())

        # Collect the bold words whose text matches a token of the span.
        span_tokens = ["this", "part", "is", "red", "and", "bold"]
        bold_in_span = self._select(
            entries,
            lambda word: word.text in span_tokens
            and word.style.weight == FontWeight.BOLD,
        )
        self.assertGreater(len(bold_in_span), 0, "Should have bold words in styled span")

        # At least one of those bold words must also be red.
        red_in_span = [word for word in bold_in_span if word.style.colour == (255, 0, 0)]
        self.assertGreater(len(red_in_span), 0, "Should have red colored words")

    def test_mixed_formatting(self):
        """Bold, italic and coloured runs coexist in one paragraph.

        The ``<mark>`` element is present in the markup but nothing is
        asserted about it.
        """
        paragraph = self._parse_single_block(
            "<p>This paragraph contains <strong>bold</strong>, <em>italic</em>, <span style=\"color: blue;\">blue</span>, and <mark>highlighted</mark> text all together.</p>"
        )
        self.assertIsInstance(paragraph, Paragraph)

        entries = list(paragraph.words())

        bold = self._select(entries, lambda word: word.style.weight == FontWeight.BOLD)
        self.assertGreater(len(bold), 0, "Should have bold words")

        italic = self._select(entries, lambda word: word.style.style == FontStyle.ITALIC)
        self.assertGreater(len(italic), 0, "Should have italic words")

        blue = self._select(entries, lambda word: word.style.colour == (0, 0, 255))
        self.assertGreater(len(blue), 0, "Should have blue colored words")

    def test_nested_formatting(self):
        """``<em>`` nested in ``<strong>`` yields words that are bold and italic."""
        block = self._parse_single_block(
            "<p>This has <strong>bold with <em>italic inside</em></strong> formatting.</p>"
        )

        entries = list(block.words())

        both = self._select(
            entries,
            lambda word: word.style.weight == FontWeight.BOLD
            and word.style.style == FontStyle.ITALIC,
        )
        self.assertGreater(len(both), 0, "Should have words that are both bold and italic")

    def test_color_variations(self):
        """Hex and named CSS colours are both resolved to RGB tuples."""
        block = self._parse_single_block(
            '<p><span style="color: #ff0000;">Hex red</span> and <span style="color: green;">Named green</span>.</p>'
        )

        entries = list(block.words())

        # "#ff0000" must resolve to pure red.
        hex_red = self._select(entries, lambda word: word.style.colour == (255, 0, 0))
        self.assertGreater(len(hex_red), 0, "Should have hex red colored words")

        # NOTE(review): CSS defines the keyword "green" as (0, 128, 0); this
        # test expects (0, 255, 0), presumably the parser's own colour table.
        # Confirm against the parser before changing either side.
        named_green = self._select(entries, lambda word: word.style.colour == (0, 255, 0))
        self.assertGreater(len(named_green), 0, "Should have green colored words")
|
|
|
|
|
|
class TestHTMLBlockElements(unittest.TestCase):
    """Block-level elements: containers, headings, quotes, code, lists, tables."""

    def test_body_element(self):
        """``<body>`` is transparent: its paragraphs are returned directly."""
        parsed = parse_html_string(
            "<body><p>Paragraph one.</p><p>Paragraph two.</p></body>"
        )
        self.assertEqual(len(parsed), 2)
        for block in parsed:
            self.assertIsInstance(block, Paragraph)

    def test_div_container(self):
        """``<div>`` is transparent: its paragraphs are returned directly."""
        parsed = parse_html_string(
            "<div><p>First paragraph.</p><p>Second paragraph.</p></div>"
        )
        self.assertEqual(len(parsed), 2)
        for block in parsed:
            self.assertIsInstance(block, Paragraph)

    def test_headings(self):
        """``<h1>`` to ``<h6>`` map to the matching ``HeadingLevel`` in order."""
        markup = "<h1>Heading 1</h1><h2>Heading 2</h2><h3>Heading 3</h3><h4>Heading 4</h4><h5>Heading 5</h5><h6>Heading 6</h6>"
        parsed = parse_html_string(markup)
        self.assertEqual(len(parsed), 6)

        levels_in_order = (
            HeadingLevel.H1,
            HeadingLevel.H2,
            HeadingLevel.H3,
            HeadingLevel.H4,
            HeadingLevel.H5,
            HeadingLevel.H6,
        )

        # Lengths are equal (asserted above), so zip pairs every block.
        for heading, level in zip(parsed, levels_in_order):
            self.assertIsInstance(heading, Heading)
            self.assertEqual(heading.level, level)

            entries = list(heading.words())
            self.assertEqual(len(entries), 2)  # "Heading" and number
            self.assertEqual(entries[0][1].text, "Heading")

    def test_blockquote(self):
        """``<blockquote>`` becomes a ``Quote`` wrapping its paragraph."""
        parsed = parse_html_string(
            "<blockquote><p>This is a quoted paragraph.</p></blockquote>"
        )
        self.assertEqual(len(parsed), 1)
        quote = parsed[0]
        self.assertIsInstance(quote, Quote)

        # The quote must hold exactly the one nested paragraph.
        nested = list(quote.blocks())
        self.assertEqual(len(nested), 1)
        self.assertIsInstance(nested[0], Paragraph)

    def test_preformatted_code(self):
        """``<pre><code>`` becomes a ``CodeBlock`` with at least one line."""
        markup = "<pre><code>function hello() {\n console.log('Hello');\n}</code></pre>"
        parsed = parse_html_string(markup)
        self.assertEqual(len(parsed), 1)
        code = parsed[0]
        self.assertIsInstance(code, CodeBlock)

        code_lines = list(code.lines())
        self.assertGreater(len(code_lines), 0)

    def test_unordered_list(self):
        """``<ul>`` becomes an unordered ``HList`` with one item per ``<li>``."""
        parsed = parse_html_string(
            "<ul><li>First item</li><li>Second item</li><li>Third item</li></ul>"
        )
        self.assertEqual(len(parsed), 1)
        listing = parsed[0]
        self.assertIsInstance(listing, HList)
        self.assertEqual(listing.style, ListStyle.UNORDERED)

        entries = list(listing.items())
        self.assertEqual(len(entries), 3)

    def test_ordered_list(self):
        """``<ol>`` becomes an ``HList`` with the ordered list style."""
        parsed = parse_html_string(
            "<ol><li>First item</li><li>Second item</li><li>Third item</li></ol>"
        )
        self.assertEqual(len(parsed), 1)
        listing = parsed[0]
        self.assertIsInstance(listing, HList)
        self.assertEqual(listing.style, ListStyle.ORDERED)

    def test_list_with_styled_content(self):
        """Inline styling inside ``<li>`` survives into the list item."""
        markup = "<ul><li>Normal item</li><li><strong>Bold item</strong></li><li>Item with <em>italic</em> text</li></ul>"
        parsed = parse_html_string(markup)
        self.assertEqual(len(parsed), 1)
        listing = parsed[0]
        self.assertIsInstance(listing, HList)

        entries = list(listing.items())
        self.assertEqual(len(entries), 3)

        # NOTE(review): the bold check below is skipped entirely when the
        # second item exposes no blocks, so this test can pass without ever
        # verifying the styling.
        inner_blocks = list(entries[1].blocks())
        if not inner_blocks:
            return
        word_pairs = list(inner_blocks[0].words())
        bold = [word for _, word in word_pairs if word.style.weight == FontWeight.BOLD]
        self.assertGreater(len(bold), 0)

    def test_table_basic(self):
        """A header row plus a data row parse into a single ``Table``."""
        markup = """
        <table>
            <tr>
                <th>Header 1</th>
                <th>Header 2</th>
            </tr>
            <tr>
                <td>Cell 1</td>
                <td>Cell 2</td>
            </tr>
        </table>
        """
        parsed = parse_html_string(markup)
        self.assertEqual(len(parsed), 1)
        self.assertIsInstance(parsed[0], Table)

    def test_semantic_elements(self):
        """``<section>``/``<article>`` wrappers yield only the inner paragraph."""
        parsed = parse_html_string(
            "<section><article><p>Article content</p></article></section>"
        )
        self.assertEqual(len(parsed), 1)
        self.assertIsInstance(parsed[0], Paragraph)

    def test_nested_block_elements(self):
        """A ``<div>`` holding heading, paragraph and quote yields all three."""
        markup = """
        <div>
            <h2>Section Title</h2>
            <p>Some introductory text.</p>
            <blockquote>
                <p>A quoted paragraph.</p>
            </blockquote>
        </div>
        """
        parsed = parse_html_string(markup)
        self.assertGreater(len(parsed), 2)

        # Each expected block type must appear at least once at top level.
        expectations = (
            (Heading, "Should contain a heading"),
            (Paragraph, "Should contain a paragraph"),
            (Quote, "Should contain a quote"),
        )
        for block_type, message in expectations:
            present = any(isinstance(block, block_type) for block in parsed)
            self.assertTrue(present, message)

    def test_empty_elements(self):
        """Empty and whitespace-only elements parse without raising.

        NOTE(review): both assertions compare a length with ``>= 0`` and so
        can never fail; the test only proves that parsing completes.
        """
        parsed_empty = parse_html_string("<p></p><div></div><span></span>")
        # Empty elements may not create blocks, which is acceptable behavior.
        self.assertGreaterEqual(len(parsed_empty), 0)

        # Paragraph whose only content is whitespace.
        parsed_blank = parse_html_string("<p> </p>")
        self.assertGreaterEqual(len(parsed_blank), 0)
|
|
|
|
|
|
class TestHTMLComplexStructures(unittest.TestCase):
    """Realistic documents that combine several block and inline features."""

    def test_article_with_mixed_content(self):
        """An article with heading, paragraph, quote and list yields each type."""
        markup = """
        <article>
            <h1>Article Title</h1>
            <p>This is the <strong>introduction</strong> paragraph with <em>some emphasis</em>.</p>
            <blockquote>
                <p>This is a <span style="color: blue;">quoted section</span> with styling.</p>
            </blockquote>
            <ul>
                <li>First <strong>important</strong> point</li>
                <li>Second point with <code>inline code</code></li>
            </ul>
        </article>
        """
        parsed = parse_html_string(markup)
        self.assertGreater(len(parsed), 3)

        # Compare by class name so the check matches the exact type names.
        seen_type_names = [type(block).__name__ for block in parsed]
        for expected_name in ('Heading', 'Paragraph', 'Quote', 'HList'):
            self.assertIn(expected_name, seen_type_names)

    def test_styled_table_content(self):
        """A table with ``<thead>``/``<tbody>`` and styled cells is one ``Table``."""
        markup = """
        <table>
            <thead>
                <tr>
                    <th><strong>Product</strong></th>
                    <th><em>Price</em></th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Item with <span style="color: red;">red text</span></td>
                    <td><strong>$19.99</strong></td>
                </tr>
            </tbody>
        </table>
        """
        parsed = parse_html_string(markup)
        self.assertEqual(len(parsed), 1)
        self.assertIsInstance(parsed[0], Table)
|
|
|
|
|
|
class TestHTMLFontRegistryIntegration(unittest.TestCase):
    """HTML extraction combined with a ``Document`` style registry."""

    def setUp(self):
        """Create a fresh document and base font for every test."""
        self.doc = Document("Test Document", "en-US")
        self.base_font = Font(font_size=16, colour=(0, 0, 0))

    def _registry_size(self):
        """Return the style count currently reported by the document registry."""
        return self.doc.get_style_registry().get_style_count()

    def _parse_with_document(self, markup):
        """Parse ``markup`` with the fixture font and the fixture document."""
        return parse_html_string(markup, self.base_font, document=self.doc)

    def test_font_registry_creates_fonts(self):
        """Parsing with a document adds styles to its registry."""
        markup = """
        <div>
            <p>This is <strong>bold text</strong> and <em>italic text</em>.</p>
            <h1>Main Header</h1>
        </div>
        """

        count_before = self._registry_size()

        parsed = self._parse_with_document(markup)

        # Styled runs must have registered new styles.
        count_after = self._registry_size()
        self.assertGreater(count_after, count_before,
                           "Should have created styles in registry")

        self.assertGreater(len(parsed), 0, "Should have created blocks")

    def test_font_registry_reuses_fonts(self):
        """Parsing identical markup twice does not grow the registry."""
        markup = """
        <div>
            <p>This is <strong>bold text</strong> and <em>italic text</em>.</p>
            <h1>Main Header</h1>
        </div>
        """

        first_pass = self._parse_with_document(markup)
        count_after_first = self._registry_size()

        second_pass = self._parse_with_document(markup)
        count_after_second = self._registry_size()

        self.assertEqual(count_after_first, count_after_second,
                         "Should reuse existing styles instead of creating new ones")

        self.assertEqual(len(first_pass), len(second_pass),
                         "Should create same structure on both parses")

    def test_font_registry_different_styles_create_different_fonts(self):
        """Differing size or colour produces distinct style identifiers."""
        specs = (
            {"font_size": 14, "color": (255, 0, 0), "font_weight": FontWeight.BOLD},
            {"font_size": 16, "color": (255, 0, 0), "font_weight": FontWeight.BOLD},
            {"font_size": 14, "color": (0, 255, 0), "font_weight": FontWeight.BOLD},
        )
        style_ids = []
        for spec in specs:
            # get_or_create_style returns an (id, style) pair; only the id is compared.
            style_id, _style = self.doc.get_or_create_style(**spec)
            style_ids.append(style_id)
        small_red, large_red, small_green = style_ids

        self.assertNotEqual(small_red, large_red, "Different sizes should create different styles")
        self.assertNotEqual(small_red, small_green, "Different colors should create different styles")
        self.assertNotEqual(large_red, small_green, "All styles should be different")

        self.assertGreaterEqual(self._registry_size(), 3)

    def test_font_registry_integration_with_html_styles(self):
        """Normal, bold, italic and red words all appear, backed by the registry."""
        markup = """
        <p>Normal text with <strong>bold</strong> and <em>italic</em> and
        <span style="color: red;">red text</span>.</p>
        """

        parsed = self._parse_with_document(markup)

        entries = list(parsed[0].words())
        all_words = [word for _, word in entries]

        plain = [word for word in all_words
                 if word.style.weight == FontWeight.NORMAL
                 and word.style.style == FontStyle.NORMAL]
        bold = [word for word in all_words if word.style.weight == FontWeight.BOLD]
        italic = [word for word in all_words if word.style.style == FontStyle.ITALIC]
        red = [word for word in all_words if word.style.colour == (255, 0, 0)]

        self.assertGreater(len(plain), 0, "Should have normal words")
        self.assertGreater(len(bold), 0, "Should have bold words")
        self.assertGreater(len(italic), 0, "Should have italic words")
        self.assertGreater(len(red), 0, "Should have red words")

        self.assertGreater(self._registry_size(), 1,
                           "Should have multiple styles for different formatting")

    def test_font_registry_without_document_context(self):
        """Parsing without a document works and leaves the registry untouched."""
        markup = "<p>This is <strong>bold text</strong>.</p>"

        count_before = self._registry_size()

        # No ``document`` argument: the fixture document must stay unaffected.
        parsed = parse_html_string(markup, self.base_font)

        self.assertEqual(len(parsed), 1)
        self.assertIsInstance(parsed[0], Paragraph)

        count_after = self._registry_size()
        self.assertEqual(count_after, count_before,
                         "Document style registry should remain unchanged")

    def test_complex_html_font_reuse(self):
        """Repeated formatting across two parses reuses registered styles."""
        markup = """
        <div>
            <h1>First Header</h1>
            <p>Paragraph with <strong>bold</strong> text.</p>
            <h1>Second Header</h1>
            <p>Another paragraph with <strong>bold</strong> text.</p>
        </div>
        """

        first_pass = self._parse_with_document(markup)
        count_after_first = self._registry_size()

        second_pass = self._parse_with_document(markup)
        count_after_second = self._registry_size()

        self.assertEqual(count_after_first, count_after_second,
                         "Styles should be reused for repeated formatting")

        self.assertEqual(len(first_pass), len(second_pass))

    def test_font_registry_with_nested_styles(self):
        """Nested ``<strong>``/``<em>`` yields a combined bold+italic style."""
        markup = """
        <p>Text with <strong>bold and <em>bold italic</em> nested</strong> styles.</p>
        """

        parsed = self._parse_with_document(markup)

        entries = list(parsed[0].words())

        combined = [
            word
            for _, word in entries
            if word.style.weight == FontWeight.BOLD
            and word.style.style == FontStyle.ITALIC
        ]

        self.assertGreater(len(combined), 0,
                           "Should have words with combined bold+italic style")

        self.assertGreater(self._registry_size(), 1,
                           "Should create separate styles for style combinations")
|
|
|
|
|
|
# Run the full test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|