This is a paragraph.
" soup = BeautifulSoup('This is a paragraph.
', 'html.parser') element = soup.find('p') words = extract_text_content(element, self.base_context) # Should match the expected word count from original test self.assertEqual(len(words), 4) # "This", "is", "a", "paragraph." self.assertIsInstance(words[0], Word) self.assertEqual(words[0].text, "This") def test_extract_styled_text_bold(self): """Test extracting bold styled text - adapted from test_bold_text.""" # From: "This is bold text in a paragraph.
" soup = BeautifulSoup('This is bold text in a paragraph.', 'html.parser') element = soup.find('span') words = extract_text_content(element, self.base_context) # Find the bold words bold_words = [w for w in words if w.style.weight == FontWeight.BOLD] self.assertGreater(len(bold_words), 0, "Should have bold words") # Check specific words are bold (from original test expectations) bold_word_texts = [w.text for w in bold_words] self.assertIn("bold", bold_word_texts) self.assertIn("text", bold_word_texts) def test_extract_nested_formatting(self): """Test nested formatting - adapted from test_nested_formatting.""" # From: "This has bold with italic inside formatting.
" soup = BeautifulSoup('This has bold with italic inside formatting.', 'html.parser') element = soup.find('span') words = extract_text_content(element, self.base_context) # Find words that should be both bold and italic bold_italic_words = [w for w in words if w.style.weight == FontWeight.BOLD and w.style.style == FontStyle.ITALIC] self.assertGreater(len(bold_italic_words), 0, "Should have words that are both bold and italic") class TestHandlerFunctions(unittest.TestCase): """Test cases for HTML element handler functions using known working patterns.""" def setUp(self): """Set up test fixtures.""" self.base_context = create_base_context() def test_paragraph_handler_simple(self): """Test paragraph handler - adapted from test_simple.""" # From: "This is a paragraph.
" soup = BeautifulSoup('This is a paragraph.
', 'html.parser') element = soup.find('p') result = paragraph_handler(element, self.base_context) self.assertIsInstance(result, Paragraph) # Should match original test expectations self.assertEqual(len(result), 4) # 4 words words = list(result.words_iter()) expected_texts = ["This", "is", "a", "paragraph."] for i, expected_text in enumerate(expected_texts): self.assertEqual(words[i][1].text, expected_text) def test_heading_handler_all_levels(self): """Test heading handler - adapted from test_headings.""" # From: "" soup = BeautifulSoup('This is a quoted paragraph.
', 'html.parser') element = soup.find('blockquote') result = blockquote_handler(element, self.base_context) self.assertIsInstance(result, Quote) # Check that the quote contains a paragraph (from original test) quote_blocks = list(result.blocks()) self.assertEqual(len(quote_blocks), 1) self.assertIsInstance(quote_blocks[0], Paragraph) def test_preformatted_handler(self): """Test preformatted handler - adapted from test_preformatted_code.""" # From: "This is a quoted paragraph.
function hello() {\n console.log('Hello');\n}"
soup = BeautifulSoup('function hello() {\n console.log(\'Hello\');\n}', 'html.parser')
element = soup.find('pre')
result = preformatted_handler(element, self.base_context)
self.assertIsInstance(result, CodeBlock)
# Should have lines (from original test expectation)
lines = list(result.lines())
self.assertGreater(len(lines), 0)
def test_unordered_list_handler(self):
"""Test unordered list handler - adapted from test_unordered_list."""
# From: "| Header 1 | Header 2 |
|---|---|
| Cell 1 | Cell 2 |
', 'html.parser')
element = soup.find('img')
# Need to apply styling first to get attributes
styled_context = apply_element_styling(self.base_context, element)
result = image_handler(element, styled_context)
self.assertIsInstance(result, Image)
self.assertEqual(result.source, "test.jpg")
self.assertEqual(result.alt_text, "Test image")
self.assertEqual(result.width, 100)
self.assertEqual(result.height, 50)
def test_div_handler_container(self):
"""Test div handler - adapted from test_div_container."""
# From: "First paragraph.
Second paragraph.
First paragraph.
Second paragraph.
This is bold text in a paragraph.
" soup = BeautifulSoup('This is bold text in a paragraph.
', 'html.parser') element = soup.find('p') result = paragraph_handler(element, self.base_context) self.assertIsInstance(result, Paragraph) words = list(result.words_iter()) self.assertEqual(len(words), 7) # From original test expectation # Check that 'bold' and 'text' words have bold font weight (from original test) bold_word = words[2][1] # 'bold' text_word = words[3][1] # 'text' self.assertEqual(bold_word.text, "bold") self.assertEqual(bold_word.style.weight, FontWeight.BOLD) self.assertEqual(text_word.text, "text") self.assertEqual(text_word.style.weight, FontWeight.BOLD) # Check that other words are not bold (from original test) normal_word = words[0][1] # 'This' self.assertEqual(normal_word.text, "This") self.assertNotEqual(normal_word.style.weight, FontWeight.BOLD) def test_paragraph_with_mixed_formatting(self): """Test mixed formatting - adapted from test_mixed_formatting.""" # From: "This paragraph contains bold, italic, blue..." soup = BeautifulSoup('
This paragraph contains bold, italic, blue text.
', 'html.parser') element = soup.find('p') result = paragraph_handler(element, self.base_context) self.assertIsInstance(result, Paragraph) words = list(result.words_iter()) # Check for bold word (from original test pattern) bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD] self.assertGreater(len(bold_words), 0, "Should have bold words") # Check for italic word (from original test pattern) italic_words = [w for _, w in words if w.style.style == FontStyle.ITALIC] self.assertGreater(len(italic_words), 0, "Should have italic words") # Check for blue colored word (from original test pattern) blue_words = [w for _, w in words if w.style.colour == (0, 0, 255)] self.assertGreater(len(blue_words), 0, "Should have blue colored words") if __name__ == '__main__': unittest.main()