# Heading 1

"""
Unit tests for individual HTML extraction functions.

Tests the specific handler functions and utility functions in html_extraction module,
reusing test patterns from test_html_extraction.py that are known to pass.
"""

import unittest
from bs4 import BeautifulSoup, Tag
from pyWebLayout.io.readers.html_extraction import (
    create_base_context,
    apply_element_styling,
    parse_inline_styles,
    apply_element_font_styles,
    extract_text_content,
    paragraph_handler,
    div_handler,
    heading_handler,
    blockquote_handler,
    preformatted_handler,
    unordered_list_handler,
    ordered_list_handler,
    list_item_handler,
    table_handler,
    table_row_handler,
    table_cell_handler,
    table_header_cell_handler,
    horizontal_rule_handler,
    image_handler,
    StyleContext,
)
from pyWebLayout.abstract.block import (
    Paragraph,
    Heading,
    HeadingLevel,
    Quote,
    CodeBlock,
    HList,
    ListItem,
    ListStyle,
    Table,
    TableRow,
    TableCell,
    HorizontalRule,
    Image,
)
from pyWebLayout.abstract.inline import Word
from pyWebLayout.style import Font, FontWeight, FontStyle, TextDecoration


class TestUtilityFunctions(unittest.TestCase):
    """Test cases for the style-context and font-styling utility functions.

    Covers ``create_base_context``, ``parse_inline_styles``,
    ``apply_element_font_styles`` and ``apply_element_styling``.

    Tests that check several tags or values run each variant inside
    ``self.subTest`` so that every failing variant is reported, instead of
    the first failure hiding the remaining ones.
    """

    def test_create_base_context(self):
        """Test creation of base style context."""
        context = create_base_context()

        self.assertIsInstance(context, StyleContext)
        self.assertIsInstance(context.font, Font)
        self.assertIsNone(context.background)
        self.assertEqual(context.css_classes, set())
        self.assertEqual(context.css_styles, {})
        self.assertEqual(context.element_attributes, {})
        self.assertEqual(context.parent_elements, [])

    def test_parse_inline_styles_from_existing_tests(self):
        """Test parsing CSS inline styles - adapted from test_span_with_inline_styles."""
        # From: '<span style="color: red; font-weight: bold;">this part is red and bold</span>'
        styles = parse_inline_styles("color: red; font-weight: bold;")

        self.assertEqual(styles, {"color": "red", "font-weight": "bold"})

    def test_parse_inline_styles_color_variations(self):
        """Test parsing different color formats - adapted from test_color_variations."""
        # (inline style text, expected raw "color" value): hex and named colours.
        cases = [
            ("color: #ff0000;", "#ff0000"),
            ("color: green;", "green"),
        ]

        for style_text, expected_color in cases:
            with self.subTest(style=style_text):
                styles = parse_inline_styles(style_text)
                self.assertEqual(styles.get("color"), expected_color)

    def test_apply_element_font_styles_bold_elements(self):
        """Test font style application for bold elements - adapted from test_bold_text."""
        base_font = Font()

        # <strong> comes from "<strong>bold text</strong>"; <b> is its sibling tag.
        for tag in ("strong", "b"):
            with self.subTest(tag=tag):
                font = apply_element_font_styles(base_font, tag, {})
                self.assertEqual(font.weight, FontWeight.BOLD)

    def test_apply_element_font_styles_italic_elements(self):
        """Test font style application for italic elements - adapted from test_italic_text."""
        base_font = Font()

        # <em> comes from "<em>italic text</em>"; <i> is its sibling tag.
        for tag in ("em", "i"):
            with self.subTest(tag=tag):
                font = apply_element_font_styles(base_font, tag, {})
                self.assertEqual(font.style, FontStyle.ITALIC)

    def test_apply_element_font_styles_decoration_elements(self):
        """Test font decoration - adapted from test_underlined_text and test_strikethrough_text."""
        base_font = Font()

        # (tag, expected decoration) - from "<u>underlined text</u>" and
        # "<s>strikethrough text</s>", plus the <del> variant.
        cases = [
            ("u", TextDecoration.UNDERLINE),
            ("s", TextDecoration.STRIKETHROUGH),
            ("del", TextDecoration.STRIKETHROUGH),
        ]

        for tag, expected_decoration in cases:
            with self.subTest(tag=tag):
                font = apply_element_font_styles(base_font, tag, {})
                self.assertEqual(font.decoration, expected_decoration)

    def test_apply_element_font_styles_headings(self):
        """Test heading font styles - adapted from test_headings."""
        base_font = Font()

        # Expected font size per heading tag; every heading must also be bold.
        headings = [("h1", 24), ("h2", 20), ("h3", 18), ("h4", 16), ("h5", 14), ("h6", 12)]

        for tag, expected_size in headings:
            with self.subTest(tag=tag):
                font = apply_element_font_styles(base_font, tag, {})
                self.assertEqual(font.font_size, expected_size, f"Size mismatch for {tag}")
                self.assertEqual(font.weight, FontWeight.BOLD, f"Weight should be bold for {tag}")

    def test_apply_element_font_styles_color_parsing(self):
        """Test color parsing - adapted from test_color_variations."""
        base_font = Font()

        # (CSS colour value, expected font colour tuple) - from
        # '<span style="color: green;">Named green</span>' and
        # '<span style="color: #ff0000;">Hex red</span>'.
        cases = [
            ("green", (0, 255, 0)),
            ("#ff0000", (255, 0, 0)),
        ]

        for css_color, expected_colour in cases:
            with self.subTest(color=css_color):
                font = apply_element_font_styles(base_font, "span", {"color": css_color})
                self.assertEqual(font.colour, expected_colour)

    def test_apply_element_styling_with_classes_and_styles(self):
        """Test complete element styling - adapted from test_span_with_inline_styles."""
        # From: '<span style="color: red; font-weight: bold;">this part is red and bold</span>'
        soup = BeautifulSoup('<span class="highlight" style="color: red; font-weight: bold;">text</span>', 'html.parser')
        element = soup.find('span')
        base_context = create_base_context()

        styled_context = apply_element_styling(base_context, element)

        # Check CSS classes
        self.assertIn("highlight", styled_context.css_classes)

        # Check CSS styles
        self.assertEqual(styled_context.css_styles.get("color"), "red")
        self.assertEqual(styled_context.css_styles.get("font-weight"), "bold")

        # Check font styling
        self.assertEqual(styled_context.font.colour, (255, 0, 0))
        self.assertEqual(styled_context.font.weight, FontWeight.BOLD)


class TestExtractTextContent(unittest.TestCase):
    """Exercise ``extract_text_content`` on plain and inline-styled markup."""

    def setUp(self):
        """Create the base style context passed to every extraction call."""
        self.base_context = create_base_context()

    def _extract(self, markup, tag_name):
        """Parse *markup*, find the first *tag_name* element and extract its words."""
        root = BeautifulSoup(markup, 'html.parser').find(tag_name)
        return extract_text_content(root, self.base_context)

    def test_extract_simple_text(self):
        """Plain paragraph text is split into words - adapted from test_simple."""
        extracted = self._extract('<p>This is a paragraph.</p>', 'p')

        # Four words expected: "This", "is", "a", "paragraph."
        self.assertEqual(len(extracted), 4)
        self.assertIsInstance(extracted[0], Word)
        self.assertEqual(extracted[0].text, "This")

    def test_extract_styled_text_bold(self):
        """Words inside <strong> carry a bold weight - adapted from test_bold_text."""
        # Original pattern: "<p>This is <strong>bold text</strong> in a paragraph.</p>"
        extracted = self._extract(
            '<span>This is <strong>bold text</strong> in a paragraph.</span>',
            'span',
        )

        # Collect the text of every word whose style is bold.
        bold_texts = [
            word.text
            for word in extracted
            if word.style.weight == FontWeight.BOLD
        ]
        self.assertGreater(len(bold_texts), 0, "Should have bold words")

        # Both words wrapped by <strong> must be among the bold ones.
        for expected in ("bold", "text"):
            self.assertIn(expected, bold_texts)

    def test_extract_nested_formatting(self):
        """<em> nested in <strong> yields bold-italic words - adapted from test_nested_formatting."""
        # Original pattern: "<p>This has <strong>bold with <em>italic inside</em></strong> formatting.</p>"
        extracted = self._extract(
            '<span>This has <strong>bold with <em>italic inside</em></strong> formatting.</span>',
            'span',
        )

        def is_bold_italic(word):
            return (word.style.weight == FontWeight.BOLD
                    and word.style.style == FontStyle.ITALIC)

        combined = list(filter(is_bold_italic, extracted))
        self.assertGreater(len(combined), 0, "Should have words that are both bold and italic")


class TestHandlerFunctions(unittest.TestCase):
    """Test cases for HTML element handler functions using known working patterns.

    Each test parses a small HTML fragment, passes the resulting element to a
    single handler from ``html_extraction`` and checks the type (and, where
    asserted, the contents) of the block it returns.
    """

    def setUp(self):
        """Set up test fixtures."""
        self.base_context = create_base_context()

    @staticmethod
    def _find_element(markup, tag_name):
        """Parse *markup* with ``html.parser`` and return the first *tag_name* element."""
        return BeautifulSoup(markup, 'html.parser').find(tag_name)

    def test_paragraph_handler_simple(self):
        """Test paragraph handler - adapted from test_simple."""
        # From: "<p>This is a paragraph.</p>"
        element = self._find_element('<p>This is a paragraph.</p>', 'p')

        result = paragraph_handler(element, self.base_context)

        self.assertIsInstance(result, Paragraph)
        # Should match original test expectations
        self.assertEqual(len(result), 4)  # 4 words

        words = list(result.words_iter())
        expected_texts = ["This", "is", "a", "paragraph."]
        # Each entry from words_iter() is indexed with [1] to reach the word.
        # Comparing whole lists yields a readable diff when a word is wrong.
        actual_texts = [entry[1].text for entry in words[:len(expected_texts)]]
        self.assertEqual(actual_texts, expected_texts)

    def test_heading_handler_all_levels(self):
        """Test heading handler - adapted from test_headings."""
        # From: "<h1>Heading 1</h1><h2>Heading 2</h2>..."
        expected_levels = [HeadingLevel.H1, HeadingLevel.H2, HeadingLevel.H3,
                           HeadingLevel.H4, HeadingLevel.H5, HeadingLevel.H6]

        for i, expected_level in enumerate(expected_levels, 1):
            tag = f"h{i}"
            # subTest keeps checking the remaining levels if one of them fails.
            with self.subTest(tag=tag):
                element = self._find_element(f'<{tag}>Heading {i}</{tag}>', tag)

                result = heading_handler(element, self.base_context)

                self.assertIsInstance(result, Heading)
                self.assertEqual(result.level, expected_level)

                # Should match original test word expectations
                words = list(result.words_iter())
                self.assertEqual(len(words), 2)  # "Heading" and number
                self.assertEqual(words[0][1].text, "Heading")

    def test_blockquote_handler(self):
        """Test blockquote handler - adapted from test_blockquote."""
        # From: "<blockquote><p>This is a quoted paragraph.</p></blockquote>"
        element = self._find_element(
            '<blockquote><p>This is a quoted paragraph.</p></blockquote>',
            'blockquote',
        )

        result = blockquote_handler(element, self.base_context)

        self.assertIsInstance(result, Quote)

        # Check that the quote contains a paragraph (from original test)
        quote_blocks = list(result.blocks())
        self.assertEqual(len(quote_blocks), 1)
        self.assertIsInstance(quote_blocks[0], Paragraph)

    def test_preformatted_handler(self):
        """Test preformatted handler - adapted from test_preformatted_code."""
        # From: "<pre><code>function hello() {\n  console.log('Hello');\n}</code></pre>"
        element = self._find_element(
            '<pre><code>function hello() {\n  console.log(\'Hello\');\n}</code></pre>',
            'pre',
        )

        result = preformatted_handler(element, self.base_context)

        self.assertIsInstance(result, CodeBlock)

        # Should have lines (from original test expectation)
        lines = list(result.lines())
        self.assertGreater(len(lines), 0)

    def test_unordered_list_handler(self):
        """Test unordered list handler - adapted from test_unordered_list."""
        # From: "<ul><li>First item</li><li>Second item</li><li>Third item</li></ul>"
        element = self._find_element(
            '<ul><li>First item</li><li>Second item</li><li>Third item</li></ul>',
            'ul',
        )

        result = unordered_list_handler(element, self.base_context)

        self.assertIsInstance(result, HList)
        self.assertEqual(result.style, ListStyle.UNORDERED)

        # Should match original test expectations
        items = list(result.items())
        self.assertEqual(len(items), 3)

    def test_ordered_list_handler(self):
        """Test ordered list handler - adapted from test_ordered_list."""
        # From: "<ol><li>First item</li><li>Second item</li><li>Third item</li></ol>"
        element = self._find_element(
            '<ol><li>First item</li><li>Second item</li><li>Third item</li></ol>',
            'ol',
        )

        result = ordered_list_handler(element, self.base_context)

        self.assertIsInstance(result, HList)
        self.assertEqual(result.style, ListStyle.ORDERED)

        # Should match original test expectations
        items = list(result.items())
        self.assertEqual(len(items), 3)  # "First item", "Second item", "Third item"

    def test_list_item_handler(self):
        """Test list item handler."""
        element = self._find_element('<li>List item content</li>', 'li')

        result = list_item_handler(element, self.base_context)

        self.assertIsInstance(result, ListItem)
        blocks = list(result.blocks())
        self.assertGreater(len(blocks), 0)

    def test_table_handler(self):
        """Test table handler - adapted from test_table_basic."""
        # From test_table_basic structure
        element = self._find_element('''
            <table>
                <tr>
                    <th>Header 1</th>
                    <th>Header 2</th>
                </tr>
                <tr>
                    <td>Cell 1</td>
                    <td>Cell 2</td>
                </tr>
            </table>
        ''', 'table')

        result = table_handler(element, self.base_context)

        self.assertIsInstance(result, Table)

    def test_table_row_handler(self):
        """Test table row handler."""
        element = self._find_element('<tr><td>Cell 1</td><td>Cell 2</td></tr>', 'tr')

        result = table_row_handler(element, self.base_context)

        self.assertIsInstance(result, TableRow)

    def test_table_cell_handler(self):
        """Test table cell handler."""
        element = self._find_element('<td>Cell content</td>', 'td')

        # Apply styling to get attributes
        styled_context = apply_element_styling(self.base_context, element)
        result = table_cell_handler(element, styled_context)

        self.assertIsInstance(result, TableCell)
        self.assertEqual(result.is_header, False)

    def test_table_header_cell_handler(self):
        """Test table header cell handler."""
        element = self._find_element('<th>Header content</th>', 'th')

        # Apply styling to get attributes
        styled_context = apply_element_styling(self.base_context, element)
        result = table_header_cell_handler(element, styled_context)

        self.assertIsInstance(result, TableCell)
        self.assertEqual(result.is_header, True)

    def test_horizontal_rule_handler(self):
        """Test horizontal rule handler."""
        element = self._find_element('<hr>', 'hr')

        result = horizontal_rule_handler(element, self.base_context)

        self.assertIsInstance(result, HorizontalRule)

    def test_image_handler(self):
        """Test image handler."""
        element = self._find_element(
            '<img src="test.jpg" alt="Test image" width="100" height="50">',
            'img',
        )

        # Need to apply styling first to get attributes
        styled_context = apply_element_styling(self.base_context, element)
        result = image_handler(element, styled_context)

        self.assertIsInstance(result, Image)
        self.assertEqual(result.source, "test.jpg")
        self.assertEqual(result.alt_text, "Test image")
        self.assertEqual(result.width, 100)
        self.assertEqual(result.height, 50)

    def test_div_handler_container(self):
        """Test div handler - adapted from test_div_container."""
        # From: "<div><p>First paragraph.</p><p>Second paragraph.</p></div>"
        element = self._find_element(
            '<div><p>First paragraph.</p><p>Second paragraph.</p></div>',
            'div',
        )

        result = div_handler(element, self.base_context)

        self.assertIsInstance(result, list)
        # Should match original test expectations
        self.assertEqual(len(result), 2)
        self.assertIsInstance(result[0], Paragraph)
        self.assertIsInstance(result[1], Paragraph)


class TestStyledContentHandling(unittest.TestCase):
    """Check that ``paragraph_handler`` keeps inline styling on individual words."""

    def setUp(self):
        """Create the base style context handed to the paragraph handler."""
        self.base_context = create_base_context()

    def test_paragraph_with_bold_content(self):
        """Only the words inside <strong> are bold - adapted from test_bold_text."""
        markup = '<p>This is <strong>bold text</strong> in a paragraph.</p>'
        paragraph_tag = BeautifulSoup(markup, 'html.parser').find('p')

        paragraph = paragraph_handler(paragraph_tag, self.base_context)

        self.assertIsInstance(paragraph, Paragraph)
        entries = list(paragraph.words_iter())
        self.assertEqual(len(entries), 7)  # From original test expectation

        # Positions 2 and 3 hold the two words wrapped by <strong>.
        for position, expected_text in ((2, "bold"), (3, "text")):
            word = entries[position][1]
            self.assertEqual(word.text, expected_text)
            self.assertEqual(word.style.weight, FontWeight.BOLD)

        # The leading word sits outside <strong> and must not be bold.
        leading = entries[0][1]
        self.assertEqual(leading.text, "This")
        self.assertNotEqual(leading.style.weight, FontWeight.BOLD)

    def test_paragraph_with_mixed_formatting(self):
        """Bold, italic and coloured spans coexist - adapted from test_mixed_formatting."""
        markup = (
            '<p>This paragraph contains <strong>bold</strong>, <em>italic</em>, '
            '<span style="color: blue;">blue</span> text.</p>'
        )
        paragraph_tag = BeautifulSoup(markup, 'html.parser').find('p')

        paragraph = paragraph_handler(paragraph_tag, self.base_context)

        self.assertIsInstance(paragraph, Paragraph)
        entries = list(paragraph.words_iter())

        # Count the words matching each kind of styling; at least one of each is required.
        bold_count = sum(
            1 for _, word in entries if word.style.weight == FontWeight.BOLD
        )
        self.assertGreater(bold_count, 0, "Should have bold words")

        italic_count = sum(
            1 for _, word in entries if word.style.style == FontStyle.ITALIC
        )
        self.assertGreater(italic_count, 0, "Should have italic words")

        blue_count = sum(
            1 for _, word in entries if word.style.colour == (0, 0, 255)
        )
        self.assertGreater(blue_count, 0, "Should have blue colored words")


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()