From 718027f3c8456a8b2d381a52b2feb1ad81042054 Mon Sep 17 00:00:00 2001 From: Duncan Tourolle Date: Fri, 12 Sep 2025 21:23:56 +0200 Subject: [PATCH] Fix tests --- pyWebLayout/abstract/document.py | 2 +- pyWebLayout/concrete/text.py | 2 +- pyWebLayout/io/readers/html_extraction.py | 4 +-- pyWebLayout/layout/ereader_layout.py | 4 +-- pyWebLayout/style/alignment.py | 8 ++++- tests/abstract/test_abstract_blocks.py | 2 +- tests/concrete/test_concrete_text.py | 2 +- tests/io_tests/test_epub_reader.py | 6 ++-- tests/io_tests/test_html_extraction.py | 30 +++++++++---------- .../test_html_extraction_functions.py | 11 ++++--- tests/layouter/test_document_layouter.py | 14 +++++++++ tests/style/test_html_style.py | 12 ++++---- 12 files changed, 58 insertions(+), 39 deletions(-) diff --git a/pyWebLayout/abstract/document.py b/pyWebLayout/abstract/document.py index 345d21f..a510566 100644 --- a/pyWebLayout/abstract/document.py +++ b/pyWebLayout/abstract/document.py @@ -317,7 +317,7 @@ class Document: for heading in headings: # Extract text from the heading title = "" - for _, word in heading.words(): + for _, word in heading.words_iter(): title += word.text + " " title = title.strip() diff --git a/pyWebLayout/concrete/text.py b/pyWebLayout/concrete/text.py index 1d1d1b8..39cf567 100644 --- a/pyWebLayout/concrete/text.py +++ b/pyWebLayout/concrete/text.py @@ -366,7 +366,7 @@ class Line(Box): spacing_length = self._spacing[0] * (len(self._text_objects) - 1) remaining=self._size[0] - word_length - spacing_length fraction = remaining / text.width - spliter = round(fraction*len(text.text)) # get the split index for best spacing + spliter = max(1, round(fraction*len(word.text))) # get the split index for best spacing, use original word length split = [Text(word.text[:spliter]+"-", word.style, self._draw, line=self, source=word), Text(word.text[spliter:], word.style, self._draw, line=self, source=word)] self._text_objects.append(split[0]) word.add_concete(split) diff --git 
a/pyWebLayout/io/readers/html_extraction.py b/pyWebLayout/io/readers/html_extraction.py index e6fbe68..f29f576 100644 --- a/pyWebLayout/io/readers/html_extraction.py +++ b/pyWebLayout/io/readers/html_extraction.py @@ -401,10 +401,10 @@ def extract_text_content(element: Tag, context: StyleContext) -> List[Word]: if isinstance(child_result, list): for block in child_result: if isinstance(block, Paragraph): - for _, word in block.words(): + for _, word in block.words_iter(): words.append(word) elif isinstance(child_result, Paragraph): - for _, word in child_result.words(): + for _, word in child_result.words_iter(): words.append(word) return words diff --git a/pyWebLayout/layout/ereader_layout.py b/pyWebLayout/layout/ereader_layout.py index cf89dd0..3528a81 100644 --- a/pyWebLayout/layout/ereader_layout.py +++ b/pyWebLayout/layout/ereader_layout.py @@ -124,7 +124,7 @@ class ChapterNavigator: def _extract_heading_text(self, heading: Heading) -> str: """Extract text content from a heading block""" words = [] - for word in heading.words(): + for position, word in heading.words_iter(): if isinstance(word, Word): words.append(word.text) return " ".join(words) @@ -288,7 +288,7 @@ class BidirectionalLayouter: # In practice, we'd need to handle each block type appropriately if isinstance(block, Paragraph): scaled_block = Paragraph(FontScaler.scale_font(block.style, font_scale)) - for word in block.words(): + for _, word in block.words_iter(): if isinstance(word, Word): scaled_word = Word(word.text, FontScaler.scale_font(word.style, font_scale)) scaled_block.add_word(scaled_word) diff --git a/pyWebLayout/style/alignment.py b/pyWebLayout/style/alignment.py index c699b04..bac8dd7 100644 --- a/pyWebLayout/style/alignment.py +++ b/pyWebLayout/style/alignment.py @@ -7,11 +7,17 @@ This module provides alignment-related functionality. 
from enum import Enum class Alignment(Enum): - """Text alignment options""" + """Text and box alignment options""" + # Horizontal alignment LEFT = "left" RIGHT = "right" CENTER = "center" JUSTIFY = "justify" + + # Vertical alignment + TOP = "top" + MIDDLE = "middle" + BOTTOM = "bottom" def __str__(self): """Return the string value of the alignment.""" diff --git a/tests/abstract/test_abstract_blocks.py b/tests/abstract/test_abstract_blocks.py index b4f3f9f..40ea8f3 100644 --- a/tests/abstract/test_abstract_blocks.py +++ b/tests/abstract/test_abstract_blocks.py @@ -49,7 +49,7 @@ class TestBlockElements(unittest.TestCase): self.assertEqual(paragraph.word_count, 2) # Test word iteration - words = list(paragraph.words()) + words = list(paragraph.words_iter()) self.assertEqual(len(words), 2) self.assertEqual(words[0][1].text, "Hello") self.assertEqual(words[1][1].text, "World") diff --git a/tests/concrete/test_concrete_text.py b/tests/concrete/test_concrete_text.py index 0b4d2f6..18b55ea 100644 --- a/tests/concrete/test_concrete_text.py +++ b/tests/concrete/test_concrete_text.py @@ -254,7 +254,7 @@ class TestLine(unittest.TestCase): success, overflow_part = line.add_word(word) # If successful, the word should be added if overflow_part: - self.assertEqual(overflow_part.text , "A") + self.assertEqual(overflow_part.text , "AA") return self.assertFalse(True) diff --git a/tests/io_tests/test_epub_reader.py b/tests/io_tests/test_epub_reader.py index 3454d82..16bb8fc 100644 --- a/tests/io_tests/test_epub_reader.py +++ b/tests/io_tests/test_epub_reader.py @@ -395,7 +395,7 @@ class TestEPUBReader(unittest.TestCase): styled_words_found = False for block in chapter2_blocks: if isinstance(block, Paragraph): - words = list(block.words()) + words = list(block.words_iter()) for _, word in words: if (word.style.weight == FontWeight.BOLD or word.style.style == FontStyle.ITALIC or @@ -717,7 +717,7 @@ class TestEPUBIntegrationWithHTMLExtraction(unittest.TestCase): styled_content_found = 
False for block in blocks: if isinstance(block, Paragraph): - words = list(block.words()) + words = list(block.words_iter()) for _, word in words: if (word.style.weight == FontWeight.BOLD or word.style.style == FontStyle.ITALIC or @@ -738,7 +738,7 @@ class TestEPUBIntegrationWithHTMLExtraction(unittest.TestCase): for block in blocks: if isinstance(block, (Paragraph, Table)): if isinstance(block, Paragraph): - words = list(block.words()) + words = list(block.words_iter()) for _, word in words: if word.style.colour == (255, 0, 0): # Red red_text_found = True diff --git a/tests/io_tests/test_html_extraction.py b/tests/io_tests/test_html_extraction.py index b9afb3a..03151e1 100644 --- a/tests/io_tests/test_html_extraction.py +++ b/tests/io_tests/test_html_extraction.py @@ -21,7 +21,7 @@ class TestHTMLParagraph(unittest.TestCase): self.assertEqual(len(paragraphs), 1) self.assertEqual(len(paragraphs[0]), 4) - for w1, t1 in zip(paragraphs[0].words(), "This is a paragraph.".split(" ")): + for w1, t1 in zip(paragraphs[0].words_iter(), "This is a paragraph.".split(" ")): self.assertEqual(w1[1].text, t1) def test_multiple(self): @@ -31,10 +31,10 @@ class TestHTMLParagraph(unittest.TestCase): self.assertEqual(len(paragraphs[0]), 4) self.assertEqual(len(paragraphs[1]), 4) - for w1, t1 in zip(paragraphs[0].words(), "This is a paragraph.".split(" ")): + for w1, t1 in zip(paragraphs[0].words_iter(), "This is a paragraph.".split(" ")): self.assertEqual(w1[1].text, t1) - for w1, t1 in zip(paragraphs[1].words(), "This is another paragraph.".split(" ")): + for w1, t1 in zip(paragraphs[1].words_iter(), "This is another paragraph.".split(" ")): self.assertEqual(w1[1].text, t1) @@ -48,7 +48,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): self.assertEqual(len(blocks), 1) self.assertIsInstance(blocks[0], Paragraph) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) self.assertEqual(len(words), 7) # "This is bold text in a paragraph." 
# Check that 'bold' and 'text' words have bold font weight @@ -71,7 +71,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): self.assertEqual(len(blocks), 1) self.assertIsInstance(blocks[0], Paragraph) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) # Check that 'italic' and 'text' words have italic font style italic_word = words[2][1] # 'italic' @@ -87,7 +87,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): blocks = parse_html_string(text) self.assertEqual(len(blocks), 1) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) underlined_word = words[2][1] # 'underlined' self.assertEqual(underlined_word.style.decoration, TextDecoration.UNDERLINE) @@ -97,7 +97,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): blocks = parse_html_string(text) self.assertEqual(len(blocks), 1) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) strike_word = words[2][1] # 'strikethrough' self.assertEqual(strike_word.style.decoration, TextDecoration.STRIKETHROUGH) @@ -108,7 +108,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): self.assertEqual(len(blocks), 1) self.assertIsInstance(blocks[0], Paragraph) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) # Find the styled words styled_words = [] @@ -130,7 +130,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): self.assertEqual(len(blocks), 1) self.assertIsInstance(blocks[0], Paragraph) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) # Check for bold word bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD] @@ -150,7 +150,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): blocks = parse_html_string(text) self.assertEqual(len(blocks), 1) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) # Find words that should be both bold and italic bold_italic_words = [w for _, w in words @@ -163,7 +163,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase): blocks = 
parse_html_string(text) self.assertEqual(len(blocks), 1) - words = list(blocks[0].words()) + words = list(blocks[0].words_iter()) # Check for hex red color hex_red_words = [w for _, w in words if w.style.colour == (255, 0, 0)] @@ -206,7 +206,7 @@ class TestHTMLBlockElements(unittest.TestCase): self.assertIsInstance(block, Heading) self.assertEqual(block.level, expected_levels[i]) - words = list(block.words()) + words = list(block.words_iter()) self.assertEqual(len(words), 2) # "Heading" and number self.assertEqual(words[0][1].text, "Heading") @@ -264,7 +264,7 @@ class TestHTMLBlockElements(unittest.TestCase): # Check second item has bold text second_item_blocks = list(items[1].blocks()) if second_item_blocks: - words = list(second_item_blocks[0].words()) + words = list(second_item_blocks[0].words_iter()) bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD] self.assertGreater(len(bold_words), 0) @@ -470,7 +470,7 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): # Extract all words from the paragraph paragraph = blocks[0] - words = list(paragraph.words()) + words = list(paragraph.words_iter()) # Find words with different styles normal_words = [w for _, w in words if w.style.weight == FontWeight.NORMAL @@ -545,7 +545,7 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase): # Should create styles for different style combinations paragraph = blocks[0] - words = list(paragraph.words()) + words = list(paragraph.words_iter()) # Find words that are both bold and italic bold_italic_words = [w for _, w in words diff --git a/tests/io_tests/test_html_extraction_functions.py b/tests/io_tests/test_html_extraction_functions.py index 2147497..34e4363 100644 --- a/tests/io_tests/test_html_extraction_functions.py +++ b/tests/io_tests/test_html_extraction_functions.py @@ -7,8 +7,7 @@ reusing test patterns from test_html_extraction.py that are known to pass. 
import unittest from bs4 import BeautifulSoup, Tag -from pyWebLayout.io.rea -ders.html_extraction import ( +from pyWebLayout.io.readers.html_extraction import ( create_base_context, apply_element_styling, parse_inline_styles, @@ -245,7 +244,7 @@ class TestHandlerFunctions(unittest.TestCase): # Should match original test expectations self.assertEqual(len(result), 4) # 4 words - words = list(result.words()) + words = list(result.words_iter()) expected_texts = ["This", "is", "a", "paragraph."] for i, expected_text in enumerate(expected_texts): self.assertEqual(words[i][1].text, expected_text) @@ -267,7 +266,7 @@ class TestHandlerFunctions(unittest.TestCase): self.assertEqual(result.level, expected_level) # Should match original test word expectations - words = list(result.words()) + words = list(result.words_iter()) self.assertEqual(len(words), 2) # "Heading" and number self.assertEqual(words[0][1].text, "Heading") @@ -450,7 +449,7 @@ class TestStyledContentHandling(unittest.TestCase): result = paragraph_handler(element, self.base_context) self.assertIsInstance(result, Paragraph) - words = list(result.words()) + words = list(result.words_iter()) self.assertEqual(len(words), 7) # From original test expectation # Check that 'bold' and 'text' words have bold font weight (from original test) @@ -475,7 +474,7 @@ class TestStyledContentHandling(unittest.TestCase): result = paragraph_handler(element, self.base_context) self.assertIsInstance(result, Paragraph) - words = list(result.words()) + words = list(result.words_iter()) # Check for bold word (from original test pattern) bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD] diff --git a/tests/layouter/test_document_layouter.py b/tests/layouter/test_document_layouter.py index 4c9cfc8..2df910d 100644 --- a/tests/layouter/test_document_layouter.py +++ b/tests/layouter/test_document_layouter.py @@ -50,7 +50,21 @@ class TestDocumentLayouter: self.mock_concrete_style.word_spacing_min = 2.0 
self.mock_concrete_style.word_spacing_max = 8.0 self.mock_concrete_style.text_align = "left" + + # Create mock font that returns proper metrics + mock_font = Mock() + mock_font.getmetrics.return_value = (12, 4) # (ascent, descent) + mock_font.font = mock_font # For accessing .font property + self.mock_concrete_style.create_font = Mock() + + # Update mock words to have proper style with font + for word in self.mock_words: + word.style = Mock() + word.style.font = mock_font + word.style.font_size = 16 + word.style.colour = (0, 0, 0) + word.style.background = None @patch('pyWebLayout.layout.document_layouter.ConcreteStyleRegistry') @patch('pyWebLayout.layout.document_layouter.Line') diff --git a/tests/style/test_html_style.py b/tests/style/test_html_style.py index 6537146..8e0fac7 100644 --- a/tests/style/test_html_style.py +++ b/tests/style/test_html_style.py @@ -42,12 +42,12 @@ class TestStyleObjects(unittest.TestCase): def test_alignment_enum(self): """Test Alignment enum values.""" - self.assertEqual(Alignment.LEFT.value, 1) - self.assertEqual(Alignment.CENTER.value, 2) - self.assertEqual(Alignment.RIGHT.value, 3) - self.assertEqual(Alignment.TOP.value, 4) - self.assertEqual(Alignment.BOTTOM.value, 5) - self.assertEqual(Alignment.JUSTIFY.value, 6) + self.assertEqual(Alignment.LEFT.value, "left") + self.assertEqual(Alignment.CENTER.value, "center") + self.assertEqual(Alignment.RIGHT.value, "right") + self.assertEqual(Alignment.TOP.value, "top") + self.assertEqual(Alignment.BOTTOM.value, "bottom") + self.assertEqual(Alignment.JUSTIFY.value, "justify") def test_font_initialization_defaults(self): """Test Font initialization with default values."""