This commit is contained in:
parent
65ab46556f
commit
718027f3c8
@ -317,7 +317,7 @@ class Document:
|
||||
for heading in headings:
|
||||
# Extract text from the heading
|
||||
title = ""
|
||||
for _, word in heading.words():
|
||||
for _, word in heading.words_iter():
|
||||
title += word.text + " "
|
||||
title = title.strip()
|
||||
|
||||
|
||||
@ -366,7 +366,7 @@ class Line(Box):
|
||||
spacing_length = self._spacing[0] * (len(self._text_objects) - 1)
|
||||
remaining=self._size[0] - word_length - spacing_length
|
||||
fraction = remaining / text.width
|
||||
spliter = round(fraction*len(text.text)) # get the split index for best spacing
|
||||
spliter = max(1, round(fraction*len(word.text))) # get the split index for best spacing, use original word length
|
||||
split = [Text(word.text[:spliter]+"-", word.style, self._draw, line=self, source=word), Text(word.text[spliter:], word.style, self._draw, line=self, source=word)]
|
||||
self._text_objects.append(split[0])
|
||||
word.add_concete(split)
|
||||
|
||||
@ -401,10 +401,10 @@ def extract_text_content(element: Tag, context: StyleContext) -> List[Word]:
|
||||
if isinstance(child_result, list):
|
||||
for block in child_result:
|
||||
if isinstance(block, Paragraph):
|
||||
for _, word in block.words():
|
||||
for _, word in block.words_iter():
|
||||
words.append(word)
|
||||
elif isinstance(child_result, Paragraph):
|
||||
for _, word in child_result.words():
|
||||
for _, word in child_result.words_iter():
|
||||
words.append(word)
|
||||
|
||||
return words
|
||||
|
||||
@ -124,7 +124,7 @@ class ChapterNavigator:
|
||||
def _extract_heading_text(self, heading: Heading) -> str:
|
||||
"""Extract text content from a heading block"""
|
||||
words = []
|
||||
for word in heading.words():
|
||||
for position, word in heading.words_iter():
|
||||
if isinstance(word, Word):
|
||||
words.append(word.text)
|
||||
return " ".join(words)
|
||||
@ -288,7 +288,7 @@ class BidirectionalLayouter:
|
||||
# In practice, we'd need to handle each block type appropriately
|
||||
if isinstance(block, Paragraph):
|
||||
scaled_block = Paragraph(FontScaler.scale_font(block.style, font_scale))
|
||||
for word in block.words():
|
||||
for word in block.words_iter():
|
||||
if isinstance(word, Word):
|
||||
scaled_word = Word(word.text, FontScaler.scale_font(word.style, font_scale))
|
||||
scaled_block.add_word(scaled_word)
|
||||
|
||||
@ -7,11 +7,17 @@ This module provides alignment-related functionality.
|
||||
from enum import Enum
|
||||
|
||||
class Alignment(Enum):
|
||||
"""Text alignment options"""
|
||||
"""Text and box alignment options"""
|
||||
# Horizontal alignment
|
||||
LEFT = "left"
|
||||
RIGHT = "right"
|
||||
CENTER = "center"
|
||||
JUSTIFY = "justify"
|
||||
|
||||
# Vertical alignment
|
||||
TOP = "top"
|
||||
MIDDLE = "middle"
|
||||
BOTTOM = "bottom"
|
||||
|
||||
def __str__(self):
|
||||
"""Return the string value of the alignment."""
|
||||
|
||||
@ -49,7 +49,7 @@ class TestBlockElements(unittest.TestCase):
|
||||
self.assertEqual(paragraph.word_count, 2)
|
||||
|
||||
# Test word iteration
|
||||
words = list(paragraph.words())
|
||||
words = list(paragraph.words_iter())
|
||||
self.assertEqual(len(words), 2)
|
||||
self.assertEqual(words[0][1].text, "Hello")
|
||||
self.assertEqual(words[1][1].text, "World")
|
||||
|
||||
@ -254,7 +254,7 @@ class TestLine(unittest.TestCase):
|
||||
success, overflow_part = line.add_word(word)
|
||||
# If successful, the word should be added
|
||||
if overflow_part:
|
||||
self.assertEqual(overflow_part.text , "A")
|
||||
self.assertEqual(overflow_part.text , "AA")
|
||||
return
|
||||
|
||||
self.assertFalse(True)
|
||||
|
||||
@ -395,7 +395,7 @@ class TestEPUBReader(unittest.TestCase):
|
||||
styled_words_found = False
|
||||
for block in chapter2_blocks:
|
||||
if isinstance(block, Paragraph):
|
||||
words = list(block.words())
|
||||
words = list(block.words_iter())
|
||||
for _, word in words:
|
||||
if (word.style.weight == FontWeight.BOLD or
|
||||
word.style.style == FontStyle.ITALIC or
|
||||
@ -717,7 +717,7 @@ class TestEPUBIntegrationWithHTMLExtraction(unittest.TestCase):
|
||||
styled_content_found = False
|
||||
for block in blocks:
|
||||
if isinstance(block, Paragraph):
|
||||
words = list(block.words())
|
||||
words = list(block.words_iter())
|
||||
for _, word in words:
|
||||
if (word.style.weight == FontWeight.BOLD or
|
||||
word.style.style == FontStyle.ITALIC or
|
||||
@ -738,7 +738,7 @@ class TestEPUBIntegrationWithHTMLExtraction(unittest.TestCase):
|
||||
for block in blocks:
|
||||
if isinstance(block, (Paragraph, Table)):
|
||||
if isinstance(block, Paragraph):
|
||||
words = list(block.words())
|
||||
words = list(block.words_iter())
|
||||
for _, word in words:
|
||||
if word.style.colour == (255, 0, 0): # Red
|
||||
red_text_found = True
|
||||
|
||||
@ -21,7 +21,7 @@ class TestHTMLParagraph(unittest.TestCase):
|
||||
self.assertEqual(len(paragraphs), 1)
|
||||
self.assertEqual(len(paragraphs[0]), 4)
|
||||
|
||||
for w1, t1 in zip(paragraphs[0].words(), "This is a paragraph.".split(" ")):
|
||||
for w1, t1 in zip(paragraphs[0].words_iter(), "This is a paragraph.".split(" ")):
|
||||
self.assertEqual(w1[1].text, t1)
|
||||
|
||||
def test_multiple(self):
|
||||
@ -31,10 +31,10 @@ class TestHTMLParagraph(unittest.TestCase):
|
||||
self.assertEqual(len(paragraphs[0]), 4)
|
||||
self.assertEqual(len(paragraphs[1]), 4)
|
||||
|
||||
for w1, t1 in zip(paragraphs[0].words(), "This is a paragraph.".split(" ")):
|
||||
for w1, t1 in zip(paragraphs[0].words_iter(), "This is a paragraph.".split(" ")):
|
||||
self.assertEqual(w1[1].text, t1)
|
||||
|
||||
for w1, t1 in zip(paragraphs[1].words(), "This is another paragraph.".split(" ")):
|
||||
for w1, t1 in zip(paragraphs[1].words_iter(), "This is another paragraph.".split(" ")):
|
||||
self.assertEqual(w1[1].text, t1)
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
self.assertEqual(len(blocks), 1)
|
||||
self.assertIsInstance(blocks[0], Paragraph)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
self.assertEqual(len(words), 7) # "This is bold text in a paragraph."
|
||||
|
||||
# Check that 'bold' and 'text' words have bold font weight
|
||||
@ -71,7 +71,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
self.assertEqual(len(blocks), 1)
|
||||
self.assertIsInstance(blocks[0], Paragraph)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
|
||||
# Check that 'italic' and 'text' words have italic font style
|
||||
italic_word = words[2][1] # 'italic'
|
||||
@ -87,7 +87,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
blocks = parse_html_string(text)
|
||||
self.assertEqual(len(blocks), 1)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
underlined_word = words[2][1] # 'underlined'
|
||||
self.assertEqual(underlined_word.style.decoration, TextDecoration.UNDERLINE)
|
||||
|
||||
@ -97,7 +97,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
blocks = parse_html_string(text)
|
||||
self.assertEqual(len(blocks), 1)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
strike_word = words[2][1] # 'strikethrough'
|
||||
self.assertEqual(strike_word.style.decoration, TextDecoration.STRIKETHROUGH)
|
||||
|
||||
@ -108,7 +108,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
self.assertEqual(len(blocks), 1)
|
||||
self.assertIsInstance(blocks[0], Paragraph)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
|
||||
# Find the styled words
|
||||
styled_words = []
|
||||
@ -130,7 +130,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
self.assertEqual(len(blocks), 1)
|
||||
self.assertIsInstance(blocks[0], Paragraph)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
|
||||
# Check for bold word
|
||||
bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD]
|
||||
@ -150,7 +150,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
blocks = parse_html_string(text)
|
||||
self.assertEqual(len(blocks), 1)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
|
||||
# Find words that should be both bold and italic
|
||||
bold_italic_words = [w for _, w in words
|
||||
@ -163,7 +163,7 @@ class TestHTMLStyledParagraphs(unittest.TestCase):
|
||||
blocks = parse_html_string(text)
|
||||
self.assertEqual(len(blocks), 1)
|
||||
|
||||
words = list(blocks[0].words())
|
||||
words = list(blocks[0].words_iter())
|
||||
|
||||
# Check for hex red color
|
||||
hex_red_words = [w for _, w in words if w.style.colour == (255, 0, 0)]
|
||||
@ -206,7 +206,7 @@ class TestHTMLBlockElements(unittest.TestCase):
|
||||
self.assertIsInstance(block, Heading)
|
||||
self.assertEqual(block.level, expected_levels[i])
|
||||
|
||||
words = list(block.words())
|
||||
words = list(block.words_iter())
|
||||
self.assertEqual(len(words), 2) # "Heading" and number
|
||||
self.assertEqual(words[0][1].text, "Heading")
|
||||
|
||||
@ -264,7 +264,7 @@ class TestHTMLBlockElements(unittest.TestCase):
|
||||
# Check second item has bold text
|
||||
second_item_blocks = list(items[1].blocks())
|
||||
if second_item_blocks:
|
||||
words = list(second_item_blocks[0].words())
|
||||
words = list(second_item_blocks[0].words_iter())
|
||||
bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD]
|
||||
self.assertGreater(len(bold_words), 0)
|
||||
|
||||
@ -470,7 +470,7 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
||||
|
||||
# Extract all words from the paragraph
|
||||
paragraph = blocks[0]
|
||||
words = list(paragraph.words())
|
||||
words = list(paragraph.words_iter())
|
||||
|
||||
# Find words with different styles
|
||||
normal_words = [w for _, w in words if w.style.weight == FontWeight.NORMAL
|
||||
@ -545,7 +545,7 @@ class TestHTMLFontRegistryIntegration(unittest.TestCase):
|
||||
|
||||
# Should create styles for different style combinations
|
||||
paragraph = blocks[0]
|
||||
words = list(paragraph.words())
|
||||
words = list(paragraph.words_iter())
|
||||
|
||||
# Find words that are both bold and italic
|
||||
bold_italic_words = [w for _, w in words
|
||||
|
||||
@ -7,8 +7,7 @@ reusing test patterns from test_html_extraction.py that are known to pass.
|
||||
|
||||
import unittest
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from pyWebLayout.io.rea
|
||||
ders.html_extraction import (
|
||||
from pyWebLayout.io.readers.html_extraction import (
|
||||
create_base_context,
|
||||
apply_element_styling,
|
||||
parse_inline_styles,
|
||||
@ -245,7 +244,7 @@ class TestHandlerFunctions(unittest.TestCase):
|
||||
# Should match original test expectations
|
||||
self.assertEqual(len(result), 4) # 4 words
|
||||
|
||||
words = list(result.words())
|
||||
words = list(result.words_iter())
|
||||
expected_texts = ["This", "is", "a", "paragraph."]
|
||||
for i, expected_text in enumerate(expected_texts):
|
||||
self.assertEqual(words[i][1].text, expected_text)
|
||||
@ -267,7 +266,7 @@ class TestHandlerFunctions(unittest.TestCase):
|
||||
self.assertEqual(result.level, expected_level)
|
||||
|
||||
# Should match original test word expectations
|
||||
words = list(result.words())
|
||||
words = list(result.words_iter())
|
||||
self.assertEqual(len(words), 2) # "Heading" and number
|
||||
self.assertEqual(words[0][1].text, "Heading")
|
||||
|
||||
@ -450,7 +449,7 @@ class TestStyledContentHandling(unittest.TestCase):
|
||||
result = paragraph_handler(element, self.base_context)
|
||||
|
||||
self.assertIsInstance(result, Paragraph)
|
||||
words = list(result.words())
|
||||
words = list(result.words_iter())
|
||||
self.assertEqual(len(words), 7) # From original test expectation
|
||||
|
||||
# Check that 'bold' and 'text' words have bold font weight (from original test)
|
||||
@ -475,7 +474,7 @@ class TestStyledContentHandling(unittest.TestCase):
|
||||
result = paragraph_handler(element, self.base_context)
|
||||
|
||||
self.assertIsInstance(result, Paragraph)
|
||||
words = list(result.words())
|
||||
words = list(result.words_iter())
|
||||
|
||||
# Check for bold word (from original test pattern)
|
||||
bold_words = [w for _, w in words if w.style.weight == FontWeight.BOLD]
|
||||
|
||||
@ -50,7 +50,21 @@ class TestDocumentLayouter:
|
||||
self.mock_concrete_style.word_spacing_min = 2.0
|
||||
self.mock_concrete_style.word_spacing_max = 8.0
|
||||
self.mock_concrete_style.text_align = "left"
|
||||
|
||||
# Create mock font that returns proper metrics
|
||||
mock_font = Mock()
|
||||
mock_font.getmetrics.return_value = (12, 4) # (ascent, descent)
|
||||
mock_font.font = mock_font # For accessing .font property
|
||||
|
||||
self.mock_concrete_style.create_font = Mock()
|
||||
|
||||
# Update mock words to have proper style with font
|
||||
for word in self.mock_words:
|
||||
word.style = Mock()
|
||||
word.style.font = mock_font
|
||||
word.style.font_size = 16
|
||||
word.style.colour = (0, 0, 0)
|
||||
word.style.background = None
|
||||
|
||||
@patch('pyWebLayout.layout.document_layouter.ConcreteStyleRegistry')
|
||||
@patch('pyWebLayout.layout.document_layouter.Line')
|
||||
|
||||
@ -42,12 +42,12 @@ class TestStyleObjects(unittest.TestCase):
|
||||
|
||||
def test_alignment_enum(self):
|
||||
"""Test Alignment enum values."""
|
||||
self.assertEqual(Alignment.LEFT.value, 1)
|
||||
self.assertEqual(Alignment.CENTER.value, 2)
|
||||
self.assertEqual(Alignment.RIGHT.value, 3)
|
||||
self.assertEqual(Alignment.TOP.value, 4)
|
||||
self.assertEqual(Alignment.BOTTOM.value, 5)
|
||||
self.assertEqual(Alignment.JUSTIFY.value, 6)
|
||||
self.assertEqual(Alignment.LEFT.value, "left")
|
||||
self.assertEqual(Alignment.CENTER.value, "center")
|
||||
self.assertEqual(Alignment.RIGHT.value, "right")
|
||||
self.assertEqual(Alignment.TOP.value, "top")
|
||||
self.assertEqual(Alignment.BOTTOM.value, "bottom")
|
||||
self.assertEqual(Alignment.JUSTIFY.value, "justify")
|
||||
|
||||
def test_font_initialization_defaults(self):
|
||||
"""Test Font initialization with default values."""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user