Working version for ebook rendering!!

This commit is contained in:
Duncan Tourolle 2025-11-04 12:57:15 +01:00
parent fdb3023919
commit de18b1c2cc
8 changed files with 583 additions and 292 deletions

View File

@ -2,7 +2,7 @@
""" """
Simple EPUB page renderer tool. Simple EPUB page renderer tool.
This tool uses the pyWebLayout epub_reader and typesetting modules to: This tool uses the pyWebLayout epub_reader and layout modules to:
1. Load an EPUB file 1. Load an EPUB file
2. Render the first X pages according to command line arguments 2. Render the first X pages according to command line arguments
3. Save the pages as PNG images 3. Save the pages as PNG images
@ -15,17 +15,19 @@ import os
import sys import sys
import argparse import argparse
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional, List
# Add the parent directory to sys.path to import pyWebLayout # Add the parent directory to sys.path to import pyWebLayout
sys.path.insert(0, str(Path(__file__).parent.parent)) sys.path.insert(0, str(Path(__file__).parent.parent))
try: try:
from pyWebLayout.io.readers.epub_reader import read_epub from pyWebLayout.io.readers.epub_reader import read_epub
from pyWebLayout.layout.document_pagination import DocumentPaginator from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
from pyWebLayout.layout.document_layouter import paragraph_layouter
from pyWebLayout.concrete.page import Page from pyWebLayout.concrete.page import Page
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style.fonts import Font from pyWebLayout.style.fonts import Font
from pyWebLayout.style.layout import Alignment from pyWebLayout.abstract.block import Block
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
except ImportError as e: except ImportError as e:
print(f"Error importing required modules: {e}") print(f"Error importing required modules: {e}")
@ -50,14 +52,14 @@ def render_page_to_image(page: Page) -> Image.Image:
return rendered_image return rendered_image
else: else:
# If render() doesn't return a PIL Image, create error image # If render() doesn't return a PIL Image, create error image
error_image = Image.new('RGB', page._size, 'white') error_image = Image.new('RGB', page.size, 'white')
draw = ImageDraw.Draw(error_image) draw = ImageDraw.Draw(error_image)
draw.text((20, 20), "Error: Page.render() did not return PIL Image", fill='red') draw.text((20, 20), "Error: Page.render() did not return PIL Image", fill='red')
return error_image return error_image
except Exception as e: except Exception as e:
# Create error image if rendering fails # Create error image if rendering fails
error_image = Image.new('RGB', page._size, 'white') error_image = Image.new('RGB', page.size, 'white')
draw = ImageDraw.Draw(error_image) draw = ImageDraw.Draw(error_image)
draw.text((20, 20), f"Rendering error: {str(e)}", fill='red') draw.text((20, 20), f"Rendering error: {str(e)}", fill='red')
print(f"Warning: Error rendering page: {e}") print(f"Warning: Error rendering page: {e}")
@ -85,19 +87,25 @@ def extract_text_from_page(page: Page) -> str:
# Import abstract block types # Import abstract block types
from pyWebLayout.abstract.block import Paragraph, Heading, HList, Table, Image as AbstractImage from pyWebLayout.abstract.block import Paragraph, Heading, HList, Table, Image as AbstractImage
from pyWebLayout.concrete.text import Line
# Handle abstract block objects first # Handle Line objects (concrete)
if isinstance(element, Paragraph): if isinstance(element, Line):
# Extract text from paragraph line_text = []
if hasattr(element, '_text_objects') and element._text_objects:
for text_obj in element._text_objects:
if hasattr(text_obj, 'text'):
line_text.append(str(text_obj.text))
if line_text:
text_lines.append(f"{indent}{' '.join(line_text)}")
# Handle abstract block objects
elif isinstance(element, (Paragraph, Heading)):
# Extract text from paragraph/heading
paragraph_text = extract_text_from_paragraph(element) paragraph_text = extract_text_from_paragraph(element)
if paragraph_text: if paragraph_text:
text_lines.append(f"{indent}PARAGRAPH: {paragraph_text}") block_type = "HEADING" if isinstance(element, Heading) else "PARAGRAPH"
text_lines.append(f"{indent}{block_type}: {paragraph_text}")
elif isinstance(element, Heading):
# Extract text from heading
heading_text = extract_text_from_paragraph(element)
if heading_text:
text_lines.append(f"{indent}HEADING: {heading_text}")
elif isinstance(element, HList): elif isinstance(element, HList):
text_lines.append(f"{indent}LIST:") text_lines.append(f"{indent}LIST:")
@ -115,7 +123,7 @@ def extract_text_from_page(page: Page) -> str:
elif isinstance(element, AbstractImage): elif isinstance(element, AbstractImage):
alt_text = getattr(element, 'alt_text', '') alt_text = getattr(element, 'alt_text', '')
src = getattr(element, 'src', 'Unknown') src = getattr(element, 'source', 'Unknown')
text_lines.append(f"{indent}[IMAGE: {alt_text or src}]") text_lines.append(f"{indent}[IMAGE: {alt_text or src}]")
# Handle containers with children # Handle containers with children
@ -129,15 +137,6 @@ def extract_text_from_page(page: Page) -> str:
if text: if text:
text_lines.append(f"{indent}{text}") text_lines.append(f"{indent}{text}")
# Handle lines with text objects
elif hasattr(element, '_text_objects') and element._text_objects:
line_text = []
for text_obj in element._text_objects:
if hasattr(text_obj, 'text'):
line_text.append(str(text_obj.text))
if line_text:
text_lines.append(f"{indent}{' '.join(line_text)}")
# Handle other object types by showing their class name # Handle other object types by showing their class name
else: else:
class_name = element.__class__.__name__ class_name = element.__class__.__name__
@ -148,8 +147,8 @@ def extract_text_from_page(page: Page) -> str:
words = [] words = []
try: try:
# Try to get words from the paragraph # Try to get words from the paragraph
if hasattr(para_obj, 'words') and callable(para_obj.words): if hasattr(para_obj, 'words_iter') and callable(para_obj.words_iter):
for _, word in para_obj.words(): for _, word in para_obj.words_iter():
if hasattr(word, 'text'): if hasattr(word, 'text'):
words.append(word.text) words.append(word.text)
else: else:
@ -183,6 +182,27 @@ def extract_text_from_page(page: Page) -> str:
return "\n".join(text_lines) return "\n".join(text_lines)
def get_all_blocks_from_book(book) -> List["Block"]:
    """
    Extract all blocks from all chapters in the book.

    Chapters are visited in the order given by ``book.chapters`` and the
    contents of each chapter's ``_blocks`` are appended to the result in
    that order. A chapter that has no ``_blocks`` attribute, or whose
    ``_blocks`` is ``None``, contributes nothing.

    Args:
        book: The Book object from epub_reader

    Returns:
        List of all Block objects
    """
    all_blocks = []

    # Iterate through all chapters
    for chapter in book.chapters:
        # A missing or None _blocks is skipped rather than passed to
        # extend(), which would raise TypeError on None.
        chapter_blocks = getattr(chapter, '_blocks', None)
        if chapter_blocks is not None:
            all_blocks.extend(chapter_blocks)

    return all_blocks
def main(): def main():
"""Main function to handle command line arguments and process the EPUB.""" """Main function to handle command line arguments and process the EPUB."""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@ -234,6 +254,13 @@ Examples:
help='Page margin in pixels (default: 40)' help='Page margin in pixels (default: 40)'
) )
parser.add_argument(
'--align', '-a',
choices=['left', 'justify'],
default='left',
help='Text alignment: left or justify (default: left)'
)
args = parser.parse_args() args = parser.parse_args()
# Validate arguments # Validate arguments
@ -268,42 +295,100 @@ Examples:
except Exception as e: except Exception as e:
print(f"Error loading EPUB file: {e}") print(f"Error loading EPUB file: {e}")
import traceback
traceback.print_exc()
return 1 return 1
# Set up pagination # Extract all blocks from the book
page_size = (args.width, args.height) print("Extracting content blocks...")
margins = (args.margin, args.margin, args.margin, args.margin) # top, right, bottom, left try:
all_blocks = get_all_blocks_from_book(book)
print(f"Extracted {len(all_blocks)} content blocks")
print(f"Setting up pagination with page size {page_size} and margins {margins}") if not all_blocks:
print("No content blocks found in EPUB. The book might be empty.")
return 1
# Apply alignment setting to all paragraphs and headings
from pyWebLayout.abstract.block import Paragraph, Heading
from pyWebLayout.style.alignment import Alignment
alignment = Alignment.JUSTIFY if args.align == 'justify' else Alignment.LEFT
print(f"Applying {args.align} alignment to all text blocks...")
# Note: We'll pass alignment to the layouter which will handle it during rendering
# The alignment is applied at the Line level in paragraph_layouter
except Exception as e:
print(f"Error extracting blocks: {e}")
import traceback
traceback.print_exc()
return 1
# Set up page style and layouter
page_size = (args.width, args.height)
page_style = PageStyle(
background_color=(255, 255, 255),
border_width=args.margin,
border_color=(200, 200, 200),
padding=(10, 10, 10, 10), # top, right, bottom, left
line_spacing=5,
inter_block_spacing=15
)
print(f"Setting up layouter with page size {page_size} and {args.align} alignment")
try: try:
paginator = DocumentPaginator( layouter = BidirectionalLayouter(
document=book, blocks=all_blocks,
page_style=page_style,
page_size=page_size, page_size=page_size,
margins=margins, alignment_override=alignment
spacing=5,
halign=Alignment.LEFT
) )
except Exception as e: except Exception as e:
print(f"Error setting up paginator: {e}") print(f"Error setting up layouter: {e}")
import traceback
traceback.print_exc()
return 1 return 1
# Render pages # Render pages
print(f"Rendering {args.pages} pages...") print(f"Rendering up to {args.pages} pages...")
try: try:
# Generate pages pages = []
pages = paginator.paginate(max_pages=args.pages) current_position = RenderingPosition() # Start from beginning
for page_num in range(args.pages):
print(f"Rendering page {page_num + 1}/{args.pages}...")
try:
# Render the page
page, next_position = layouter.render_page_forward(current_position)
pages.append(page)
# Check if we've reached the end of the document
if next_position.block_index >= len(all_blocks):
print(f"Reached end of document after {page_num + 1} pages")
break
# Update position for next page
current_position = next_position
except Exception as e:
print(f"Error rendering page {page_num + 1}: {e}")
import traceback
traceback.print_exc()
break
if not pages: if not pages:
print("No pages were generated. The book might be empty or there might be an issue with pagination.") print("No pages were generated.")
return 1 return 1
print(f"Generated {len(pages)} pages") print(f"Generated {len(pages)} pages")
# Render each page to an image and extract text # Save each page to an image and extract text
for i, page in enumerate(pages): for i, page in enumerate(pages):
print(f"Rendering page {i + 1}/{len(pages)}...") print(f"Saving page {i + 1}/{len(pages)}...")
try: try:
# Create image from page using pyWebLayout's built-in rendering # Create image from page using pyWebLayout's built-in rendering
@ -324,18 +409,22 @@ Examples:
print(f"Saved: {output_path} and {text_path}") print(f"Saved: {output_path} and {text_path}")
except Exception as e: except Exception as e:
print(f"Error rendering page {i + 1}: {e}") print(f"Error saving page {i + 1}: {e}")
import traceback
traceback.print_exc()
continue continue
print(f"\nCompleted! Rendered {len(pages)} pages to {args.output_dir}") print(f"\nCompleted! Rendered {len(pages)} pages to {args.output_dir}")
# Show pagination progress # Calculate progress through the book
if hasattr(paginator, 'get_progress'): if len(all_blocks) > 0:
progress = paginator.get_progress() * 100 progress = (current_position.block_index / len(all_blocks)) * 100
print(f"Progress through book: {progress:.1f}%") print(f"Progress through book: {progress:.1f}%")
except Exception as e: except Exception as e:
print(f"Error during pagination/rendering: {e}") print(f"Error during pagination/rendering: {e}")
import traceback
traceback.print_exc()
return 1 return 1
return 0 return 0

View File

@ -27,16 +27,41 @@ class Page(Renderable, Queriable):
self._children: List[Renderable] = [] self._children: List[Renderable] = []
self._canvas: Optional[Image.Image] = None self._canvas: Optional[Image.Image] = None
self._draw: Optional[ImageDraw.Draw] = None self._draw: Optional[ImageDraw.Draw] = None
self._current_y_offset = 0 # Track vertical position for layout # Initialize y_offset to start of content area
# Position the first line so its baseline is close to the top boundary
# For subsequent lines, baseline-to-baseline spacing is used
self._current_y_offset = self._style.border_width + self._style.padding_top
self._is_first_line = True # Track if we're placing the first line
def free_space(self) -> Tuple[int, int]: def free_space(self) -> Tuple[int, int]:
"""Get the remaining space on the page""" """Get the remaining space on the page"""
return (self._size[0], self._size[1] - self._current_y_offset) return (self._size[0], self._size[1] - self._current_y_offset)
def can_fit_line(self, line_height: int) -> bool: def can_fit_line(self, baseline_spacing: int, ascent: int = 0, descent: int = 0) -> bool:
"""Check if a line of the given height can fit on the page.""" """
remaining_height = self.content_size[1] - (self._current_y_offset - self._style.border_width - self._style.padding_top) Check if a line with the given metrics can fit on the page.
return remaining_height >= line_height
Args:
baseline_spacing: Distance from current position to next baseline
ascent: Font ascent (height above baseline), defaults to 0 for backward compat
descent: Font descent (height below baseline), defaults to 0 for backward compat
Returns:
True if the line fits within page boundaries
"""
# Calculate the maximum Y position allowed (bottom boundary)
max_y = self._size[1] - self._style.border_width - self._style.padding_bottom
# If ascent/descent not provided, use simple check (backward compatibility)
if ascent == 0 and descent == 0:
return (self._current_y_offset + baseline_spacing) <= max_y
# Calculate where the bottom of the text would be
# Text bottom = current_y_offset + ascent + descent
text_bottom = self._current_y_offset + ascent + descent
# Check if text bottom would exceed the boundary
return text_bottom <= max_y
@property @property
def size(self) -> Tuple[int, int]: def size(self) -> Tuple[int, int]:
@ -122,7 +147,8 @@ class Page(Renderable, Queriable):
""" """
self._children.clear() self._children.clear()
self._canvas = None self._canvas = None
self._current_y_offset = 0 # Reset y_offset to start of content area (after border and padding)
self._current_y_offset = self._style.border_width + self._style.padding_top
return self return self
@property @property

View File

@ -53,7 +53,6 @@ class LeftAlignmentHandler(AlignmentHandler):
Returns: Returns:
Tuple[int, int, bool]: Spacing, start position, and overflow flag. Tuple[int, int, bool]: Spacing, start position, and overflow flag.
""" """
print("LeftAlignmentHandler:")
# Handle single word case # Handle single word case
if len(text_objects) <= 1: if len(text_objects) <= 1:
return 0, 0, False return 0, 0, False
@ -76,7 +75,6 @@ class LeftAlignmentHandler(AlignmentHandler):
# Calculate ideal spacing # Calculate ideal spacing
actual_spacing = residual_space // num_gaps actual_spacing = residual_space // num_gaps
print(actual_spacing)
# Clamp within bounds (CREngine pattern: respect max_spacing) # Clamp within bounds (CREngine pattern: respect max_spacing)
if actual_spacing > max_spacing: if actual_spacing > max_spacing:
return max_spacing, 0, False return max_spacing, 0, False
@ -108,7 +106,6 @@ class CenterRightAlignmentHandler(AlignmentHandler):
return 0, max(0, start_position), False return 0, max(0, start_position), False
actual_spacing = residual_space // (len(text_objects)-1) actual_spacing = residual_space // (len(text_objects)-1)
print(actual_spacing)
ideal_space = (min_spacing + max_spacing)/2 ideal_space = (min_spacing + max_spacing)/2
if actual_spacing > 0.5*(min_spacing + max_spacing): if actual_spacing > 0.5*(min_spacing + max_spacing):
actual_spacing = 0.5*(min_spacing + max_spacing) actual_spacing = 0.5*(min_spacing + max_spacing)
@ -138,7 +135,6 @@ class JustifyAlignmentHandler(AlignmentHandler):
actual_spacing = residual_space // num_gaps actual_spacing = residual_space // num_gaps
ideal_space = (min_spacing + max_spacing)//2 ideal_space = (min_spacing + max_spacing)//2
print(actual_spacing)
# can we touch the end? # can we touch the end?
if actual_spacing < max_spacing: if actual_spacing < max_spacing:
if actual_spacing < min_spacing: if actual_spacing < min_spacing:
@ -228,24 +224,58 @@ class Text(Renderable, Queriable):
"""Add this text to a line""" """Add this text to a line"""
self._line = line self._line = line
def _apply_decoration(self): def _apply_decoration(self, next_text: Optional['Text'] = None, spacing: int = 0):
"""Apply text decoration (underline or strikethrough)""" """
Apply text decoration (underline or strikethrough).
Args:
next_text: The next Text object in the line (if any)
spacing: The spacing to the next text object
"""
if self._style.decoration == TextDecoration.UNDERLINE: if self._style.decoration == TextDecoration.UNDERLINE:
# Draw underline at about 90% of the height # Draw underline at about 90% of the height
y_position = self._origin[1] - 0.1*self._style.font_size y_position = self._origin[1] - 0.1*self._style.font_size
self._draw.line([(0, y_position), (self._width, y_position)], line_width = max(1, int(self._style.font_size / 15))
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
# Determine end x-coordinate
end_x = self._origin[0] + self._width
# If next text also has underline decoration, extend to connect them
if (next_text is not None and
next_text.style.decoration == TextDecoration.UNDERLINE and
next_text.style.colour == self._style.colour):
# Extend the underline through the spacing to connect with next word
end_x += spacing
self._draw.line([(self._origin[0], y_position), (end_x, y_position)],
fill=self._style.colour, width=line_width)
elif self._style.decoration == TextDecoration.STRIKETHROUGH: elif self._style.decoration == TextDecoration.STRIKETHROUGH:
# Draw strikethrough at about 50% of the height # Draw strikethrough at about 50% of the height
y_position = self._origin[1] + self._middle_y y_position = self._origin[1] + self._middle_y
self._draw.line([(0, y_position), (self._width, y_position)], line_width = max(1, int(self._style.font_size / 15))
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
def render(self): # Determine end x-coordinate
end_x = self._origin[0] + self._width
# If next text also has strikethrough decoration, extend to connect them
if (next_text is not None and
next_text.style.decoration == TextDecoration.STRIKETHROUGH and
next_text.style.colour == self._style.colour):
# Extend the strikethrough through the spacing to connect with next word
end_x += spacing
self._draw.line([(self._origin[0], y_position), (end_x, y_position)],
fill=self._style.colour, width=line_width)
def render(self, next_text: Optional['Text'] = None, spacing: int = 0):
""" """
Render the text to an image. Render the text to an image.
Args:
next_text: The next Text object in the line (if any)
spacing: The spacing to the next text object
Returns: Returns:
A PIL Image containing the rendered text A PIL Image containing the rendered text
""" """
@ -254,11 +284,12 @@ class Text(Renderable, Queriable):
if self._style.background and self._style.background[3] > 0: # If alpha > 0 if self._style.background and self._style.background[3] > 0: # If alpha > 0
self._draw.rectangle([self._origin, self._origin+self._size], fill=self._style.background) self._draw.rectangle([self._origin, self._origin+self._size], fill=self._style.background)
# Draw the text using calculated offsets to prevent cropping # Draw the text using baseline as anchor point ("ls" = left-baseline)
self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font, fill=self._style.colour) # This ensures the origin represents the baseline, not the top-left
self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font, fill=self._style.colour, anchor="ls")
# Apply any text decorations # Apply any text decorations with knowledge of next text
self._apply_decoration() self._apply_decoration(next_text, spacing)
class Line(Box): class Line(Box):
""" """
@ -268,7 +299,10 @@ class Line(Box):
def __init__(self, spacing: Tuple[int, int], origin, size, draw: ImageDraw.Draw,font: Optional[Font] = None, def __init__(self, spacing: Tuple[int, int], origin, size, draw: ImageDraw.Draw,font: Optional[Font] = None,
callback=None, sheet=None, mode=None, halign=Alignment.CENTER, callback=None, sheet=None, mode=None, halign=Alignment.CENTER,
valign=Alignment.CENTER, previous = None): valign=Alignment.CENTER, previous = None,
min_word_length_for_brute_force: int = 8,
min_chars_before_hyphen: int = 2,
min_chars_after_hyphen: int = 2):
""" """
Initialize a new line. Initialize a new line.
@ -283,6 +317,9 @@ class Line(Box):
halign: Horizontal alignment of text within the line halign: Horizontal alignment of text within the line
valign: Vertical alignment of text within the line valign: Vertical alignment of text within the line
previous: Reference to the previous line previous: Reference to the previous line
min_word_length_for_brute_force: Minimum word length to attempt brute force hyphenation (default: 8)
min_chars_before_hyphen: Minimum characters before hyphen in any split (default: 2)
min_chars_after_hyphen: Minimum characters after hyphen in any split (default: 2)
""" """
super().__init__(origin, size, callback, sheet, mode, halign, valign) super().__init__(origin, size, callback, sheet, mode, halign, valign)
self._text_objects: List['Text'] = [] # Store Text objects directly self._text_objects: List['Text'] = [] # Store Text objects directly
@ -292,12 +329,18 @@ class Line(Box):
self._words : List['Word'] = [] self._words : List['Word'] = []
self._previous = previous self._previous = previous
self._next = None self._next = None
ascent,descent = self._font.font.getmetrics() ascent, descent = self._font.font.getmetrics()
self._baseline = self._origin[1] - ascent # Store baseline as offset from line origin (top), not absolute position
self._baseline = ascent
self._draw = draw self._draw = draw
self._spacing_render = (spacing[0] + spacing[1]) //2 self._spacing_render = (spacing[0] + spacing[1]) //2
self._position_render = 0 self._position_render = 0
# Hyphenation configuration parameters
self._min_word_length_for_brute_force = min_word_length_for_brute_force
self._min_chars_before_hyphen = min_chars_before_hyphen
self._min_chars_after_hyphen = min_chars_after_hyphen
# Create the appropriate alignment handler # Create the appropriate alignment handler
self._alignment_handler = self._create_alignment_handler(halign) self._alignment_handler = self._create_alignment_handler(halign)
@ -311,7 +354,6 @@ class Line(Box):
Returns: Returns:
The appropriate alignment handler instance The appropriate alignment handler instance
""" """
print("HALGIN!!!!!", alignment)
if alignment == Alignment.LEFT: if alignment == Alignment.LEFT:
return LeftAlignmentHandler() return LeftAlignmentHandler()
elif alignment == Alignment.JUSTIFY: elif alignment == Alignment.JUSTIFY:
@ -333,75 +375,141 @@ class Line(Box):
Add a word to this line using intelligent word fitting strategies. Add a word to this line using intelligent word fitting strategies.
Args: Args:
text: The text content of the word word: The word to add to the line
font: The font to use for this word, or None to use the line's default font part: Optional pretext from a previous hyphenated word
Returns: Returns:
True if the word was successfully added, False if it couldn't fit, in case of hypenation the hyphenated part is returned Tuple of (success, overflow_text):
- success: True if word/part was added, False if it couldn't fit
- overflow_text: Remaining text if word was hyphenated, None otherwise
""" """
# First, add any pretext from previous hyphenation
if part is not None: if part is not None:
self._text_objects.append(part) self._text_objects.append(part)
self._words.append(word) self._words.append(word)
part.add_line(self) part.add_line(self)
# Try to add the full word
text = Text.from_word(word, self._draw) text = Text.from_word(word, self._draw)
self._text_objects.append(text) self._text_objects.append(text)
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1]) spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(
print(self._alignment_handler) self._text_objects, self._size[0], self._spacing[0], self._spacing[1]
)
if not overflow: if not overflow:
# Word fits! Add it completely
self._words.append(word) self._words.append(word)
word.add_concete(text) word.add_concete(text)
text.add_line(self) text.add_line(self)
self._position_render = position self._position_render = position
self._spacing_render = spacing self._spacing_render = spacing
return True, None # no overflow word is just added! return True, None
_=self._text_objects.pop() # Word doesn't fit, remove it and try hyphenation
splits = [(Text(pair[0]+"-", word.style,self._draw, line=self, source=word), Text( pair[1], word.style, self._draw, line=self, source=word)) for pair in word.possible_hyphenation()] _ = self._text_objects.pop()
#worst case scenario! # Step 1: Try pyphen hyphenation
if len(splits)==0 and len(word.text)>=6: pyphen_splits = word.possible_hyphenation()
text = Text(word.text+"-", word.style, self._draw) # add hypen to know true length valid_splits = []
word_length = sum([text.width for text in self._text_objects])
spacing_length = self._spacing[0] * (len(self._text_objects) - 1) if pyphen_splits:
remaining=self._size[0] - word_length - spacing_length # Create Text objects for each possible split and check if they fit
fraction = remaining / text.width for pair in pyphen_splits:
# Calculate split position: fraction represents what portion of the hyphenated word fits first_part_text = pair[0] + "-"
# We need to scale this to the original word length, accounting for the hyphen second_part_text = pair[1]
hyphenated_length = len(word.text) + 1 # +1 for hyphen
split_in_hyphenated = round(fraction * hyphenated_length) # Validate minimum character requirements
# Map back to original word, ensuring we don't go past the word length if len(pair[0]) < self._min_chars_before_hyphen:
spliter = min(len(word.text) - 1, max(1, split_in_hyphenated)) continue
split = [Text(word.text[:spliter]+"-", word.style, self._draw, line=self, source=word), Text(word.text[spliter:], word.style, self._draw, line=self, source=word)] if len(pair[1]) < self._min_chars_after_hyphen:
self._text_objects.append(split[0]) continue
word.add_concete(split)
split[0].add_line(self) # Create Text objects
split[1].add_line(self) first_text = Text(first_part_text, word.style, self._draw, line=self, source=word)
self._spacing_render = self._spacing[0] second_text = Text(second_part_text, word.style, self._draw, line=self, source=word)
# Check if first part fits
self._text_objects.append(first_text)
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(
self._text_objects, self._size[0], self._spacing[0], self._spacing[1]
)
_ = self._text_objects.pop()
if not overflow:
# This split fits! Add it to valid options
valid_splits.append((first_text, second_text, spacing, position))
# Step 2: If we have valid pyphen splits, choose the best one
if valid_splits:
# Select the split with the best (minimum) spacing
best_split = min(valid_splits, key=lambda x: x[2])
first_text, second_text, spacing, position = best_split
# Apply the split
self._text_objects.append(first_text)
first_text.line = self
word.add_concete((first_text, second_text))
self._spacing_render = spacing
self._position_render = position self._position_render = position
return True, split[1] # we apply a brute force split
elif len(splits)==0 and len(word.text)<6:
return False, None # this endpoint means no words can be added.
spacings = []
positions = []
for split in splits:
self._text_objects.append(split[0])
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1])
spacings.append(spacing)
positions.append(position)
_=self._text_objects.pop()
idx = int(np.argmin(spacings))
self._text_objects.append(splits[idx][0])
splits[idx][0].line=self
word.add_concete(splits[idx])
self._spacing_render = spacings[idx]
self._position_render = positions[idx]
self._words.append(word) self._words.append(word)
return True, splits[idx][1] # we apply a phyphenated split with best spacing return True, second_text
# Step 3: Try brute force hyphenation (only for long words)
if len(word.text) >= self._min_word_length_for_brute_force:
# Calculate available space for the word
word_length = sum([text.width for text in self._text_objects])
spacing_length = self._spacing[0] * max(0, len(self._text_objects) - 1)
remaining = self._size[0] - word_length - spacing_length
if remaining > 0:
# Create a hyphenated version to measure
test_text = Text(word.text + "-", word.style, self._draw)
if test_text.width > 0:
# Calculate what fraction of the hyphenated word fits
fraction = remaining / test_text.width
# Convert fraction to character position
# We need at least min_chars_before_hyphen and leave at least min_chars_after_hyphen
max_split_pos = len(word.text) - self._min_chars_after_hyphen
min_split_pos = self._min_chars_before_hyphen
# Calculate ideal split position based on available space
ideal_split = int(fraction * len(word.text))
split_pos = max(min_split_pos, min(ideal_split, max_split_pos))
# Ensure we meet minimum requirements
if (split_pos >= self._min_chars_before_hyphen and
len(word.text) - split_pos >= self._min_chars_after_hyphen):
# Create the split
first_part_text = word.text[:split_pos] + "-"
second_part_text = word.text[split_pos:]
first_text = Text(first_part_text, word.style, self._draw, line=self, source=word)
second_text = Text(second_part_text, word.style, self._draw, line=self, source=word)
# Verify the first part actually fits
self._text_objects.append(first_text)
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(
self._text_objects, self._size[0], self._spacing[0], self._spacing[1]
)
if not overflow:
# Brute force split works!
first_text.line = self
second_text.line = self
word.add_concete((first_text, second_text))
self._spacing_render = spacing
self._position_render = position
self._words.append(word)
return True, second_text
else:
# Doesn't fit, remove it
_ = self._text_objects.pop()
# Step 4: Word cannot be hyphenated or split, move to next line
return False, None
def render(self): def render(self):
""" """
@ -422,9 +530,14 @@ class Line(Box):
# Start x_cursor at line origin plus any alignment offset # Start x_cursor at line origin plus any alignment offset
x_cursor = self._origin[0] + self._position_render x_cursor = self._origin[0] + self._position_render
for text in self._text_objects: for i, text in enumerate(self._text_objects):
# Update text draw context to current draw context # Update text draw context to current draw context
text._draw = self._draw text._draw = self._draw
text.set_origin(np.array([x_cursor, y_cursor])) text.set_origin(np.array([x_cursor, y_cursor]))
text.render()
# Determine next text object for continuous decoration
next_text = self._text_objects[i + 1] if i + 1 < len(self._text_objects) else None
# Render with next text information for continuous underline/strikethrough
text.render(next_text, self._spacing_render)
x_cursor += self._spacing_render + text.width # x-spacing + width of text object x_cursor += self._spacing_render + text.width # x-spacing + width of text object

View File

@ -4,9 +4,9 @@ from typing import List, Tuple, Optional, Union
from pyWebLayout.concrete import Page, Line, Text from pyWebLayout.concrete import Page, Line, Text
from pyWebLayout.abstract import Paragraph, Word, Link from pyWebLayout.abstract import Paragraph, Word, Link
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]: def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
""" """
Layout a paragraph of text within a given page. Layout a paragraph of text within a given page.
@ -18,6 +18,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
page: The page to layout the paragraph on page: The page to layout the paragraph on
start_word: Index of the first word to process (for continuation) start_word: Index of the first word to process (for continuation)
pretext: Optional pretext from a previous hyphenated word pretext: Optional pretext from a previous hyphenated word
alignment_override: Optional alignment to override the paragraph's default alignment
Returns: Returns:
Tuple of: Tuple of:
@ -32,21 +33,70 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
if start_word >= len(paragraph.words): if start_word >= len(paragraph.words):
return True, None, None return True, None, None
# Get the concrete style with resolved word spacing constraints # paragraph.style is already a Font object (concrete), not AbstractStyle
style_registry = ConcreteStyleRegistry(page.style_resolver) # We need to get word spacing constraints from the Font's abstract style if available
concrete_style = style_registry.get_concrete_style(paragraph.style) # For now, use reasonable defaults based on font size
from pyWebLayout.style import Font, Alignment
# Extract word spacing constraints (min, max) for Line constructor if isinstance(paragraph.style, Font):
# paragraph.style is already a Font (concrete style)
font = paragraph.style
# Use default word spacing constraints based on font size
# Minimum spacing should be proportional to font size for better readability
min_spacing = float(font.font_size) * 0.25 # 25% of font size
max_spacing = float(font.font_size) * 0.5 # 50% of font size
word_spacing_constraints = (int(min_spacing), int(max_spacing))
text_align = Alignment.LEFT # Default alignment
else:
# paragraph.style is an AbstractStyle, resolve it
rendering_context = RenderingContext(base_font_size=paragraph.style.font_size)
style_resolver = StyleResolver(rendering_context)
style_registry = ConcreteStyleRegistry(style_resolver)
concrete_style = style_registry.get_concrete_style(paragraph.style)
font = concrete_style.create_font()
word_spacing_constraints = ( word_spacing_constraints = (
int(concrete_style.word_spacing_min), int(concrete_style.word_spacing_min),
int(concrete_style.word_spacing_max) int(concrete_style.word_spacing_max)
) )
text_align = concrete_style.text_align
def create_new_line(word: Optional[Union[Word, Text]] = None) -> Optional[Line]: # Apply alignment override if provided
if alignment_override is not None:
text_align = alignment_override
# Cap font size to page maximum if needed
if font.font_size > page.style.max_font_size:
from pyWebLayout.style import Font
font = Font(
font_path=font._font_path,
font_size=page.style.max_font_size,
colour=font.colour,
weight=font.weight,
style=font.style,
decoration=font.decoration,
background=font.background
)
# Calculate baseline-to-baseline spacing using line spacing multiplier
# This is the vertical distance between baselines of consecutive lines
baseline_spacing = int(font.font_size * page.style.line_spacing_multiplier)
# Get font metrics for boundary checking
ascent, descent = font.font.getmetrics()
def create_new_line(word: Optional[Union[Word, Text]] = None, is_first_line: bool = False) -> Optional[Line]:
"""Helper function to create a new line, returns None if page is full.""" """Helper function to create a new line, returns None if page is full."""
if not page.can_fit_line(paragraph.line_height): # Check if this line's baseline and descenders would fit on the page
if not page.can_fit_line(baseline_spacing, ascent, descent):
return None return None
# For the first line, position it so text starts at the top boundary
# For subsequent lines, use current y_offset which tracks baseline-to-baseline spacing
if is_first_line:
# Position line origin so that baseline (origin + ascent) is close to top
# We want minimal space above the text, so origin should be at boundary
y_cursor = page._current_y_offset
else:
y_cursor = page._current_y_offset y_cursor = page._current_y_offset
x_cursor = page.border_size x_cursor = page.border_size
@ -60,10 +110,10 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
return Line( return Line(
spacing=word_spacing_constraints, spacing=word_spacing_constraints,
origin=(x_cursor, y_cursor), origin=(x_cursor, y_cursor),
size=(page.available_width, paragraph.line_height), size=(page.available_width, baseline_spacing),
draw=page.draw, draw=page.draw,
font=concrete_style.create_font(), font=font,
halign=concrete_style.text_align halign=text_align
) )
# Create initial line # Create initial line
@ -72,15 +122,14 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
return False, start_word, pretext return False, start_word, pretext
page.add_child(current_line) page.add_child(current_line)
page._current_y_offset += paragraph.line_height # Note: add_child already updates _current_y_offset based on child's origin and size
# No need to manually increment it here
# Track current position in paragraph # Track current position in paragraph
current_pretext = pretext current_pretext = pretext
# Process words starting from start_word # Process words starting from start_word
for i, word in enumerate(paragraph.words[start_word:], start=start_word): for i, word in enumerate(paragraph.words[start_word:], start=start_word):
if current_pretext:
print(current_pretext.text)
success, overflow_text = current_line.add_word(word, current_pretext) success, overflow_text = current_line.add_word(word, current_pretext)
if success: if success:
@ -93,7 +142,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
# If we can't create a new line, return with the current state # If we can't create a new line, return with the current state
return False, i, overflow_text return False, i, overflow_text
page.add_child(current_line) page.add_child(current_line)
page._current_y_offset += paragraph.line_height # Note: add_child already updates _current_y_offset
# Continue to the next word # Continue to the next word
continue continue
else: else:
@ -121,7 +170,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
continue continue
page.add_child(current_line) page.add_child(current_line)
page._current_y_offset += paragraph.line_height # Note: add_child already updates _current_y_offset
# Try to add the word to the new line # Try to add the word to the new line
success, overflow_text = current_line.add_word(word, current_pretext) success, overflow_text = current_line.add_word(word, current_pretext)
@ -142,55 +191,3 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
# All words processed successfully # All words processed successfully
return True, None, None return True, None, None
class DocumentLayouter:
    """
    Class-based document layouter bound to a single page.

    Thin wrapper around ``paragraph_layouter`` that remembers the target page
    and keeps a style registry built from that page's style resolver.
    """

    def __init__(self, page: Page):
        """Initialize the layouter with a page."""
        self.page = page
        self.style_registry = ConcreteStyleRegistry(page.style_resolver)

    def layout_paragraph(self, paragraph: Paragraph, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
        """
        Layout a paragraph on this layouter's page.

        Delegates directly to ``paragraph_layouter`` and returns its result
        unchanged.

        Args:
            paragraph: The paragraph to layout
            start_word: Index of the first word to process (for continuation)
            pretext: Optional pretext from a previous hyphenated word

        Returns:
            The tuple returned by ``paragraph_layouter``; its first element
            tells whether the paragraph was laid out completely.
        """
        return paragraph_layouter(paragraph, self.page, start_word, pretext)

    def layout_document(self, paragraphs: List[Paragraph]) -> bool:
        """
        Layout multiple paragraphs in sequence on the single bound page.

        Every paragraph is started from its first word with no pretext. There
        is no continuation onto another page: the first paragraph that does
        not complete ends the run.

        Args:
            paragraphs: List of paragraphs to layout

        Returns:
            True if all paragraphs were laid out successfully, False otherwise
        """
        for paragraph in paragraphs:
            complete, _next_word, _remaining_pretext = self.layout_paragraph(
                paragraph, 0, None
            )
            if not complete:
                # Either an error (no resume index) or the page is full.
                # Page breaks are not handled here, so both cases simply
                # report that more space is needed.
                return False

        return True

View File

@ -27,6 +27,7 @@ from pyWebLayout.concrete.page import Page
from pyWebLayout.concrete.text import Line, Text from pyWebLayout.concrete.text import Line, Text
from pyWebLayout.style.page_style import PageStyle from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style import Font from pyWebLayout.style import Font
from pyWebLayout.layout.document_layouter import paragraph_layouter
@dataclass @dataclass
@ -212,11 +213,12 @@ class BidirectionalLayouter:
Handles font scaling and maintains position state. Handles font scaling and maintains position state.
""" """
def __init__(self, blocks: List[Block], page_style: PageStyle, page_size: Tuple[int, int] = (800, 600)): def __init__(self, blocks: List[Block], page_style: PageStyle, page_size: Tuple[int, int] = (800, 600), alignment_override=None):
self.blocks = blocks self.blocks = blocks
self.page_style = page_style self.page_style = page_style
self.page_size = page_size self.page_size = page_size
self.chapter_navigator = ChapterNavigator(blocks) self.chapter_navigator = ChapterNavigator(blocks)
self.alignment_override = alignment_override
def render_page_forward(self, position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]: def render_page_forward(self, position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]:
""" """
@ -328,53 +330,65 @@ class BidirectionalLayouter:
return True, new_pos return True, new_pos
def _layout_paragraph_on_page(self, paragraph: Paragraph, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]: def _layout_paragraph_on_page(self, paragraph: Paragraph, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
"""Layout a paragraph on the page with font scaling support""" """
# This would integrate with the existing paragraph_layouter but with font scaling Layout a paragraph on the page using the core paragraph_layouter.
# For now, this is a placeholder implementation Integrates font scaling and position tracking with the proven layout logic.
# Calculate scaled line height Args:
line_height = int(paragraph.style.font_size * font_scale * 1.2) # 1.2 is line spacing factor paragraph: The paragraph to layout (already scaled if font_scale != 1.0)
page: The page to layout on
position: Current rendering position
font_scale: Font scaling factor (used for context, paragraph should already be scaled)
if not page.can_fit_line(line_height): Returns:
return False, position Tuple of (success, new_position)
"""
# Create a line and try to fit words # Convert remaining_pretext from string to Text object if needed
y_cursor = page._current_y_offset pretext_obj = None
x_cursor = page.border_size if position.remaining_pretext:
# Create a Text object from the pretext string
# Scale word spacing constraints pretext_obj = Text(
word_spacing = FontScaler.scale_word_spacing((5, 15), font_scale) # Default spacing position.remaining_pretext,
paragraph.style,
line = Line( page.draw,
spacing=word_spacing, line=None,
origin=(x_cursor, y_cursor), source=None
size=(page.available_width, line_height),
draw=page.draw,
font=FontScaler.scale_font(paragraph.style, font_scale)
) )
# Add words starting from position.word_index # Call the core paragraph layouter with alignment override if set
words_added = 0 success, failed_word_index, remaining_pretext = paragraph_layouter(
for i, word in enumerate(paragraph.words[position.word_index:], start=position.word_index): paragraph,
success, overflow = line.add_word(word) page,
if not success: start_word=position.word_index,
break pretext=pretext_obj,
words_added += 1 alignment_override=self.alignment_override
)
if words_added > 0:
page.add_child(line)
page._current_y_offset += line_height
# Create new position based on the result
new_pos = position.copy() new_pos = position.copy()
new_pos.word_index += words_added
# If we finished the paragraph, move to next block if success:
if new_pos.word_index >= len(paragraph.words): # Paragraph was fully laid out, move to next block
new_pos.block_index += 1 new_pos.block_index += 1
new_pos.word_index = 0 new_pos.word_index = 0
new_pos.remaining_pretext = None
return True, new_pos return True, new_pos
else:
# Paragraph was not fully laid out
if failed_word_index is not None:
# Update position to the word that didn't fit
new_pos.word_index = failed_word_index
# Convert Text object back to string if there's remaining pretext
if remaining_pretext is not None and hasattr(remaining_pretext, 'text'):
new_pos.remaining_pretext = remaining_pretext.text
else:
new_pos.remaining_pretext = None
return False, new_pos
else:
# No specific word failed, but layout wasn't successful
# This shouldn't normally happen, but handle it gracefully
return False, position return False, position
def _layout_heading_on_page(self, heading: Heading, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]: def _layout_heading_on_page(self, heading: Heading, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:

View File

@ -23,6 +23,10 @@ class PageStyle:
# Background color # Background color
background_color: Tuple[int, int, int] = (255, 255, 255) background_color: Tuple[int, int, int] = (255, 255, 255)
# Typography properties
max_font_size: int = 72 # Maximum font size allowed on a page
line_spacing_multiplier: float = 1.2 # Baseline-to-baseline spacing multiplier
@property @property
def padding_top(self) -> int: def padding_top(self) -> int:
return self.padding[0] return self.padding[0]

View File

@ -245,6 +245,79 @@ class TestPageImplementation(unittest.TestCase):
for i, child in enumerate(page.children): for i, child in enumerate(page.children):
self.assertEqual(child._text, f"Child {i}") self.assertEqual(child._text, f"Child {i}")
def test_page_can_fit_line_boundary_checking(self):
    """Test that can_fit_line correctly checks bottom boundary"""
    # Known geometry: 800x600 page, border 40, padding 10 on every side.
    page_height = 600
    border = 40
    pad = 10
    line_height = 20

    # Content area starts at border + padding_top and ends at
    # height - border - padding_bottom.
    top = border + pad  # 50
    bottom = page_height - border - pad  # 550

    style = PageStyle(
        border_width=border,
        padding=(pad, pad, pad, pad)
    )
    page = Page(size=(800, page_height), style=style)

    # Initial y_offset should be at the top of the content area
    self.assertEqual(page._current_y_offset, top)

    # Test 1: Line that fits comfortably (would end at 70, well within 550)
    self.assertTrue(page.can_fit_line(line_height))

    # Test 2: Simulate adding lines to fill the page.
    # Available height is 500 pixels, i.e. exactly 25 lines of 20 pixels;
    # add all of them except the last.
    total_lines = (bottom - top) // line_height  # 25
    for i in range(total_lines - 1):
        self.assertTrue(page.can_fit_line(line_height), f"Line {i+1} should fit")
        # Simulate adding a line by updating y_offset
        page._current_y_offset += line_height

    # After 24 lines: y_offset = 50 + (24 * 20) = 530
    self.assertEqual(page._current_y_offset, bottom - line_height)

    # Test 3: One more line should fit (530 + 20 = 550, exactly at boundary)
    self.assertTrue(page.can_fit_line(line_height))
    page._current_y_offset += line_height
    self.assertEqual(page._current_y_offset, bottom)

    # Test 4: Now another line should NOT fit (550 + 20 = 570 > 550)
    self.assertFalse(page.can_fit_line(line_height))

    # Test 5: Even a 1-pixel line should not fit (550 + 1 = 551 > 550)
    self.assertFalse(page.can_fit_line(1))

    # Test 6: Edge case - exactly at boundary, 0-height line should fit
    self.assertTrue(page.can_fit_line(0))
def test_page_can_fit_line_with_different_styles(self):
    """Check can_fit_line against pages built with contrasting styles."""
    # Case A: no border and no padding, so the offset starts at 0.
    bare_page = Page(
        size=(100, 100),
        style=PageStyle(border_width=0, padding=(0, 0, 0, 0)),
    )
    self.assertEqual(bare_page._current_y_offset, 0)
    # A 100-pixel line fits exactly; one pixel more does not.
    self.assertTrue(bare_page.can_fit_line(100))
    self.assertFalse(bare_page.can_fit_line(101))

    # Case B: large border and padding.
    framed_page = Page(
        size=(200, 200),
        style=PageStyle(border_width=20, padding=(15, 15, 15, 15)),
    )
    # Offset starts at border + padding_top = 20 + 15 = 35.
    self.assertEqual(framed_page._current_y_offset, 35)
    # Max y = 200 - 20 - 15 = 165, leaving 165 - 35 = 130 pixels.
    self.assertTrue(framed_page.can_fit_line(130))
    self.assertFalse(framed_page.can_fit_line(131))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -16,43 +16,12 @@ from pyWebLayout.layout.document_layouter import paragraph_layouter, DocumentLay
from pyWebLayout.style.abstract_style import AbstractStyle from pyWebLayout.style.abstract_style import AbstractStyle
from pyWebLayout.style.concrete_style import ConcreteStyle, StyleResolver, RenderingContext from pyWebLayout.style.concrete_style import ConcreteStyle, StyleResolver, RenderingContext
from pyWebLayout.style.fonts import Font from pyWebLayout.style.fonts import Font
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.concrete.page import Page
from pyWebLayout.concrete.text import Line, Text from pyWebLayout.concrete.text import Line, Text
from pyWebLayout.abstract.inline import Word from pyWebLayout.abstract.inline import Word
class MockPage:
    """Stand-in page for layout tests, backed by a real PIL drawing surface."""

    def __init__(self, width=400, height=600, max_lines=20):
        # Geometry read by the layouter.
        self.available_width = width
        self.available_height = height
        self.border_size = 20
        self._current_y_offset = 50

        # Line bookkeeping.
        self.max_lines = max_lines
        self.lines_added = 0
        self.children = []

        # Real drawing context, on a canvas slightly larger than the content area.
        canvas_size = (width + 40, height + 100)
        self.image = Image.new('RGB', canvas_size, 'white')
        self.draw = ImageDraw.Draw(self.image)

        # Real style resolver with a 16px base font size.
        self.style_resolver = StyleResolver(RenderingContext(base_font_size=16))

    def can_fit_line(self, line_height):
        """Return whether one more line of ``line_height`` fits on the page."""
        if self.available_height - self._current_y_offset < line_height:
            return False
        return self.lines_added < self.max_lines

    def add_child(self, child):
        """Record ``child`` on the page, count it as a line, and return True."""
        self.lines_added += 1
        self.children.append(child)
        return True
class MockWord(Word): class MockWord(Word):
"""A simple mock word that extends the real Word class.""" """A simple mock word that extends the real Word class."""
@ -106,8 +75,9 @@ class TestDocumentLayouterIntegration:
def test_single_page_layout_with_real_components(self): def test_single_page_layout_with_real_components(self):
"""Test layout on a single page using real Line and Text objects.""" """Test layout on a single page using real Line and Text objects."""
# Create a page that can fit content # Create a real page that can fit content
page = MockPage(width=500, height=400, max_lines=10) page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(500, 400), style=page_style)
# Create a paragraph with realistic content # Create a paragraph with realistic content
paragraph = MockParagraph( paragraph = MockParagraph(
@ -125,7 +95,6 @@ class TestDocumentLayouterIntegration:
# Verify lines were added to page # Verify lines were added to page
assert len(page.children) > 0 assert len(page.children) > 0
assert page.lines_added > 0
# Verify actual Line objects were created # Verify actual Line objects were created
for child in page.children: for child in page.children:
@ -135,8 +104,9 @@ class TestDocumentLayouterIntegration:
def test_multi_page_scenario_with_page_overflow(self): def test_multi_page_scenario_with_page_overflow(self):
"""Test realistic multi-page scenario with actual page overflow.""" """Test realistic multi-page scenario with actual page overflow."""
# Create a very small page that will definitely overflow # Create a very small real page that will definitely overflow
small_page = MockPage(width=150, height=80, max_lines=1) # Extremely small page small_page_style = PageStyle(border_width=5, padding=(5, 5, 5, 5))
small_page = Page(size=(150, 80), style=small_page_style)
# Create a long paragraph that will definitely overflow # Create a long paragraph that will definitely overflow
long_text = " ".join([f"verylongword{i:02d}" for i in range(20)]) # 20 long words long_text = " ".join([f"verylongword{i:02d}" for i in range(20)]) # 20 long words
@ -157,13 +127,13 @@ class TestDocumentLayouterIntegration:
# If it failed, verify overflow handling # If it failed, verify overflow handling
assert failed_word_index is not None # Should indicate where it failed assert failed_word_index is not None # Should indicate where it failed
assert failed_word_index < len(paragraph.words) # Should be within word range assert failed_word_index < len(paragraph.words) # Should be within word range
assert len(small_page.children) <= small_page.max_lines
print(f"✓ Multi-page test: Page overflow at word {failed_word_index}, {len(small_page.children)} lines fit") print(f"✓ Multi-page test: Page overflow at word {failed_word_index}, {len(small_page.children)} lines fit")
def test_word_spacing_constraints_in_real_lines(self): def test_word_spacing_constraints_in_real_lines(self):
"""Test that word spacing constraints are properly used in real Line objects.""" """Test that word spacing constraints are properly used in real Line objects."""
# Create page # Create real page
page = MockPage(width=400, height=300) page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(400, 300), style=page_style)
# Create paragraph with specific spacing constraints # Create paragraph with specific spacing constraints
paragraph = MockParagraph( paragraph = MockParagraph(
@ -197,7 +167,8 @@ class TestDocumentLayouterIntegration:
] ]
for alignment_name, style in alignments_to_test: for alignment_name, style in alignments_to_test:
page = MockPage(width=350, height=200) page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(350, 200), style=page_style)
paragraph = MockParagraph( paragraph = MockParagraph(
"This sentence will test different alignment strategies with word spacing.", "This sentence will test different alignment strategies with word spacing.",
style style
@ -217,8 +188,9 @@ class TestDocumentLayouterIntegration:
def test_realistic_document_with_multiple_pages(self): def test_realistic_document_with_multiple_pages(self):
"""Test a realistic document that spans multiple pages.""" """Test a realistic document that spans multiple pages."""
# Create multiple pages # Create multiple real pages
pages = [MockPage(width=400, height=300, max_lines=5) for _ in range(3)] page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
pages = [Page(size=(400, 300), style=page_style) for _ in range(3)]
# Create a document with multiple paragraphs # Create a document with multiple paragraphs
paragraphs = [ paragraphs = [
@ -275,7 +247,8 @@ class TestDocumentLayouterIntegration:
def test_word_spacing_constraint_resolution_integration(self): def test_word_spacing_constraint_resolution_integration(self):
"""Test the complete integration from AbstractStyle to Line spacing.""" """Test the complete integration from AbstractStyle to Line spacing."""
page = MockPage() page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(400, 600), style=page_style)
# Test different constraint scenarios # Test different constraint scenarios
test_cases = [ test_cases = [
@ -300,8 +273,9 @@ class TestDocumentLayouterIntegration:
] ]
for case in test_cases: for case in test_cases:
# Create fresh page for each test # Create fresh real page for each test
test_page = MockPage() test_page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
test_page = Page(size=(400, 600), style=test_page_style)
paragraph = MockParagraph( paragraph = MockParagraph(
"Testing constraint resolution with different scenarios.", "Testing constraint resolution with different scenarios.",
case["style"] case["style"]
@ -322,8 +296,9 @@ class TestDocumentLayouterIntegration:
def test_hyphenation_with_word_spacing_constraints(self): def test_hyphenation_with_word_spacing_constraints(self):
"""Test that hyphenation works correctly with word spacing constraints.""" """Test that hyphenation works correctly with word spacing constraints."""
# Create a narrow page to force hyphenation # Create a narrow real page to force hyphenation
narrow_page = MockPage(width=200, height=300) narrow_page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
narrow_page = Page(size=(200, 300), style=narrow_page_style)
# Create paragraph with long words that will need hyphenation # Create paragraph with long words that will need hyphenation
paragraph = MockParagraph( paragraph = MockParagraph(