Working version for ebook rendering!!

This commit is contained in:
Duncan Tourolle 2025-11-04 12:57:15 +01:00
parent fdb3023919
commit de18b1c2cc
8 changed files with 583 additions and 292 deletions

View File

@ -2,7 +2,7 @@
"""
Simple EPUB page renderer tool.
This tool uses the pyWebLayout epub_reader and typesetting modules to:
This tool uses the pyWebLayout epub_reader and layout modules to:
1. Load an EPUB file
2. Render the first X pages according to command line arguments
3. Save the pages as PNG images
@ -15,17 +15,19 @@ import os
import sys
import argparse
from pathlib import Path
from typing import Optional
from typing import Optional, List
# Add the parent directory to sys.path to import pyWebLayout
sys.path.insert(0, str(Path(__file__).parent.parent))
try:
from pyWebLayout.io.readers.epub_reader import read_epub
from pyWebLayout.layout.document_pagination import DocumentPaginator
from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition
from pyWebLayout.layout.document_layouter import paragraph_layouter
from pyWebLayout.concrete.page import Page
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style.fonts import Font
from pyWebLayout.style.layout import Alignment
from pyWebLayout.abstract.block import Block
from PIL import Image, ImageDraw
except ImportError as e:
print(f"Error importing required modules: {e}")
@ -50,14 +52,14 @@ def render_page_to_image(page: Page) -> Image.Image:
return rendered_image
else:
# If render() doesn't return a PIL Image, create error image
error_image = Image.new('RGB', page._size, 'white')
error_image = Image.new('RGB', page.size, 'white')
draw = ImageDraw.Draw(error_image)
draw.text((20, 20), "Error: Page.render() did not return PIL Image", fill='red')
return error_image
except Exception as e:
# Create error image if rendering fails
error_image = Image.new('RGB', page._size, 'white')
error_image = Image.new('RGB', page.size, 'white')
draw = ImageDraw.Draw(error_image)
draw.text((20, 20), f"Rendering error: {str(e)}", fill='red')
print(f"Warning: Error rendering page: {e}")
@ -85,19 +87,25 @@ def extract_text_from_page(page: Page) -> str:
# Import abstract block types
from pyWebLayout.abstract.block import Paragraph, Heading, HList, Table, Image as AbstractImage
from pyWebLayout.concrete.text import Line
# Handle abstract block objects first
if isinstance(element, Paragraph):
# Extract text from paragraph
# Handle Line objects (concrete)
if isinstance(element, Line):
line_text = []
if hasattr(element, '_text_objects') and element._text_objects:
for text_obj in element._text_objects:
if hasattr(text_obj, 'text'):
line_text.append(str(text_obj.text))
if line_text:
text_lines.append(f"{indent}{' '.join(line_text)}")
# Handle abstract block objects
elif isinstance(element, (Paragraph, Heading)):
# Extract text from paragraph/heading
paragraph_text = extract_text_from_paragraph(element)
if paragraph_text:
text_lines.append(f"{indent}PARAGRAPH: {paragraph_text}")
elif isinstance(element, Heading):
# Extract text from heading
heading_text = extract_text_from_paragraph(element)
if heading_text:
text_lines.append(f"{indent}HEADING: {heading_text}")
block_type = "HEADING" if isinstance(element, Heading) else "PARAGRAPH"
text_lines.append(f"{indent}{block_type}: {paragraph_text}")
elif isinstance(element, HList):
text_lines.append(f"{indent}LIST:")
@ -115,7 +123,7 @@ def extract_text_from_page(page: Page) -> str:
elif isinstance(element, AbstractImage):
alt_text = getattr(element, 'alt_text', '')
src = getattr(element, 'src', 'Unknown')
src = getattr(element, 'source', 'Unknown')
text_lines.append(f"{indent}[IMAGE: {alt_text or src}]")
# Handle containers with children
@ -129,15 +137,6 @@ def extract_text_from_page(page: Page) -> str:
if text:
text_lines.append(f"{indent}{text}")
# Handle lines with text objects
elif hasattr(element, '_text_objects') and element._text_objects:
line_text = []
for text_obj in element._text_objects:
if hasattr(text_obj, 'text'):
line_text.append(str(text_obj.text))
if line_text:
text_lines.append(f"{indent}{' '.join(line_text)}")
# Handle other object types by showing their class name
else:
class_name = element.__class__.__name__
@ -148,8 +147,8 @@ def extract_text_from_page(page: Page) -> str:
words = []
try:
# Try to get words from the paragraph
if hasattr(para_obj, 'words') and callable(para_obj.words):
for _, word in para_obj.words():
if hasattr(para_obj, 'words_iter') and callable(para_obj.words_iter):
for _, word in para_obj.words_iter():
if hasattr(word, 'text'):
words.append(word.text)
else:
@ -183,6 +182,27 @@ def extract_text_from_page(page: Page) -> str:
return "\n".join(text_lines)
def get_all_blocks_from_book(book) -> 'List[Block]':
    """
    Extract all blocks from all chapters in the book.

    Chapters that have no ``_blocks`` attribute, or whose ``_blocks`` is
    ``None``, contribute nothing instead of raising.

    Args:
        book: The Book object from epub_reader; only its ``chapters``
            iterable is read here.

    Returns:
        Flat list of the Block objects of every chapter, in chapter order.
    """
    all_blocks = []

    for chapter in book.chapters:
        # Chapter content is read from the private ``_blocks`` attribute.
        # A missing attribute and an explicit None are both treated as
        # "no blocks" so that list.extend() is never handed None.
        chapter_blocks = getattr(chapter, '_blocks', None)
        if chapter_blocks is not None:
            all_blocks.extend(chapter_blocks)

    return all_blocks
def main():
"""Main function to handle command line arguments and process the EPUB."""
parser = argparse.ArgumentParser(
@ -234,6 +254,13 @@ Examples:
help='Page margin in pixels (default: 40)'
)
parser.add_argument(
'--align', '-a',
choices=['left', 'justify'],
default='left',
help='Text alignment: left or justify (default: left)'
)
args = parser.parse_args()
# Validate arguments
@ -268,42 +295,100 @@ Examples:
except Exception as e:
print(f"Error loading EPUB file: {e}")
import traceback
traceback.print_exc()
return 1
# Set up pagination
page_size = (args.width, args.height)
margins = (args.margin, args.margin, args.margin, args.margin) # top, right, bottom, left
# Extract all blocks from the book
print("Extracting content blocks...")
try:
all_blocks = get_all_blocks_from_book(book)
print(f"Extracted {len(all_blocks)} content blocks")
print(f"Setting up pagination with page size {page_size} and margins {margins}")
if not all_blocks:
print("No content blocks found in EPUB. The book might be empty.")
return 1
# Apply alignment setting to all paragraphs and headings
from pyWebLayout.abstract.block import Paragraph, Heading
from pyWebLayout.style.alignment import Alignment
alignment = Alignment.JUSTIFY if args.align == 'justify' else Alignment.LEFT
print(f"Applying {args.align} alignment to all text blocks...")
# Note: We'll pass alignment to the layouter which will handle it during rendering
# The alignment is applied at the Line level in paragraph_layouter
except Exception as e:
print(f"Error extracting blocks: {e}")
import traceback
traceback.print_exc()
return 1
# Set up page style and layouter
page_size = (args.width, args.height)
page_style = PageStyle(
background_color=(255, 255, 255),
border_width=args.margin,
border_color=(200, 200, 200),
padding=(10, 10, 10, 10), # top, right, bottom, left
line_spacing=5,
inter_block_spacing=15
)
print(f"Setting up layouter with page size {page_size} and {args.align} alignment")
try:
paginator = DocumentPaginator(
document=book,
layouter = BidirectionalLayouter(
blocks=all_blocks,
page_style=page_style,
page_size=page_size,
margins=margins,
spacing=5,
halign=Alignment.LEFT
alignment_override=alignment
)
except Exception as e:
print(f"Error setting up paginator: {e}")
print(f"Error setting up layouter: {e}")
import traceback
traceback.print_exc()
return 1
# Render pages
print(f"Rendering {args.pages} pages...")
print(f"Rendering up to {args.pages} pages...")
try:
# Generate pages
pages = paginator.paginate(max_pages=args.pages)
pages = []
current_position = RenderingPosition() # Start from beginning
for page_num in range(args.pages):
print(f"Rendering page {page_num + 1}/{args.pages}...")
try:
# Render the page
page, next_position = layouter.render_page_forward(current_position)
pages.append(page)
# Check if we've reached the end of the document
if next_position.block_index >= len(all_blocks):
print(f"Reached end of document after {page_num + 1} pages")
break
# Update position for next page
current_position = next_position
except Exception as e:
print(f"Error rendering page {page_num + 1}: {e}")
import traceback
traceback.print_exc()
break
if not pages:
print("No pages were generated. The book might be empty or there might be an issue with pagination.")
print("No pages were generated.")
return 1
print(f"Generated {len(pages)} pages")
# Render each page to an image and extract text
# Save each page to an image and extract text
for i, page in enumerate(pages):
print(f"Rendering page {i + 1}/{len(pages)}...")
print(f"Saving page {i + 1}/{len(pages)}...")
try:
# Create image from page using pyWebLayout's built-in rendering
@ -324,18 +409,22 @@ Examples:
print(f"Saved: {output_path} and {text_path}")
except Exception as e:
print(f"Error rendering page {i + 1}: {e}")
print(f"Error saving page {i + 1}: {e}")
import traceback
traceback.print_exc()
continue
print(f"\nCompleted! Rendered {len(pages)} pages to {args.output_dir}")
# Show pagination progress
if hasattr(paginator, 'get_progress'):
progress = paginator.get_progress() * 100
# Calculate progress through the book
if len(all_blocks) > 0:
progress = (current_position.block_index / len(all_blocks)) * 100
print(f"Progress through book: {progress:.1f}%")
except Exception as e:
print(f"Error during pagination/rendering: {e}")
import traceback
traceback.print_exc()
return 1
return 0

View File

@ -27,16 +27,41 @@ class Page(Renderable, Queriable):
self._children: List[Renderable] = []
self._canvas: Optional[Image.Image] = None
self._draw: Optional[ImageDraw.Draw] = None
self._current_y_offset = 0 # Track vertical position for layout
# Initialize y_offset to start of content area
# Position the first line so its baseline is close to the top boundary
# For subsequent lines, baseline-to-baseline spacing is used
self._current_y_offset = self._style.border_width + self._style.padding_top
self._is_first_line = True # Track if we're placing the first line
def free_space(self) -> Tuple[int, int]:
"""Get the remaining space on the page"""
return (self._size[0], self._size[1] - self._current_y_offset)
def can_fit_line(self, line_height: int) -> bool:
"""Check if a line of the given height can fit on the page."""
remaining_height = self.content_size[1] - (self._current_y_offset - self._style.border_width - self._style.padding_top)
return remaining_height >= line_height
def can_fit_line(self, baseline_spacing: int, ascent: int = 0, descent: int = 0) -> bool:
    """
    Report whether another line fits above the page's bottom boundary.

    The bottom boundary is the page height minus the border width and the
    bottom padding. Two checks exist:

    * no metrics given (``ascent`` and ``descent`` both 0): the current
      y offset plus ``baseline_spacing`` must not pass the boundary;
    * metrics given: the current y offset plus ``ascent + descent`` must
      not pass the boundary (``baseline_spacing`` is not used here).

    Args:
        baseline_spacing: Distance from current position to next baseline
        ascent: Font ascent (height above baseline); 0 keeps the simple check
        descent: Font descent (height below baseline); 0 keeps the simple check

    Returns:
        True if the line fits within page boundaries
    """
    style = self._style
    bottom_limit = self._size[1] - style.border_width - style.padding_bottom

    metrics_given = not (ascent == 0 and descent == 0)
    if metrics_given:
        # Lowest pixel the glyphs would reach
        needed_bottom = self._current_y_offset + ascent + descent
    else:
        # Backward-compatible path: only the spacing is known
        needed_bottom = self._current_y_offset + baseline_spacing

    return needed_bottom <= bottom_limit
@property
def size(self) -> Tuple[int, int]:
@ -122,7 +147,8 @@ class Page(Renderable, Queriable):
"""
self._children.clear()
self._canvas = None
self._current_y_offset = 0
# Reset y_offset to start of content area (after border and padding)
self._current_y_offset = self._style.border_width + self._style.padding_top
return self
@property

View File

@ -53,7 +53,6 @@ class LeftAlignmentHandler(AlignmentHandler):
Returns:
Tuple[int, int, bool]: Spacing, start position, and overflow flag.
"""
print("LeftAlignmentHandler:")
# Handle single word case
if len(text_objects) <= 1:
return 0, 0, False
@ -76,7 +75,6 @@ class LeftAlignmentHandler(AlignmentHandler):
# Calculate ideal spacing
actual_spacing = residual_space // num_gaps
print(actual_spacing)
# Clamp within bounds (CREngine pattern: respect max_spacing)
if actual_spacing > max_spacing:
return max_spacing, 0, False
@ -108,7 +106,6 @@ class CenterRightAlignmentHandler(AlignmentHandler):
return 0, max(0, start_position), False
actual_spacing = residual_space // (len(text_objects)-1)
print(actual_spacing)
ideal_space = (min_spacing + max_spacing)/2
if actual_spacing > 0.5*(min_spacing + max_spacing):
actual_spacing = 0.5*(min_spacing + max_spacing)
@ -138,7 +135,6 @@ class JustifyAlignmentHandler(AlignmentHandler):
actual_spacing = residual_space // num_gaps
ideal_space = (min_spacing + max_spacing)//2
print(actual_spacing)
# can we touch the end?
if actual_spacing < max_spacing:
if actual_spacing < min_spacing:
@ -228,24 +224,58 @@ class Text(Renderable, Queriable):
"""Add this text to a line"""
self._line = line
def _apply_decoration(self):
"""Apply text decoration (underline or strikethrough)"""
def _apply_decoration(self, next_text: Optional['Text'] = None, spacing: int = 0):
"""
Apply text decoration (underline or strikethrough).
Args:
next_text: The next Text object in the line (if any)
spacing: The spacing to the next text object
"""
if self._style.decoration == TextDecoration.UNDERLINE:
# Draw underline at about 90% of the height
y_position = self._origin[1] - 0.1*self._style.font_size
self._draw.line([(0, y_position), (self._width, y_position)],
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
line_width = max(1, int(self._style.font_size / 15))
# Determine end x-coordinate
end_x = self._origin[0] + self._width
# If next text also has underline decoration, extend to connect them
if (next_text is not None and
next_text.style.decoration == TextDecoration.UNDERLINE and
next_text.style.colour == self._style.colour):
# Extend the underline through the spacing to connect with next word
end_x += spacing
self._draw.line([(self._origin[0], y_position), (end_x, y_position)],
fill=self._style.colour, width=line_width)
elif self._style.decoration == TextDecoration.STRIKETHROUGH:
# Draw strikethrough at about 50% of the height
y_position = self._origin[1] + self._middle_y
self._draw.line([(0, y_position), (self._width, y_position)],
fill=self._style.colour, width=max(1, int(self._style.font_size / 15)))
line_width = max(1, int(self._style.font_size / 15))
def render(self):
# Determine end x-coordinate
end_x = self._origin[0] + self._width
# If next text also has strikethrough decoration, extend to connect them
if (next_text is not None and
next_text.style.decoration == TextDecoration.STRIKETHROUGH and
next_text.style.colour == self._style.colour):
# Extend the strikethrough through the spacing to connect with next word
end_x += spacing
self._draw.line([(self._origin[0], y_position), (end_x, y_position)],
fill=self._style.colour, width=line_width)
def render(self, next_text: Optional['Text'] = None, spacing: int = 0):
"""
Render the text to an image.
Args:
next_text: The next Text object in the line (if any)
spacing: The spacing to the next text object
Returns:
A PIL Image containing the rendered text
"""
@ -254,11 +284,12 @@ class Text(Renderable, Queriable):
if self._style.background and self._style.background[3] > 0: # If alpha > 0
self._draw.rectangle([self._origin, self._origin+self._size], fill=self._style.background)
# Draw the text using calculated offsets to prevent cropping
self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font, fill=self._style.colour)
# Draw the text using baseline as anchor point ("ls" = left-baseline)
# This ensures the origin represents the baseline, not the top-left
self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font, fill=self._style.colour, anchor="ls")
# Apply any text decorations
self._apply_decoration()
# Apply any text decorations with knowledge of next text
self._apply_decoration(next_text, spacing)
class Line(Box):
"""
@ -268,7 +299,10 @@ class Line(Box):
def __init__(self, spacing: Tuple[int, int], origin, size, draw: ImageDraw.Draw,font: Optional[Font] = None,
callback=None, sheet=None, mode=None, halign=Alignment.CENTER,
valign=Alignment.CENTER, previous = None):
valign=Alignment.CENTER, previous = None,
min_word_length_for_brute_force: int = 8,
min_chars_before_hyphen: int = 2,
min_chars_after_hyphen: int = 2):
"""
Initialize a new line.
@ -283,6 +317,9 @@ class Line(Box):
halign: Horizontal alignment of text within the line
valign: Vertical alignment of text within the line
previous: Reference to the previous line
min_word_length_for_brute_force: Minimum word length to attempt brute force hyphenation (default: 8)
min_chars_before_hyphen: Minimum characters before hyphen in any split (default: 2)
min_chars_after_hyphen: Minimum characters after hyphen in any split (default: 2)
"""
super().__init__(origin, size, callback, sheet, mode, halign, valign)
self._text_objects: List['Text'] = [] # Store Text objects directly
@ -292,12 +329,18 @@ class Line(Box):
self._words : List['Word'] = []
self._previous = previous
self._next = None
ascent,descent = self._font.font.getmetrics()
self._baseline = self._origin[1] - ascent
ascent, descent = self._font.font.getmetrics()
# Store baseline as offset from line origin (top), not absolute position
self._baseline = ascent
self._draw = draw
self._spacing_render = (spacing[0] + spacing[1]) //2
self._position_render = 0
# Hyphenation configuration parameters
self._min_word_length_for_brute_force = min_word_length_for_brute_force
self._min_chars_before_hyphen = min_chars_before_hyphen
self._min_chars_after_hyphen = min_chars_after_hyphen
# Create the appropriate alignment handler
self._alignment_handler = self._create_alignment_handler(halign)
@ -311,7 +354,6 @@ class Line(Box):
Returns:
The appropriate alignment handler instance
"""
print("HALGIN!!!!!", alignment)
if alignment == Alignment.LEFT:
return LeftAlignmentHandler()
elif alignment == Alignment.JUSTIFY:
@ -333,75 +375,141 @@ class Line(Box):
Add a word to this line using intelligent word fitting strategies.
Args:
text: The text content of the word
font: The font to use for this word, or None to use the line's default font
word: The word to add to the line
part: Optional pretext from a previous hyphenated word
Returns:
True if the word was successfully added, False if it couldn't fit, in case of hypenation the hyphenated part is returned
Tuple of (success, overflow_text):
- success: True if word/part was added, False if it couldn't fit
- overflow_text: Remaining text if word was hyphenated, None otherwise
"""
# First, add any pretext from previous hyphenation
if part is not None:
self._text_objects.append(part)
self._words.append(word)
part.add_line(self)
# Try to add the full word
text = Text.from_word(word, self._draw)
self._text_objects.append(text)
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1])
print(self._alignment_handler)
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(
self._text_objects, self._size[0], self._spacing[0], self._spacing[1]
)
if not overflow:
# Word fits! Add it completely
self._words.append(word)
word.add_concete(text)
text.add_line(self)
self._position_render = position
self._spacing_render = spacing
return True, None # no overflow word is just added!
return True, None
_=self._text_objects.pop()
splits = [(Text(pair[0]+"-", word.style,self._draw, line=self, source=word), Text( pair[1], word.style, self._draw, line=self, source=word)) for pair in word.possible_hyphenation()]
# Word doesn't fit, remove it and try hyphenation
_ = self._text_objects.pop()
#worst case scenario!
if len(splits)==0 and len(word.text)>=6:
text = Text(word.text+"-", word.style, self._draw) # add hypen to know true length
word_length = sum([text.width for text in self._text_objects])
spacing_length = self._spacing[0] * (len(self._text_objects) - 1)
remaining=self._size[0] - word_length - spacing_length
fraction = remaining / text.width
# Calculate split position: fraction represents what portion of the hyphenated word fits
# We need to scale this to the original word length, accounting for the hyphen
hyphenated_length = len(word.text) + 1 # +1 for hyphen
split_in_hyphenated = round(fraction * hyphenated_length)
# Map back to original word, ensuring we don't go past the word length
spliter = min(len(word.text) - 1, max(1, split_in_hyphenated))
split = [Text(word.text[:spliter]+"-", word.style, self._draw, line=self, source=word), Text(word.text[spliter:], word.style, self._draw, line=self, source=word)]
self._text_objects.append(split[0])
word.add_concete(split)
split[0].add_line(self)
split[1].add_line(self)
self._spacing_render = self._spacing[0]
# Step 1: Try pyphen hyphenation
pyphen_splits = word.possible_hyphenation()
valid_splits = []
if pyphen_splits:
# Create Text objects for each possible split and check if they fit
for pair in pyphen_splits:
first_part_text = pair[0] + "-"
second_part_text = pair[1]
# Validate minimum character requirements
if len(pair[0]) < self._min_chars_before_hyphen:
continue
if len(pair[1]) < self._min_chars_after_hyphen:
continue
# Create Text objects
first_text = Text(first_part_text, word.style, self._draw, line=self, source=word)
second_text = Text(second_part_text, word.style, self._draw, line=self, source=word)
# Check if first part fits
self._text_objects.append(first_text)
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(
self._text_objects, self._size[0], self._spacing[0], self._spacing[1]
)
_ = self._text_objects.pop()
if not overflow:
# This split fits! Add it to valid options
valid_splits.append((first_text, second_text, spacing, position))
# Step 2: If we have valid pyphen splits, choose the best one
if valid_splits:
# Select the split with the best (minimum) spacing
best_split = min(valid_splits, key=lambda x: x[2])
first_text, second_text, spacing, position = best_split
# Apply the split
self._text_objects.append(first_text)
first_text.line = self
word.add_concete((first_text, second_text))
self._spacing_render = spacing
self._position_render = position
return True, split[1] # we apply a brute force split
self._words.append(word)
return True, second_text
elif len(splits)==0 and len(word.text)<6:
return False, None # this endpoint means no words can be added.
# Step 3: Try brute force hyphenation (only for long words)
if len(word.text) >= self._min_word_length_for_brute_force:
# Calculate available space for the word
word_length = sum([text.width for text in self._text_objects])
spacing_length = self._spacing[0] * max(0, len(self._text_objects) - 1)
remaining = self._size[0] - word_length - spacing_length
spacings = []
positions = []
if remaining > 0:
# Create a hyphenated version to measure
test_text = Text(word.text + "-", word.style, self._draw)
for split in splits:
self._text_objects.append(split[0])
if test_text.width > 0:
# Calculate what fraction of the hyphenated word fits
fraction = remaining / test_text.width
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1])
spacings.append(spacing)
positions.append(position)
_=self._text_objects.pop()
idx = int(np.argmin(spacings))
self._text_objects.append(splits[idx][0])
splits[idx][0].line=self
word.add_concete(splits[idx])
self._spacing_render = spacings[idx]
self._position_render = positions[idx]
self._words.append(word)
return True, splits[idx][1] # we apply a phyphenated split with best spacing
# Convert fraction to character position
# We need at least min_chars_before_hyphen and leave at least min_chars_after_hyphen
max_split_pos = len(word.text) - self._min_chars_after_hyphen
min_split_pos = self._min_chars_before_hyphen
# Calculate ideal split position based on available space
ideal_split = int(fraction * len(word.text))
split_pos = max(min_split_pos, min(ideal_split, max_split_pos))
# Ensure we meet minimum requirements
if (split_pos >= self._min_chars_before_hyphen and
len(word.text) - split_pos >= self._min_chars_after_hyphen):
# Create the split
first_part_text = word.text[:split_pos] + "-"
second_part_text = word.text[split_pos:]
first_text = Text(first_part_text, word.style, self._draw, line=self, source=word)
second_text = Text(second_part_text, word.style, self._draw, line=self, source=word)
# Verify the first part actually fits
self._text_objects.append(first_text)
spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(
self._text_objects, self._size[0], self._spacing[0], self._spacing[1]
)
if not overflow:
# Brute force split works!
first_text.line = self
second_text.line = self
word.add_concete((first_text, second_text))
self._spacing_render = spacing
self._position_render = position
self._words.append(word)
return True, second_text
else:
# Doesn't fit, remove it
_ = self._text_objects.pop()
# Step 4: Word cannot be hyphenated or split, move to next line
return False, None
def render(self):
"""
@ -422,9 +530,14 @@ class Line(Box):
# Start x_cursor at line origin plus any alignment offset
x_cursor = self._origin[0] + self._position_render
for text in self._text_objects:
for i, text in enumerate(self._text_objects):
# Update text draw context to current draw context
text._draw = self._draw
text.set_origin(np.array([x_cursor, y_cursor]))
text.render()
# Determine next text object for continuous decoration
next_text = self._text_objects[i + 1] if i + 1 < len(self._text_objects) else None
# Render with next text information for continuous underline/strikethrough
text.render(next_text, self._spacing_render)
x_cursor += self._spacing_render + text.width # x-spacing + width of text object

View File

@ -4,9 +4,9 @@ from typing import List, Tuple, Optional, Union
from pyWebLayout.concrete import Page, Line, Text
from pyWebLayout.abstract import Paragraph, Word, Link
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
"""
Layout a paragraph of text within a given page.
@ -18,6 +18,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
page: The page to layout the paragraph on
start_word: Index of the first word to process (for continuation)
pretext: Optional pretext from a previous hyphenated word
alignment_override: Optional alignment to override the paragraph's default alignment
Returns:
Tuple of:
@ -32,22 +33,71 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
if start_word >= len(paragraph.words):
return True, None, None
# Get the concrete style with resolved word spacing constraints
style_registry = ConcreteStyleRegistry(page.style_resolver)
concrete_style = style_registry.get_concrete_style(paragraph.style)
# paragraph.style is already a Font object (concrete), not AbstractStyle
# We need to get word spacing constraints from the Font's abstract style if available
# For now, use reasonable defaults based on font size
from pyWebLayout.style import Font, Alignment
# Extract word spacing constraints (min, max) for Line constructor
word_spacing_constraints = (
int(concrete_style.word_spacing_min),
int(concrete_style.word_spacing_max)
)
if isinstance(paragraph.style, Font):
# paragraph.style is already a Font (concrete style)
font = paragraph.style
# Use default word spacing constraints based on font size
# Minimum spacing should be proportional to font size for better readability
min_spacing = float(font.font_size) * 0.25 # 25% of font size
max_spacing = float(font.font_size) * 0.5 # 50% of font size
word_spacing_constraints = (int(min_spacing), int(max_spacing))
text_align = Alignment.LEFT # Default alignment
else:
# paragraph.style is an AbstractStyle, resolve it
rendering_context = RenderingContext(base_font_size=paragraph.style.font_size)
style_resolver = StyleResolver(rendering_context)
style_registry = ConcreteStyleRegistry(style_resolver)
concrete_style = style_registry.get_concrete_style(paragraph.style)
font = concrete_style.create_font()
word_spacing_constraints = (
int(concrete_style.word_spacing_min),
int(concrete_style.word_spacing_max)
)
text_align = concrete_style.text_align
def create_new_line(word: Optional[Union[Word, Text]] = None) -> Optional[Line]:
# Apply alignment override if provided
if alignment_override is not None:
text_align = alignment_override
# Cap font size to page maximum if needed
if font.font_size > page.style.max_font_size:
from pyWebLayout.style import Font
font = Font(
font_path=font._font_path,
font_size=page.style.max_font_size,
colour=font.colour,
weight=font.weight,
style=font.style,
decoration=font.decoration,
background=font.background
)
# Calculate baseline-to-baseline spacing using line spacing multiplier
# This is the vertical distance between baselines of consecutive lines
baseline_spacing = int(font.font_size * page.style.line_spacing_multiplier)
# Get font metrics for boundary checking
ascent, descent = font.font.getmetrics()
def create_new_line(word: Optional[Union[Word, Text]] = None, is_first_line: bool = False) -> Optional[Line]:
"""Helper function to create a new line, returns None if page is full."""
if not page.can_fit_line(paragraph.line_height):
# Check if this line's baseline and descenders would fit on the page
if not page.can_fit_line(baseline_spacing, ascent, descent):
return None
y_cursor = page._current_y_offset
# For the first line, position it so text starts at the top boundary
# For subsequent lines, use current y_offset which tracks baseline-to-baseline spacing
if is_first_line:
# Position line origin so that baseline (origin + ascent) is close to top
# We want minimal space above the text, so origin should be at boundary
y_cursor = page._current_y_offset
else:
y_cursor = page._current_y_offset
x_cursor = page.border_size
# Create a temporary Text object to calculate word width
@ -60,10 +110,10 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
return Line(
spacing=word_spacing_constraints,
origin=(x_cursor, y_cursor),
size=(page.available_width, paragraph.line_height),
size=(page.available_width, baseline_spacing),
draw=page.draw,
font=concrete_style.create_font(),
halign=concrete_style.text_align
font=font,
halign=text_align
)
# Create initial line
@ -72,15 +122,14 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
return False, start_word, pretext
page.add_child(current_line)
page._current_y_offset += paragraph.line_height
# Note: add_child already updates _current_y_offset based on child's origin and size
# No need to manually increment it here
# Track current position in paragraph
current_pretext = pretext
# Process words starting from start_word
for i, word in enumerate(paragraph.words[start_word:], start=start_word):
if current_pretext:
print(current_pretext.text)
success, overflow_text = current_line.add_word(word, current_pretext)
if success:
@ -93,7 +142,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
# If we can't create a new line, return with the current state
return False, i, overflow_text
page.add_child(current_line)
page._current_y_offset += paragraph.line_height
# Note: add_child already updates _current_y_offset
# Continue to the next word
continue
else:
@ -121,7 +170,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
continue
page.add_child(current_line)
page._current_y_offset += paragraph.line_height
# Note: add_child already updates _current_y_offset
# Try to add the word to the new line
success, overflow_text = current_line.add_word(word, current_pretext)
@ -142,55 +191,3 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
# All words processed successfully
return True, None, None
class DocumentLayouter:
    """
    Class-based document layouter bound to a single page.

    Thin wrapper around the module-level ``paragraph_layouter`` function that
    keeps the target page (and a style registry built from that page's
    ``style_resolver``) so several paragraphs can be laid out against it.
    """

    def __init__(self, page: Page):
        """
        Initialize the layouter with a page.

        Args:
            page: The page that receives the laid-out lines. Its
                ``style_resolver`` is used to build ``self.style_registry``.
        """
        self.page = page
        self.style_registry = ConcreteStyleRegistry(page.style_resolver)

    def layout_paragraph(self, paragraph: Paragraph, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
        """
        Layout a paragraph on this layouter's page.

        Delegates directly to ``paragraph_layouter`` with ``self.page``.

        Args:
            paragraph: The paragraph to layout.
            start_word: Index of the first word to place.
            pretext: Optional text carried over to precede the first word.

        Returns:
            The tuple returned by ``paragraph_layouter``: a completion flag,
            the word index to resume from (``None`` once the paragraph is
            complete) and any remaining pretext.
        """
        return paragraph_layouter(paragraph, self.page, start_word, pretext)

    def layout_document(self, paragraphs: List[Paragraph]) -> bool:
        """
        Layout multiple paragraphs in sequence on the single bound page.

        Page breaks are not handled here: as soon as one paragraph cannot be
        laid out completely, layout stops and ``False`` is returned so the
        caller can decide how to continue. An empty list yields ``True``.

        Args:
            paragraphs: List of paragraphs to layout

        Returns:
            True if all paragraphs were laid out successfully, False otherwise
        """
        for paragraph in paragraphs:
            # Each paragraph always starts from its first word with no pretext;
            # only the completion flag matters because there is no next page
            # to resume on.
            complete, _next_word, _remaining_pretext = self.layout_paragraph(
                paragraph, 0, None
            )
            if not complete:
                return False

        return True

View File

@ -27,6 +27,7 @@ from pyWebLayout.concrete.page import Page
from pyWebLayout.concrete.text import Line, Text
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style import Font
from pyWebLayout.layout.document_layouter import paragraph_layouter
@dataclass
@ -212,11 +213,12 @@ class BidirectionalLayouter:
Handles font scaling and maintains position state.
"""
def __init__(self, blocks: List[Block], page_style: PageStyle, page_size: Tuple[int, int] = (800, 600)):
def __init__(self, blocks: List[Block], page_style: PageStyle, page_size: Tuple[int, int] = (800, 600), alignment_override=None):
self.blocks = blocks
self.page_style = page_style
self.page_size = page_size
self.chapter_navigator = ChapterNavigator(blocks)
self.alignment_override = alignment_override
def render_page_forward(self, position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]:
"""
@ -328,54 +330,66 @@ class BidirectionalLayouter:
return True, new_pos
def _layout_paragraph_on_page(self, paragraph: Paragraph, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
"""Layout a paragraph on the page with font scaling support"""
# This would integrate with the existing paragraph_layouter but with font scaling
# For now, this is a placeholder implementation
"""
Layout a paragraph on the page using the core paragraph_layouter.
Integrates font scaling and position tracking with the proven layout logic.
# Calculate scaled line height
line_height = int(paragraph.style.font_size * font_scale * 1.2) # 1.2 is line spacing factor
Args:
paragraph: The paragraph to layout (already scaled if font_scale != 1.0)
page: The page to layout on
position: Current rendering position
font_scale: Font scaling factor (used for context, paragraph should already be scaled)
if not page.can_fit_line(line_height):
return False, position
Returns:
Tuple of (success, new_position)
"""
# Convert remaining_pretext from string to Text object if needed
pretext_obj = None
if position.remaining_pretext:
# Create a Text object from the pretext string
pretext_obj = Text(
position.remaining_pretext,
paragraph.style,
page.draw,
line=None,
source=None
)
# Create a line and try to fit words
y_cursor = page._current_y_offset
x_cursor = page.border_size
# Scale word spacing constraints
word_spacing = FontScaler.scale_word_spacing((5, 15), font_scale) # Default spacing
line = Line(
spacing=word_spacing,
origin=(x_cursor, y_cursor),
size=(page.available_width, line_height),
draw=page.draw,
font=FontScaler.scale_font(paragraph.style, font_scale)
# Call the core paragraph layouter with alignment override if set
success, failed_word_index, remaining_pretext = paragraph_layouter(
paragraph,
page,
start_word=position.word_index,
pretext=pretext_obj,
alignment_override=self.alignment_override
)
# Add words starting from position.word_index
words_added = 0
for i, word in enumerate(paragraph.words[position.word_index:], start=position.word_index):
success, overflow = line.add_word(word)
if not success:
break
words_added += 1
if words_added > 0:
page.add_child(line)
page._current_y_offset += line_height
new_pos = position.copy()
new_pos.word_index += words_added
# If we finished the paragraph, move to next block
if new_pos.word_index >= len(paragraph.words):
new_pos.block_index += 1
new_pos.word_index = 0
# Create new position based on the result
new_pos = position.copy()
if success:
# Paragraph was fully laid out, move to next block
new_pos.block_index += 1
new_pos.word_index = 0
new_pos.remaining_pretext = None
return True, new_pos
else:
# Paragraph was not fully laid out
if failed_word_index is not None:
# Update position to the word that didn't fit
new_pos.word_index = failed_word_index
return False, position
# Convert Text object back to string if there's remaining pretext
if remaining_pretext is not None and hasattr(remaining_pretext, 'text'):
new_pos.remaining_pretext = remaining_pretext.text
else:
new_pos.remaining_pretext = None
return False, new_pos
else:
# No specific word failed, but layout wasn't successful
# This shouldn't normally happen, but handle it gracefully
return False, position
def _layout_heading_on_page(self, heading: Heading, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]:
"""Layout a heading on the page"""

View File

@ -23,6 +23,10 @@ class PageStyle:
# Background color
background_color: Tuple[int, int, int] = (255, 255, 255)
# Typography properties
max_font_size: int = 72 # Maximum font size allowed on a page
line_spacing_multiplier: float = 1.2 # Baseline-to-baseline spacing multiplier
@property
def padding_top(self) -> int:
return self.padding[0]

View File

@ -245,6 +245,79 @@ class TestPageImplementation(unittest.TestCase):
for i, child in enumerate(page.children):
self.assertEqual(child._text, f"Child {i}")
def test_page_can_fit_line_boundary_checking(self):
    """
    Test that can_fit_line correctly checks the bottom boundary.

    The page is 800x600 with a 40px border and 10px padding on every side,
    so this test expects the content area to run from y=50 to y=550.
    Lines are simulated by advancing ``_current_y_offset`` directly rather
    than by adding real children.
    """
    style = PageStyle(
        border_width=40,
        padding=(10, 10, 10, 10)
    )
    page = Page(size=(800, 600), style=style)

    line_height = 20
    max_y = 600 - 40 - 10  # height - border - padding_bottom = 550

    # Initial y_offset should be at border + padding_top = 50
    self.assertEqual(page._current_y_offset, 50)

    # Test 1: Line that fits comfortably (would end at 50 + 20 = 70)
    self.assertTrue(page.can_fit_line(line_height))

    # Test 2: Simulate adding lines to fill the page.
    # Available height is 550 - 50 = 500 pixels, i.e. exactly 25 lines of 20px;
    # add the first 24 here.
    for i in range(24):
        self.assertTrue(page.can_fit_line(line_height), f"Line {i+1} should fit")
        # Simulate adding a line by updating y_offset
        page._current_y_offset += line_height

    # After 24 lines: y_offset = 50 + (24 * 20) = 530
    self.assertEqual(page._current_y_offset, 530)

    # Test 3: The 25th line ends exactly on the boundary and should still fit
    self.assertTrue(page.can_fit_line(line_height))
    page._current_y_offset += line_height
    self.assertEqual(page._current_y_offset, max_y)

    # Test 4: Now another line should NOT fit (550 + 20 = 570 > 550)
    self.assertFalse(page.can_fit_line(line_height))

    # Test 5: Even a 1-pixel line should not fit (550 + 1 = 551 > 550)
    self.assertFalse(page.can_fit_line(1))

    # Test 6: Edge case - exactly at boundary, 0-height line should fit
    self.assertTrue(page.can_fit_line(0))
def test_page_can_fit_line_with_different_styles(self):
    """
    Check can_fit_line against pages built from different page styles.

    Each case lists the page size, border width, padding, the y offset the
    fresh page is expected to start at, and the tallest line expected to
    fit (one pixel more must be rejected).
    """
    cases = [
        # No border or padding: content spans the full 100px height.
        ((100, 100), 0, (0, 0, 0, 0), 0, 100),
        # Large border and padding: starts at 20 + 15 = 35, ends at
        # 200 - 20 - 15 = 165, leaving 130px.
        ((200, 200), 20, (15, 15, 15, 15), 35, 130),
    ]

    for size, border, padding, expected_start, tallest_fit in cases:
        page_style = PageStyle(border_width=border, padding=padding)
        page = Page(size=size, style=page_style)

        # y_offset starts at border + padding_top
        self.assertEqual(page._current_y_offset, expected_start)

        # A line filling the remaining height fits; one pixel more does not
        self.assertTrue(page.can_fit_line(tallest_fit))
        self.assertFalse(page.can_fit_line(tallest_fit + 1))
if __name__ == '__main__':
unittest.main()

View File

@ -16,43 +16,12 @@ from pyWebLayout.layout.document_layouter import paragraph_layouter, DocumentLay
from pyWebLayout.style.abstract_style import AbstractStyle
from pyWebLayout.style.concrete_style import ConcreteStyle, StyleResolver, RenderingContext
from pyWebLayout.style.fonts import Font
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.concrete.page import Page
from pyWebLayout.concrete.text import Line, Text
from pyWebLayout.abstract.inline import Word
class MockPage:
    """Stand-in page exposing the attributes and methods the layouter uses."""

    def __init__(self, width=400, height=600, max_lines=20):
        # Fixed geometry plus the caller-supplied content area
        self.border_size = 20
        self._current_y_offset = 50
        self.available_width, self.available_height = width, height

        # Line budget bookkeeping
        self.max_lines = max_lines
        self.lines_added = 0
        self.children = []

        # Real drawing context, on a canvas larger than the content area
        canvas_size = (width + 40, height + 100)
        self.image = Image.new('RGB', canvas_size, 'white')
        self.draw = ImageDraw.Draw(self.image)

        # Real style resolver
        self.style_resolver = StyleResolver(RenderingContext(base_font_size=16))

    def can_fit_line(self, line_height):
        """Return whether a line of ``line_height`` fits in height and line budget."""
        has_vertical_room = (self.available_height - self._current_y_offset) >= line_height
        if not has_vertical_room:
            return False
        return self.lines_added < self.max_lines

    def add_child(self, child):
        """Record ``child`` (e.g. a Line), count it as a line and return True."""
        self.children.append(child)
        self.lines_added = self.lines_added + 1
        return True
class MockWord(Word):
"""A simple mock word that extends the real Word class."""
@ -106,8 +75,9 @@ class TestDocumentLayouterIntegration:
def test_single_page_layout_with_real_components(self):
"""Test layout on a single page using real Line and Text objects."""
# Create a page that can fit content
page = MockPage(width=500, height=400, max_lines=10)
# Create a real page that can fit content
page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(500, 400), style=page_style)
# Create a paragraph with realistic content
paragraph = MockParagraph(
@ -125,7 +95,6 @@ class TestDocumentLayouterIntegration:
# Verify lines were added to page
assert len(page.children) > 0
assert page.lines_added > 0
# Verify actual Line objects were created
for child in page.children:
@ -135,8 +104,9 @@ class TestDocumentLayouterIntegration:
def test_multi_page_scenario_with_page_overflow(self):
"""Test realistic multi-page scenario with actual page overflow."""
# Create a very small page that will definitely overflow
small_page = MockPage(width=150, height=80, max_lines=1) # Extremely small page
# Create a very small real page that will definitely overflow
small_page_style = PageStyle(border_width=5, padding=(5, 5, 5, 5))
small_page = Page(size=(150, 80), style=small_page_style)
# Create a long paragraph that will definitely overflow
long_text = " ".join([f"verylongword{i:02d}" for i in range(20)]) # 20 long words
@ -157,13 +127,13 @@ class TestDocumentLayouterIntegration:
# If it failed, verify overflow handling
assert failed_word_index is not None # Should indicate where it failed
assert failed_word_index < len(paragraph.words) # Should be within word range
assert len(small_page.children) <= small_page.max_lines
print(f"✓ Multi-page test: Page overflow at word {failed_word_index}, {len(small_page.children)} lines fit")
def test_word_spacing_constraints_in_real_lines(self):
"""Test that word spacing constraints are properly used in real Line objects."""
# Create page
page = MockPage(width=400, height=300)
# Create real page
page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(400, 300), style=page_style)
# Create paragraph with specific spacing constraints
paragraph = MockParagraph(
@ -197,7 +167,8 @@ class TestDocumentLayouterIntegration:
]
for alignment_name, style in alignments_to_test:
page = MockPage(width=350, height=200)
page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(350, 200), style=page_style)
paragraph = MockParagraph(
"This sentence will test different alignment strategies with word spacing.",
style
@ -217,8 +188,9 @@ class TestDocumentLayouterIntegration:
def test_realistic_document_with_multiple_pages(self):
"""Test a realistic document that spans multiple pages."""
# Create multiple pages
pages = [MockPage(width=400, height=300, max_lines=5) for _ in range(3)]
# Create multiple real pages
page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
pages = [Page(size=(400, 300), style=page_style) for _ in range(3)]
# Create a document with multiple paragraphs
paragraphs = [
@ -275,7 +247,8 @@ class TestDocumentLayouterIntegration:
def test_word_spacing_constraint_resolution_integration(self):
"""Test the complete integration from AbstractStyle to Line spacing."""
page = MockPage()
page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
page = Page(size=(400, 600), style=page_style)
# Test different constraint scenarios
test_cases = [
@ -300,8 +273,9 @@ class TestDocumentLayouterIntegration:
]
for case in test_cases:
# Create fresh page for each test
test_page = MockPage()
# Create fresh real page for each test
test_page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
test_page = Page(size=(400, 600), style=test_page_style)
paragraph = MockParagraph(
"Testing constraint resolution with different scenarios.",
case["style"]
@ -322,8 +296,9 @@ class TestDocumentLayouterIntegration:
def test_hyphenation_with_word_spacing_constraints(self):
"""Test that hyphenation works correctly with word spacing constraints."""
# Create a narrow page to force hyphenation
narrow_page = MockPage(width=200, height=300)
# Create a narrow real page to force hyphenation
narrow_page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10))
narrow_page = Page(size=(200, 300), style=narrow_page_style)
# Create paragraph with long words that will need hyphenation
paragraph = MockParagraph(