pyWebLayout/pyWebLayout/layout/document_layouter.py
Duncan Tourolle 37505d3dcc
Some checks failed
Python CI / test (push) Failing after 7m45s
Fix tests for CI?
2025-11-04 22:30:04 +01:00

356 lines
14 KiB
Python

from __future__ import annotations
from typing import List, Tuple, Optional, Union
from pyWebLayout.concrete import Page, Line, Text
from pyWebLayout.concrete.image import RenderableImage
from pyWebLayout.concrete.functional import LinkText
from pyWebLayout.abstract import Paragraph, Word, Link
from pyWebLayout.abstract.block import Image as AbstractImage
from pyWebLayout.abstract.inline import LinkedWord
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
def paragraph_layouter(
    paragraph: Paragraph,
    page: Page,
    start_word: int = 0,
    pretext: Optional[Text] = None,
    alignment_override: Optional['Alignment'] = None,
) -> Tuple[bool, Optional[int], Optional[Text]]:
    """
    Layout a paragraph of text within a given page.

    Words are appended to ``Line`` objects that are created on demand and
    registered on the page with ``page.add_child``. Word spacing limits come
    from the paragraph style: when the style is already a ``Font`` they are
    derived from the font size (25% / 50%), otherwise the abstract style is
    resolved to a concrete style and its ``word_spacing_min`` /
    ``word_spacing_max`` are used.

    Args:
        paragraph: The paragraph to layout
        page: The page to layout the paragraph on
        start_word: Index of the first word to process (for continuation)
        pretext: Optional pretext from a previous hyphenated word
        alignment_override: Optional alignment to override the paragraph's default alignment

    Returns:
        Tuple of:
        - bool: True if paragraph was completely laid out, False if page ran out of space
        - Optional[int]: Index of first word that didn't fit (if any)
        - Optional[Text]: Remaining pretext if word was hyphenated (if any)

        When not even the first line fits, ``(False, start_word, pretext)`` is
        returned so the caller can retry with the same arguments.
    """
    # An empty paragraph, or a continuation index past the last word, leaves
    # nothing to lay out and counts as success.
    if not paragraph.words:
        return True, None, None
    if start_word >= len(paragraph.words):
        return True, None, None

    # Kept function-local, as in the original code (runs only once real layout
    # work is required).
    from pyWebLayout.style import Font, Alignment

    if isinstance(paragraph.style, Font):
        # paragraph.style is already a concrete Font: derive default spacing
        # limits from the font size and fall back to left alignment.
        font = paragraph.style
        min_spacing = float(font.font_size) * 0.25  # 25% of font size
        max_spacing = float(font.font_size) * 0.5   # 50% of font size
        word_spacing_constraints = (int(min_spacing), int(max_spacing))
        text_align = Alignment.LEFT
    else:
        # paragraph.style is an abstract style: resolve it to a concrete one.
        from pyWebLayout.style.abstract_style import FontSize
        if isinstance(paragraph.style.font_size, FontSize):
            # Semantic size (enum): start from a default base size of 16 and
            # leave the interpretation of the enum to the resolver.
            base_font_size = 16
        else:
            base_font_size = int(paragraph.style.font_size)
        rendering_context = RenderingContext(base_font_size=base_font_size)
        style_resolver = StyleResolver(rendering_context)
        style_registry = ConcreteStyleRegistry(style_resolver)
        concrete_style = style_registry.get_concrete_style(paragraph.style)
        font = concrete_style.create_font()
        word_spacing_constraints = (
            int(concrete_style.word_spacing_min),
            int(concrete_style.word_spacing_max),
        )
        text_align = concrete_style.text_align

    # An explicit override wins over whatever the style resolved to.
    if alignment_override is not None:
        text_align = alignment_override

    # Cap font size to the page maximum by rebuilding the font with the same
    # attributes and the capped size.
    if font.font_size > page.style.max_font_size:
        font = Font(
            font_path=font._font_path,
            font_size=page.style.max_font_size,
            colour=font.colour,
            weight=font.weight,
            style=font.style,
            decoration=font.decoration,
            background=font.background,
        )

    # Vertical distance between the baselines of consecutive lines.
    baseline_spacing = int(font.font_size * page.style.line_spacing_multiplier)
    # Font metrics handed to the page for its fit check.
    ascent, descent = font.font.getmetrics()

    def create_new_line() -> Optional[Line]:
        """Build a line at the page's current y offset; None if the page is full."""
        if not page.can_fit_line(baseline_spacing, ascent, descent):
            return None
        return Line(
            spacing=word_spacing_constraints,
            origin=(page.border_size, page._current_y_offset),
            size=(page.available_width, baseline_spacing),
            draw=page.draw,
            font=font,
            halign=text_align,
        )

    # Create the initial line; if even that does not fit, nothing was consumed.
    current_line = create_new_line()
    if current_line is None:
        return False, start_word, pretext
    # The original code relies on add_child to advance page._current_y_offset,
    # so the offset is never incremented manually here.
    page.add_child(current_line)

    # Fragment carried over from a hyphenated word, passed along with the next
    # add_word call.
    current_pretext = pretext

    for i, word in enumerate(paragraph.words[start_word:], start=start_word):
        success, overflow_text = current_line.add_word(word, current_pretext)
        if success:
            if overflow_text is not None:
                # The word was placed but left a remainder: open a new line
                # and carry the remainder forward as pretext.
                current_pretext = overflow_text
                current_line = create_new_line()
                if current_line is None:
                    return False, i, overflow_text
                page.add_child(current_line)
                continue
            # Word fit without overflow: nothing to carry over.
            current_pretext = None
        else:
            # Word didn't fit on the current line: open a new one.
            current_line = create_new_line()
            if current_line is None:
                # Page is full, report the current position.
                return False, i, overflow_text
            # Register the line before filling it. Previously the hyphenation
            # path below `continue`d without ever attaching its line to the
            # page, so the content placed on that line was lost.
            page.add_child(current_line)

            # Measure the word against the width of the fresh line.
            temp_text = Text.from_word(word, page.draw)
            if temp_text.width > current_line.size[0]:
                # Wider than a whole line: only words of 6+ characters are
                # considered for hyphenation.
                if len(word.text) >= 6:
                    splits = [
                        (
                            Text(pair[0], word.style, page.draw, line=current_line, source=word),
                            Text(pair[1], word.style, page.draw, line=current_line, source=word),
                        )
                        for pair in word.possible_hyphenation()
                    ]
                    if len(splits) > 0:
                        # Use the first hyphenation point.
                        first_part, second_part = splits[0]
                        # NOTE(review): the first fragment is passed in the
                        # pretext position of add_word; confirm this matches
                        # Line.add_word's contract.
                        current_line.add_word(word, first_part)
                        current_pretext = second_part
                        continue

            # Try to add the word to the new line.
            success, overflow_text = current_line.add_word(word, current_pretext)
            if not success:
                if overflow_text:
                    # Word was hyphenated, continue with the overflow.
                    current_pretext = overflow_text
                    continue
                # Word cannot be broken: stop and report its index.
                return False, i, None
            current_pretext = overflow_text  # May be None or hyphenated remainder

    # All words processed successfully
    return True, None, None
def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] = None,
                   max_height: Optional[int] = None) -> bool:
    """
    Layout an image within a given page.

    The image is scaled via ``image.calculate_scaled_dimensions`` to fit the
    width/height limits, wrapped in a ``RenderableImage`` positioned at the
    page's border and current y offset, and added to the page. The renderable
    is created with centered horizontal and top vertical alignment.

    Args:
        image: The abstract Image object to layout
        page: The page to layout the image on
        max_width: Maximum width constraint (defaults to page available width)
        max_height: Maximum height constraint (defaults to remaining page height;
            an explicit value is clamped to the remaining height)

    Returns:
        bool: True if image was successfully laid out, False if page ran out of space
    """
    # Use page available width if max_width not specified
    if max_width is None:
        max_width = page.available_width

    # Vertical room left below the current offset, excluding the border.
    available_height = page.size[1] - page._current_y_offset - page.border_size

    # With no room left, scaling against a zero/negative height limit could
    # still yield a "fitting" degenerate size; report the page as full instead.
    if available_height <= 0:
        return False

    if max_height is None:
        max_height = available_height
    else:
        max_height = min(max_height, available_height)

    # Calculate scaled dimensions
    scaled_width, scaled_height = image.calculate_scaled_dimensions(max_width, max_height)

    # Unknown height or a height exceeding the remaining space: does not fit.
    if scaled_height is None or scaled_height > available_height:
        return False

    # Imported only once the image is known to fit and alignment is needed.
    from pyWebLayout.style import Alignment

    x_offset = page.border_size
    y_offset = page._current_y_offset

    # Access page.draw to ensure canvas is initialized before page._canvas is read
    _ = page.draw

    renderable_image = RenderableImage(
        image=image,
        canvas=page._canvas,
        max_width=max_width,
        max_height=max_height,
        origin=(x_offset, y_offset),
        # Fall back to the limits when a scaled dimension is missing or zero.
        size=(scaled_width or max_width, scaled_height or max_height),
        halign=Alignment.CENTER,
        valign=Alignment.TOP,
    )

    # Add to page
    page.add_child(renderable_image)
    return True
class DocumentLayouter:
    """
    Coordinator that places abstract document elements onto a single page.

    The class itself performs no layout work; each element type is handed to
    a dedicated layouter function:
    - paragraph_layouter for text paragraphs
    - image_layouter for images
    - table_layouter for tables (future)

    It holds the target page (and a style registry built from the page's
    style resolver) so callers only need to pass the elements.
    """

    def __init__(self, page: Page):
        """
        Bind the layouter to the page that will receive the content.

        Args:
            page: The page to layout content on
        """
        self.page = page
        self.style_registry = ConcreteStyleRegistry(page.style_resolver)

    def layout_paragraph(self, paragraph: Paragraph, start_word: int = 0,
                         pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]:
        """
        Place a paragraph on the bound page via paragraph_layouter.

        Args:
            paragraph: The paragraph to layout
            start_word: Index of the first word to process (for continuation)
            pretext: Optional pretext from a previous hyphenated word

        Returns:
            Tuple of (success, failed_word_index, remaining_pretext)
        """
        outcome = paragraph_layouter(paragraph, self.page, start_word, pretext)
        return outcome

    def layout_image(self, image: AbstractImage, max_width: Optional[int] = None,
                     max_height: Optional[int] = None) -> bool:
        """
        Place an image on the bound page via image_layouter.

        Args:
            image: The abstract Image object to layout
            max_width: Maximum width constraint (defaults to page available width)
            max_height: Maximum height constraint (defaults to remaining page height)

        Returns:
            bool: True if image was successfully laid out, False if page ran out of space
        """
        placed = image_layouter(image, self.page, max_width, max_height)
        return placed

    def layout_document(self, elements: List[Union[Paragraph, AbstractImage]]) -> bool:
        """
        Lay out a sequence of abstract elements in order.

        Dispatch is by element type:
        - Paragraphs go through layout_paragraph
        - Images go through layout_image
        - Any other element type is currently skipped (tables, code blocks
          and similar are reserved for future layouters)

        Processing stops at the first element that cannot be placed.

        Args:
            elements: List of abstract elements to layout

        Returns:
            True if all elements were successfully laid out, False otherwise
        """
        for item in elements:
            if isinstance(item, Paragraph):
                placed, _, _ = self.layout_paragraph(item)
            elif isinstance(item, AbstractImage):
                placed = self.layout_image(item)
            else:
                # Future: Table -> table_layouter, CodeBlock -> code_layouter
                continue
            if not placed:
                return False
        return True