This commit is contained in:
parent
899182152a
commit
4e65fe3e67
214
html_browser.py
214
html_browser.py
@ -25,8 +25,11 @@ from pyWebLayout.concrete import (
|
|||||||
from pyWebLayout.abstract.functional import (
|
from pyWebLayout.abstract.functional import (
|
||||||
Link, Button, Form, FormField, LinkType, FormFieldType
|
Link, Button, Form, FormField, LinkType, FormFieldType
|
||||||
)
|
)
|
||||||
|
from pyWebLayout.abstract.block import Paragraph
|
||||||
|
from pyWebLayout.abstract.inline import Word
|
||||||
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
|
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
|
||||||
from pyWebLayout.style.layout import Alignment
|
from pyWebLayout.style.layout import Alignment
|
||||||
|
from pyWebLayout.typesetting.paragraph_layout import ParagraphLayout, ParagraphLayoutResult
|
||||||
|
|
||||||
|
|
||||||
class HTMLParser:
|
class HTMLParser:
|
||||||
@ -39,7 +42,7 @@ class HTMLParser:
|
|||||||
def parse_html_string(self, html_content: str, base_url: str = "") -> Page:
|
def parse_html_string(self, html_content: str, base_url: str = "") -> Page:
|
||||||
"""Parse HTML string and return a Page object"""
|
"""Parse HTML string and return a Page object"""
|
||||||
# Create the main page
|
# Create the main page
|
||||||
page = Page(size=(800, 1600), background_color=(255, 255, 255))
|
page = Page(size=(800, 10000), background_color=(255, 255, 255))
|
||||||
self.current_container = page
|
self.current_container = page
|
||||||
self.base_url = base_url
|
self.base_url = base_url
|
||||||
|
|
||||||
@ -76,7 +79,7 @@ class HTMLParser:
|
|||||||
return self.parse_html_string(html_content, base_url)
|
return self.parse_html_string(html_content, base_url)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Create error page
|
# Create error page
|
||||||
page = Page(size=(800, 1600), background_color=(255, 255, 255))
|
page = Page(size=(800, 10000), background_color=(255, 255, 255))
|
||||||
error_text = Text(f"Error loading file: {str(e)}", Font(font_size=16, colour=(255, 0, 0)))
|
error_text = Text(f"Error loading file: {str(e)}", Font(font_size=16, colour=(255, 0, 0)))
|
||||||
page.add_child(error_text)
|
page.add_child(error_text)
|
||||||
return page
|
return page
|
||||||
@ -86,21 +89,210 @@ class HTMLParser:
|
|||||||
# Simple token-based parsing
|
# Simple token-based parsing
|
||||||
tokens = self._tokenize_html(content)
|
tokens = self._tokenize_html(content)
|
||||||
|
|
||||||
|
# Group tokens into paragraphs and other elements
|
||||||
|
self._process_tokens_into_elements(tokens, container)
|
||||||
|
|
||||||
|
def _process_tokens_into_elements(self, tokens: List[Dict], container: Container):
    """
    Walk the token stream and turn it into renderable children of *container*.

    Loose text and inline-formatted runs are buffered as ``(text, font)``
    tuples and flushed into a laid-out paragraph whenever an opening
    block-level tag is met, and once more when the tokens run out.

    Args:
        tokens: Token dicts; text tokens carry ``'content'``, tag tokens
            carry ``'name'`` and ``'closing'``.
        container: Receives the generated paragraphs, spacers and whatever
            ``_handle_tag`` adds.
    """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    block_tags = ('p',) + heading_tags + ('div', 'br', 'img')
    inline_tags = ('b', 'strong', 'i', 'em', 'u', 'a')

    pending_runs = []
    pos = 0

    while pos < len(tokens):
        tok = tokens[pos]
        kind = tok['type']

        if kind == 'text':
            stripped = tok['content'].strip()
            if stripped:  # whitespace-only text is ignored
                pending_runs.append((stripped, self.font_stack[-1]))
            pos += 1
            continue

        if kind != 'tag':
            pos += 1
            continue

        name = tok['name']
        closing = tok['closing']

        if name in block_tags and not closing:
            # An opening block element terminates the paragraph being buffered.
            if pending_runs:
                self._create_and_add_paragraph(pending_runs, container)
                pending_runs = []

            if name == 'p':
                # The handler consumes up to the closing tag and returns the
                # index to resume from.
                pos = self._handle_paragraph_tag(tok, tokens, pos, container)
                continue
            if name in heading_tags:
                pos = self._handle_header_tag(tok, tokens, pos, container)
                continue
            if name == 'br':
                # A line break is rendered as a fixed 10px-high spacer.
                container.add_child(Box((0, 0), (1, 10)))
            elif name == 'img':
                self._handle_tag(tok, container)
            # 'div' needs no action: its content is handled by later iterations.
        elif name in inline_tags:
            # NOTE(review): closing inline tags also reach this branch; the
            # handlers normally skip past them, so this presumably only
            # matters for unbalanced markup - confirm.
            pos = self._handle_inline_tag_with_content(tok, tokens, pos, pending_runs)
            continue
        else:
            # Everything else (including closing block tags) goes to the
            # generic handler.
            self._handle_tag(tok, container)

        pos += 1

    # Flush whatever text is still buffered at the end of the stream.
    if pending_runs:
        self._create_and_add_paragraph(pending_runs, container)
|
||||||
|
|
||||||
|
def _create_and_add_paragraph(self, content_list: List[Tuple[str, Font]], container: Container):
    """
    Build a paragraph from styled text runs, wrap it and append the lines.

    Args:
        content_list: ``(text, font)`` runs in reading order. The font of the
            first run becomes the paragraph style.
        container: Receives one child per laid-out line, followed by a
            5px-high spacer.
    """
    if not content_list:
        return

    paragraph = Paragraph(style=content_list[0][1])

    # str.split() with no argument never yields empty or padded pieces, so
    # each piece can be wrapped in a Word directly.
    for run_text, run_font in content_list:
        for piece in run_text.split():
            paragraph.add_word(Word(piece, run_font))

    line_breaker = ParagraphLayout(
        line_width=750,  # Page width minus margins
        line_height=20,
        word_spacing=(3, 8),
        line_spacing=3,
        halign=Alignment.LEFT
    )

    for rendered_line in line_breaker.layout_paragraph(paragraph):
        container.add_child(rendered_line)

    # Vertical gap separating this paragraph from what follows.
    container.add_child(Box((0, 0), (1, 5)))
|
||||||
|
|
||||||
|
def _handle_paragraph_tag(self, token, tokens, current_index, container):
    """
    Consume a ``<p>`` element and add its text to *container* as a paragraph.

    Args:
        token: The opening ``<p>`` token (not inspected here).
        tokens: The full token list.
        current_index: Index of the opening ``<p>`` token in *tokens*.
        container: Receives the laid-out paragraph.

    Returns:
        Index of the token to resume from: one past the index reported by
        ``_find_matching_closing_tag``, capped at ``len(tokens)``.
    """
    closing_index = self._find_matching_closing_tag(tokens, current_index, 'p')

    runs = []
    cursor = current_index + 1
    while cursor < closing_index:
        inner = tokens[cursor]
        if inner['type'] == 'text':
            text = inner['content'].strip()
            if text:
                runs.append((text, self.font_stack[-1]))
        elif inner['type'] == 'tag' and not inner['closing']:
            # Only inline formatting is interpreted inside a paragraph; other
            # opening tags are stepped over.
            if inner['name'] in ['b', 'strong', 'i', 'em', 'u', 'a']:
                cursor = self._handle_inline_tag_with_content(inner, tokens, cursor, runs)
                continue
        cursor += 1

    if runs:
        self._create_and_add_paragraph(runs, container)

    if closing_index < len(tokens):
        return closing_index + 1
    return len(tokens)
|
||||||
|
|
||||||
|
def _handle_header_tag(self, token, tokens, current_index, container):
    """
    Consume an ``<h1>``..``<h6>`` element and add it as a bold paragraph.

    A bold font sized per heading level is pushed on ``self.font_stack``
    while the heading content is collected, then popped again.

    Args:
        token: The opening heading token; ``token['name']`` selects the size.
        tokens: The full token list.
        current_index: Index of the opening heading token in *tokens*.
        container: Receives the heading lines plus an extra 10px spacer.

    Returns:
        Index of the token to resume from: one past the index reported by
        ``_find_matching_closing_tag``, capped at ``len(tokens)``.
    """
    level = token['name']

    heading_sizes = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
    heading_font = self.font_stack[-1].with_size(heading_sizes[level]).with_weight(FontWeight.BOLD)
    self.font_stack.append(heading_font)

    closing_index = self._find_matching_closing_tag(tokens, current_index, level)

    runs = []
    cursor = current_index + 1
    while cursor < closing_index:
        inner = tokens[cursor]
        if inner['type'] == 'text':
            text = inner['content'].strip()
            if text:
                runs.append((text, self.font_stack[-1]))
        elif inner['type'] == 'tag' and not inner['closing']:
            # Links are not interpreted inside headings, only b/i/u styling.
            if inner['name'] in ['b', 'strong', 'i', 'em', 'u']:
                cursor = self._handle_inline_tag_with_content(inner, tokens, cursor, runs)
                continue
        cursor += 1

    # Restore the font that was active before the heading.
    if len(self.font_stack) > 1:
        self.font_stack.pop()

    if runs:
        self._create_and_add_paragraph(runs, container)
        # Headings get extra breathing room on top of the paragraph spacer.
        container.add_child(Box((0, 0), (1, 10)))

    if closing_index < len(tokens):
        return closing_index + 1
    return len(tokens)
|
||||||
|
|
||||||
|
def _handle_inline_tag_with_content(self, token, tokens, current_index, paragraph_content):
    """
    Collect the text inside an inline formatting element.

    The font derived for the tag is pushed on ``self.font_stack`` while the
    element's content is gathered, and is removed again before returning.
    Nested ``b``/``strong``/``i``/``em``/``u`` elements are handled
    recursively.

    Args:
        token: The opening inline tag token; ``token['name']`` picks the style
            (bold, italic, underline, or blue underlined for ``a``).
        tokens: The full token list.
        current_index: Index of *token* in *tokens*.
        paragraph_content: List extended in place with ``(text, font)`` runs.

    Returns:
        Index of the token to resume from: one past the index reported by
        ``_find_matching_closing_tag``, capped at ``len(tokens)``.
    """
    tag_name = token['name']

    # Derive the font for this element. Unrecognised tags leave the stack
    # untouched.
    styled_font = None
    pushed = False
    if tag_name in ('b', 'strong'):
        styled_font = self.font_stack[-1].with_weight(FontWeight.BOLD)
        pushed = True
    elif tag_name in ('i', 'em'):
        styled_font = self.font_stack[-1].with_style(FontStyle.ITALIC)
        pushed = True
    elif tag_name == 'u':
        styled_font = self.font_stack[-1].with_decoration(TextDecoration.UNDERLINE)
        pushed = True
    elif tag_name == 'a':
        styled_font = self.font_stack[-1].with_colour((0, 0, 255)).with_decoration(TextDecoration.UNDERLINE)
        pushed = True

    if pushed:
        self.font_stack.append(styled_font)

    try:
        content_end = self._find_matching_closing_tag(tokens, current_index, tag_name)

        i = current_index + 1
        while i < content_end:
            content_token = tokens[i]
            if content_token['type'] == 'text':
                text = content_token['content'].strip()
                if text:
                    paragraph_content.append((text, self.font_stack[-1]))
            elif content_token['type'] == 'tag' and not content_token['closing']:
                # Nested inline formatting stacks on top of the current font.
                if content_token['name'] in ['b', 'strong', 'i', 'em', 'u']:
                    i = self._handle_inline_tag_with_content(content_token, tokens, i, paragraph_content)
                    continue
            i += 1
    finally:
        # Pop only what this call pushed. Popping unconditionally would remove
        # the caller's font when the tag was not recognised, and skipping the
        # pop on an exception would leave the stack unbalanced.
        if pushed and len(self.font_stack) > 1:
            self.font_stack.pop()

    return content_end + 1 if content_end < len(tokens) else len(tokens)
|
||||||
|
|
||||||
def _handle_tag_with_content(self, token, tokens, current_index, container):
|
def _handle_tag_with_content(self, token, tokens, current_index, container):
|
||||||
"""Handle tags and their content, returning the new index position"""
|
"""Handle tags and their content, returning the new index position"""
|
||||||
@ -517,11 +709,15 @@ class BrowserWindow:
|
|||||||
if hasattr(container, '_children'):
|
if hasattr(container, '_children'):
|
||||||
for child in container._children:
|
for child in container._children:
|
||||||
if hasattr(child, '_origin'):
|
if hasattr(child, '_origin'):
|
||||||
child_offset = (offset[0] + child._origin[0], offset[1] + child._origin[1])
|
# Convert numpy arrays to tuples for consistent coordinate handling
|
||||||
|
child_origin = tuple(child._origin) if hasattr(child._origin, '__iter__') else child._origin
|
||||||
|
child_size = tuple(child._size) if hasattr(child._size, '__iter__') else child._size
|
||||||
|
|
||||||
|
child_offset = (offset[0] + child_origin[0], offset[1] + child_origin[1])
|
||||||
|
|
||||||
# Check if element is clickable
|
# Check if element is clickable
|
||||||
if isinstance(child, (RenderableLink, RenderableButton)):
|
if isinstance(child, (RenderableLink, RenderableButton)):
|
||||||
elements.append((child, child_offset, child._size))
|
elements.append((child, child_offset, child_size))
|
||||||
|
|
||||||
# Recursively check children
|
# Recursively check children
|
||||||
if hasattr(child, '_children'):
|
if hasattr(child, '_children'):
|
||||||
|
|||||||
514
pyWebLayout/typesetting/paragraph_layout.py
Normal file
514
pyWebLayout/typesetting/paragraph_layout.py
Normal file
@ -0,0 +1,514 @@
|
|||||||
|
"""
|
||||||
|
Paragraph layout system for pyWebLayout.
|
||||||
|
|
||||||
|
This module provides functionality for breaking paragraphs into lines and managing
|
||||||
|
text flow within paragraphs, including word wrapping, hyphenation, pagination,
|
||||||
|
and state management for resumable rendering.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import List, Tuple, Optional, Union, Dict, Any
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
from pyWebLayout.abstract.block import Paragraph
|
||||||
|
from pyWebLayout.abstract.inline import Word, FormattedSpan
|
||||||
|
from pyWebLayout.concrete.text import Line, RenderableWord
|
||||||
|
from pyWebLayout.style import Font
|
||||||
|
from pyWebLayout.style.layout import Alignment
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ParagraphRenderingState:
    """
    Serializable snapshot of how far a paragraph has been rendered.

    Lets a layout pass be interrupted (for example at a page boundary) and
    resumed later. The state round-trips through plain dicts and JSON.
    """
    # Unique identifier for the paragraph
    paragraph_id: str
    # Index of the current word being processed
    current_word_index: int = 0
    # Character index within the current word (for partial words)
    current_char_index: int = 0
    # Number of lines already rendered
    rendered_lines: int = 0
    # Estimated total lines needed
    total_lines_estimated: int = 0
    # Whether paragraph rendering is complete
    completed: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dictionary keyed by field name."""
        snapshot = asdict(self)
        return snapshot

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ParagraphRenderingState':
        """Build a state from a dictionary whose keys are field names."""
        restored = cls(**data)
        return restored

    def to_json(self) -> str:
        """Return the state encoded as a JSON object string."""
        payload = self.to_dict()
        return json.dumps(payload)

    @classmethod
    def from_json(cls, json_str: str) -> 'ParagraphRenderingState':
        """Build a state from a JSON object string produced by ``to_json``."""
        payload = json.loads(json_str)
        return cls.from_dict(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ParagraphLayoutResult:
    """
    Result of paragraph layout operation.

    Contains the rendered lines and information about remaining content.
    Returned by ``ParagraphLayout.layout_paragraph_with_pagination``.
    """
    # Lines produced by this layout pass, in top-to-bottom order.
    lines: List[Line]
    # Paragraph holding the content that was not laid out, or None.
    remaining_paragraph: Optional[Paragraph] = None
    # Resume information for a later layout pass, or None.
    state: Optional[ParagraphRenderingState] = None
    # Height consumed by ``lines``, including the spacing between them.
    total_height: int = 0
    # True when the whole paragraph was laid out in this pass.
    is_complete: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
class ParagraphLayout:
    """
    Handles the layout of paragraph content into lines.

    This class takes a paragraph containing words and formatted spans and
    breaks it down into a series of lines that fit within specified
    constraints. All public entry points share one line-breaking routine
    (``_iter_lines``), so they agree on where lines break.
    """

    def __init__(
        self,
        line_width: int,
        line_height: int,
        word_spacing: Tuple[int, int] = (3, 8),  # min, max spacing
        line_spacing: int = 2,  # spacing between lines
        halign: Alignment = Alignment.LEFT,
        valign: Alignment = Alignment.CENTER
    ):
        """
        Initialize a paragraph layout manager.

        Args:
            line_width: Maximum width for each line
            line_height: Height of each line
            word_spacing: Tuple of (min_spacing, max_spacing) between words
            line_spacing: Vertical spacing between lines
            halign: Horizontal alignment of text within lines
            valign: Vertical alignment of text within lines
        """
        self.line_width = line_width
        self.line_height = line_height
        self.word_spacing = word_spacing
        self.line_spacing = line_spacing
        self.halign = halign
        self.valign = valign

    def layout_paragraph(self, paragraph: Paragraph) -> List[Line]:
        """
        Layout a paragraph into a series of lines.

        Args:
            paragraph: The paragraph to layout

        Returns:
            List of Line objects containing the paragraph's content
            (empty when the paragraph has no words)
        """
        all_words = self._collect_words_from_paragraph(paragraph)
        return [line for line, _, _ in self._iter_lines(all_words)]

    def _iter_lines(
        self,
        words: List[Tuple[str, Font]],
        start_word_index: int = 0,
        start_char_index: int = 0
    ):
        """
        Lazily break *words* into lines, starting at the given position.

        Yields ``(line, next_word_index, next_char_index)`` for every finished
        line, where the two indices locate the first content that is NOT on
        that line. A new ``Line`` is only created when the consumer asks for
        the next item, so a caller that stops early leaves no dangling line
        linked to the last one it kept.

        Assumes ``Line.add_word`` returns None when the text fit, the same
        text when nothing fit, and the unplaced remainder when only part of
        it fit - this is how the surrounding code has always used it; confirm
        against ``Line``.

        Args:
            words: ``(text, font)`` tuples for the whole paragraph.
            start_word_index: Index of the first word to place.
            start_char_index: Characters of that word to skip because they
                were already rendered.
        """
        word_index = max(start_word_index, 0)
        char_index = max(start_char_index, 0)
        pending_text = None  # part of words[word_index] still to be placed
        current_line = None
        previous_line = None
        line_number = 0

        while word_index < len(words):
            full_text, word_font = words[word_index]

            if pending_text is None:
                pending_text = full_text[char_index:]
                if char_index and not pending_text:
                    # The resume offset already covers the whole word.
                    word_index += 1
                    char_index = 0
                    pending_text = None
                    continue

            if current_line is None:
                current_line = Line(
                    spacing=self.word_spacing,
                    origin=(0, line_number * (self.line_height + self.line_spacing)),
                    size=(self.line_width, self.line_height),
                    font=word_font,
                    halign=self.halign,
                    valign=self.valign,
                    previous=previous_line
                )
                if previous_line is not None:
                    previous_line.set_next(current_line)

            overflow = current_line.add_word(pending_text, word_font)

            if overflow is None:
                # The text fit completely; move on to the next word.
                word_index += 1
                char_index = 0
                pending_text = None
                continue

            if len(overflow) >= len(pending_text):
                # Nothing was placed (this covers overflow == pending_text).
                if current_line.renderable_words:
                    # The line is full: emit it and retry the SAME text on a
                    # fresh line. A `continue` in a for-loop would have
                    # skipped the word instead.
                    yield current_line, word_index, char_index
                    previous_line = current_line
                    current_line = None
                    line_number += 1
                else:
                    # Even an empty line cannot take this text; skip it so the
                    # loop is guaranteed to terminate.
                    word_index += 1
                    char_index = 0
                    pending_text = None
                continue

            # Part of the text fit: the line is done and the remainder carries
            # over. The offset is measured against the whole word so that it
            # stays correct when the word is split more than once.
            char_index = max(len(full_text) - len(overflow), 0)
            pending_text = overflow
            yield current_line, word_index, char_index
            previous_line = current_line
            current_line = None
            line_number += 1

        # Emit the line that was still being filled when the words ran out.
        if current_line is not None and current_line.renderable_words:
            yield current_line, len(words), 0

    def _collect_words_from_paragraph(self, paragraph: Paragraph) -> List[Tuple[str, Font]]:
        """
        Collect all words from a paragraph, including from formatted spans.

        Words added directly to the paragraph come first, followed by the
        words of each formatted span.

        Args:
            paragraph: The paragraph to collect words from

        Returns:
            List of tuples (word_text, font) for each word in the paragraph
        """
        all_words = [(word.text, word.style) for _, word in paragraph.words()]

        for span in paragraph.spans():
            all_words.extend((word.text, word.style) for word in span.words)

        return all_words

    def calculate_paragraph_height(self, paragraph: Paragraph) -> int:
        """
        Calculate the total height needed to render a paragraph.

        Args:
            paragraph: The paragraph to calculate height for

        Returns:
            Total height in pixels needed for the paragraph: every line's
            height plus the spacing between consecutive lines, 0 if empty
        """
        line_count = len(self.layout_paragraph(paragraph))
        if line_count == 0:
            return 0
        return line_count * self.line_height + (line_count - 1) * self.line_spacing

    def get_line_at_position(self, paragraph: Paragraph, y_position: int) -> Optional[Tuple[int, Line]]:
        """
        Get the line at a specific Y position within the paragraph.

        Args:
            paragraph: The paragraph to query
            y_position: Y position relative to the paragraph's top

        Returns:
            Tuple of (line_index, Line), or None if the position is outside
            the paragraph or falls in the spacing between two lines
        """
        pitch = self.line_height + self.line_spacing
        for index, line in enumerate(self.layout_paragraph(paragraph)):
            top = index * pitch
            if top <= y_position < top + self.line_height:
                return (index, line)
        return None

    def fit_paragraph_in_height(self, paragraph: Paragraph, max_height: int) -> Tuple[List[Line], Optional[Paragraph]]:
        """
        Fit as many lines of a paragraph as possible within a given height.

        Args:
            paragraph: The paragraph to fit
            max_height: Maximum height available

        Returns:
            Tuple of (lines_that_fit, remaining_paragraph_or_None). The
            remainder holds only the content that is not on the returned
            lines; when no line fits at all it is the original paragraph.
        """
        all_words = self._collect_words_from_paragraph(paragraph)

        lines_that_fit = []
        current_height = 0
        resume_word, resume_char = 0, 0

        for line, next_word, next_char in self._iter_lines(all_words):
            line_height_needed = self.line_height
            if lines_that_fit:  # spacing precedes every line except the first
                line_height_needed += self.line_spacing

            if current_height + line_height_needed > max_height:
                break

            lines_that_fit.append(line)
            current_height += line_height_needed
            resume_word, resume_char = next_word, next_char
        else:
            # The loop ran to completion: everything fit.
            return (lines_that_fit, None)

        if not lines_that_fit:
            # Nothing was consumed, so the caller gets its own paragraph back.
            return (lines_that_fit, paragraph)

        remaining = self._create_remaining_paragraph(paragraph, all_words, resume_word, resume_char)
        return (lines_that_fit, remaining)

    def layout_paragraph_with_pagination(
        self,
        paragraph: Paragraph,
        max_height: int,
        state: Optional[ParagraphRenderingState] = None
    ) -> ParagraphLayoutResult:
        """
        Layout a paragraph with pagination support and state management.

        The first line of a pass is placed even when it is taller than
        *max_height*, unless it is also the paragraph's last line; every
        further line is only started if it fits. *state* is updated in place
        and returned inside the result.

        Args:
            paragraph: The paragraph to layout
            max_height: Maximum height available for rendering
            state: Optional existing state to resume from

        Returns:
            ParagraphLayoutResult containing lines, state, and completion info
        """
        if state is None:
            # The object identity serves as the paragraph's identifier.
            state = ParagraphRenderingState(paragraph_id=str(id(paragraph)))

        all_words = self._collect_words_from_paragraph(paragraph)

        if not all_words:
            state.completed = True
            return ParagraphLayoutResult(
                lines=[],
                state=state,
                is_complete=True,
                total_height=0
            )

        lines = []
        current_height = 0
        # Position of the first content that is not yet on a kept line.
        next_word = max(state.current_word_index, 0)
        next_char = max(state.current_char_index, 0)

        for line, word_after, char_after in self._iter_lines(all_words, next_word, next_char):
            line_height_needed = self.line_height
            if lines:  # spacing precedes every line except the first
                line_height_needed += self.line_spacing

            is_final_line = word_after >= len(all_words)
            if is_final_line and current_height + line_height_needed > max_height:
                # The closing line does not fit; resume from where it started
                # so none of its words are lost.
                return self._paused_result(
                    paragraph, all_words, state, lines, current_height, next_word, next_char
                )

            lines.append(line)
            current_height += line_height_needed
            next_word, next_char = word_after, char_after

            # Decide BEFORE the next line is created whether it could fit.
            following_height = self.line_height + self.line_spacing
            if not is_final_line and current_height + following_height > max_height:
                return self._paused_result(
                    paragraph, all_words, state, lines, current_height, next_word, next_char
                )

        # All content fit
        state.completed = True
        state.rendered_lines = len(lines)

        return ParagraphLayoutResult(
            lines=lines,
            state=state,
            is_complete=True,
            total_height=current_height
        )

    def _paused_result(
        self,
        paragraph: Paragraph,
        all_words: List[Tuple[str, Font]],
        state: ParagraphRenderingState,
        lines: List[Line],
        total_height: int,
        word_index: int,
        char_index: int
    ) -> ParagraphLayoutResult:
        """Record the resume position in *state* and build an incomplete result."""
        state.current_word_index = word_index
        state.current_char_index = char_index
        state.rendered_lines = len(lines)
        state.completed = False

        return ParagraphLayoutResult(
            lines=lines,
            state=state,
            is_complete=False,
            total_height=total_height,
            remaining_paragraph=self._create_remaining_paragraph(
                paragraph, all_words, word_index, char_index
            )
        )

    def _create_remaining_paragraph(
        self,
        original: Paragraph,
        all_words: List[Tuple[str, Font]],
        start_word_index: int,
        start_char_index: int = 0
    ) -> Paragraph:
        """
        Create a new paragraph containing the remaining unrendered content.

        Args:
            original: The original paragraph
            all_words: All words from the original paragraph
            start_word_index: Index of the first unrendered word
            start_char_index: Character index within the first unrendered word

        Returns:
            New paragraph with remaining content
        """
        # Create a new paragraph with the same style
        remaining_paragraph = Paragraph(style=original.style)

        for offset, (word_text, word_font) in enumerate(all_words[start_word_index:]):
            # Only the first remaining word can be partially rendered.
            if offset == 0 and start_char_index > 0:
                word_text = word_text[start_char_index:]

            if word_text:  # Only add non-empty words
                remaining_paragraph.add_word(Word(word_text, word_font))

        return remaining_paragraph
|
||||||
|
|
||||||
|
|
||||||
|
class ParagraphRenderer:
    """
    Thin front-end that renders paragraphs through a ParagraphLayout.
    """

    @staticmethod
    def render_paragraph(
        paragraph: Paragraph,
        layout: ParagraphLayout,
        max_height: Optional[int] = None
    ) -> Tuple[List[Line], Optional[Paragraph]]:
        """
        Turn a paragraph into lines, with an optional height limit.

        Args:
            paragraph: The paragraph to render
            layout: Layout manager that performs the actual line breaking
            max_height: Height limit; ``None`` means unconstrained

        Returns:
            Without a limit: ``(lines, None)`` where ``lines`` comes from
            ``layout.layout_paragraph``. With a limit: whatever
            ``layout.fit_paragraph_in_height`` returns, passed through
            unchanged (presumably a ``(lines, remaining_paragraph)`` pair --
            confirm against ParagraphLayout).
        """
        # Height-constrained rendering is delegated entirely to the layout.
        if max_height is not None:
            return layout.fit_paragraph_in_height(paragraph, max_height)

        # Unconstrained: everything is laid out, so nothing remains.
        return (layout.layout_paragraph(paragraph), None)
|
||||||
228
tests/test_multiline_rendering.py
Normal file
228
tests/test_multiline_rendering.py
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script to verify multi-line text rendering and line wrapping functionality.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
from pyWebLayout.concrete.text import Text, Line
|
||||||
|
from pyWebLayout.style import Font, FontStyle, FontWeight
|
||||||
|
from pyWebLayout.style.layout import Alignment
|
||||||
|
import os
|
||||||
|
|
||||||
|
def create_multiline_test(sentence, target_lines, line_width, line_height, font_size=14):
    """
    Lay a sentence out over as many fixed-size lines as it needs.

    Args:
        sentence: Text to render; it is split on whitespace into words
        target_lines: Expected number of lines (accepted for the caller's
            bookkeeping; this function does not read it)
        line_width: Width of every line in pixels
        line_height: Height of every line in pixels
        font_size: Font size passed to the Font used for all lines

    Returns:
        tuple: (number_of_lines_created, list_of_lines, combined_image)
    """
    text_font = Font(
        font_path=None,
        font_size=font_size,
        colour=(0, 0, 0, 255)
    )

    pending = sentence.split()
    cursor = 0  # index of the next word that still has to be placed
    lines = []

    while cursor < len(pending):
        # Open a fresh line directly below the ones created so far.
        row = Line(
            spacing=(3, 8),  # min, max spacing
            origin=(0, len(lines) * line_height),
            size=(line_width, line_height),
            font=text_font,
            halign=Alignment.LEFT
        )
        lines.append(row)

        # Feed words to the line until add_word reports something other
        # than None, which is treated as "did not fit".
        first_on_row = cursor
        while cursor < len(pending) and row.add_word(pending[cursor]) is None:
            cursor += 1

        # A line that accepted nothing means the next word can never be
        # placed; report it and stop instead of looping forever.
        if cursor == first_on_row:
            print(f"ERROR: Word '{pending[cursor]}' is too long for line width {line_width}")
            break

    # Stack the rendered lines vertically on a white canvas.
    canvas = Image.new('RGBA', (line_width, len(lines) * line_height), (255, 255, 255, 255))

    for row_index, row in enumerate(lines):
        top = row_index * line_height
        rendered = row.render()
        # The rendered line doubles as its own paste mask.
        canvas.paste(rendered, (0, top), rendered)

        # Outline each line's box in light grey for visual inspection.
        pen = ImageDraw.Draw(canvas)
        pen.rectangle(
            [(0, top), (line_width-1, top + line_height-1)],
            outline=(200, 200, 200),
            width=1
        )

    return len(lines), lines, canvas
|
||||||
|
|
||||||
|
def test_sentence_wrapping():
    """
    Render several sentences and report how many lines each one needed.

    Each scenario is rendered with create_multiline_test (line height 25,
    font size 12), its word distribution is printed, and the combined image
    is saved as a PNG in the current directory. Mismatches between expected
    and actual line counts are only printed, never raised.

    Returns:
        list: one dict per scenario with the keys "test", "expected",
        "actual", "success" and "filename"
    """
    # (sentence, expected_lines, line_width, description)
    scenarios = [
        (
            "This is a simple test sentence that should wrap to exactly two lines.",
            2,
            200,
            "Two-line sentence",
        ),
        (
            "This is a much longer sentence that contains many more words and should definitely wrap across three lines when rendered with the specified width constraints.",
            3,
            180,
            "Three-line sentence",
        ),
        (
            "Here we have an even longer sentence with significantly more content that will require four lines to properly display all the text when using the constrained width setting.",
            4,
            160,
            "Four-line sentence",
        ),
        (
            "Short sentence.",
            1,
            300,
            "Single line sentence",
        ),
        (
            "This sentence has some really long words like supercalifragilisticexpialidocious that might need hyphenation.",
            3,
            150,
            "Sentence with long words",
        ),
    ]

    print("Testing multi-line sentence rendering...\n")

    results = []

    for number, (sentence, expected_lines, line_width, description) in enumerate(scenarios, start=1):
        print(f"Test {number}: {description}")
        print(f" Sentence: \"{sentence}\"")
        print(f" Expected lines: {expected_lines}")
        print(f" Line width: {line_width}px")

        # Run the scenario
        actual_lines, lines, combined_image = create_multiline_test(
            sentence, expected_lines, line_width, 25, font_size=12
        )

        print(f" Actual lines: {actual_lines}")

        # Show which words ended up on which line
        for row_number, line in enumerate(lines, start=1):
            row_text = ' '.join(word.word.text for word in line.renderable_words)
            print(f" Line {row_number}: {row_text}")

        # Persist the rendering under a name derived from the description
        slug = description.lower().replace(' ', '_').replace('-', '_')
        output_filename = f"test_multiline_{number}_{slug}.png"
        combined_image.save(output_filename)
        print(f" Saved as: {output_filename}")

        # Compare against the expectation (report only)
        matched = actual_lines == expected_lines
        if matched:
            print(f" ✓ SUCCESS: Got expected {expected_lines} lines")
        else:
            print(f" ✗ MISMATCH: Expected {expected_lines} lines, got {actual_lines}")

        results.append({
            "test": description,
            "expected": expected_lines,
            "actual": actual_lines,
            "success": matched,
            "filename": output_filename
        })

        print()

    # Summary
    rule = "="*60
    print(rule)
    print("SUMMARY")
    print(rule)

    successful_tests = len([entry for entry in results if entry["success"]])
    total_tests = len(results)

    print(f"Tests passed: {successful_tests}/{total_tests}")
    print()

    for result in results:
        if result["success"]:
            status = "✓ PASS"
        else:
            status = "✗ FAIL"
        print(f"{status} {result['test']}: {result['actual']}/{result['expected']} lines ({result['filename']})")

    return results
|
||||||
|
|
||||||
|
def test_fixed_width_scenarios():
    """
    Render one sentence at several line widths and print line utilization.

    For every width the sentence is laid out with create_multiline_test
    (line height 20, font size 12), each line's text and the share of the
    width it occupies are printed, and the combined image is saved as
    ``test_width_<width>px.png`` in the current directory.
    """
    rule = "="*60
    print("\n" + rule)
    print("TESTING FIXED WIDTH SCENARIOS")
    print(rule)

    # Same sentence, progressively narrower lines
    sentence = "The quick brown fox jumps over the lazy dog near the riverbank."

    for width in [300, 200, 150, 100, 80]:
        print(f"\nTesting width: {width}px")
        actual_lines, lines, combined_image = create_multiline_test(
            sentence, None, width, 20, font_size=12
        )

        # Report how much of each line's width is actually occupied
        for row_number, line in enumerate(lines, start=1):
            line_text = ' '.join(word.word.text for word in line.renderable_words)
            used = line._current_width
            utilization = (used / width) * 100
            print(f" Line {row_number}: \"{line_text}\" (width: {used}/{width}px, {utilization:.1f}% utilization)")

        output_filename = f"test_width_{width}px.png"
        combined_image.save(output_filename)
        print(f" Saved as: {output_filename}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run both visual checks, then tell the user what
    # to look for in the PNG files they produced.
    print("Running multi-line text rendering verification tests...\n")

    # Sentence wrapping scenarios
    results = test_sentence_wrapping()

    # Fixed width scenarios
    test_fixed_width_scenarios()

    closing_notes = [
        "\nAll tests completed. Check the generated PNG files for visual verification.",
        "Look for:",
        "- Proper line wrapping at expected breakpoints",
        "- Good utilization of available line width",
        "- No text cropping at line boundaries",
        "- Proper word spacing and alignment",
    ]
    print("\n".join(closing_notes))
|
||||||
Loading…
x
Reference in New Issue
Block a user