678 lines
26 KiB
Python
678 lines
26 KiB
Python
from typing import List, Tuple, Optional, Dict, Any
|
|
import numpy as np
|
|
import re
|
|
import os
|
|
from urllib.parse import urljoin, urlparse
|
|
from PIL import Image
|
|
|
|
from pyWebLayout.core.base import Renderable, Layoutable
|
|
from .box import Box
|
|
from pyWebLayout.style.layout import Alignment
|
|
from .text import Text
|
|
from .image import RenderableImage
|
|
from .functional import RenderableLink, RenderableButton
|
|
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Image as AbstractImage, HeadingLevel, ListStyle
|
|
from pyWebLayout.abstract.inline import Word
|
|
from pyWebLayout.abstract.functional import Link, LinkType
|
|
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
|
|
from pyWebLayout.typesetting.paragraph_layout import ParagraphLayout, ParagraphLayoutResult
|
|
from pyWebLayout.io.readers.html_extraction import parse_html_string
|
|
from pyWebLayout.typesetting.document_cursor import DocumentCursor, DocumentPosition
|
|
|
|
|
|
class Container(Box, Layoutable):
    """
    A container that can hold multiple renderable objects and lay them out.

    Children are stacked along one axis (``direction``), separated by
    ``spacing`` pixels, and aligned on the other axis according to the
    container's horizontal / vertical alignment.  The positions assigned by
    :meth:`layout` are expressed in the container's own coordinate system,
    i.e. relative to the container's top-left corner.
    """

    def __init__(self, origin, size, direction='vertical', spacing=5,
                 callback=None, sheet=None, mode=None,
                 halign=Alignment.CENTER, valign=Alignment.CENTER,
                 padding: Tuple[int, int, int, int] = (10, 10, 10, 10)):
        """
        Initialize a container.

        Args:
            origin: Top-left corner coordinates
            size: Width and height of the container
            direction: Layout direction ('vertical' or 'horizontal'); any
                value other than 'vertical' is treated as horizontal
            spacing: Space between elements
            callback: Optional callback function
            sheet: Optional image sheet
            mode: Optional image mode
            halign: Horizontal alignment
            valign: Vertical alignment
            padding: Padding as (top, right, bottom, left)
        """
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
        self._children: List[Renderable] = []
        self._direction = direction
        self._spacing = spacing
        self._padding = padding

    def add_child(self, child: Renderable):
        """
        Add a child element to this container.

        Args:
            child: The renderable to append after the existing children

        Returns:
            Self for method chaining
        """
        self._children.append(child)
        return self

    @staticmethod
    def _aligned_offset(alignment, start_alignment, end_alignment,
                        padding_before, available, extent):
        """
        Compute a child's offset on the cross axis.

        Args:
            alignment: The container's alignment on this axis
            start_alignment: Alignment value meaning "flush to the start"
                (LEFT or TOP)
            end_alignment: Alignment value meaning "flush to the end"
                (RIGHT or BOTTOM)
            padding_before: Padding in front of the content area
            available: Extent of the content area on this axis
            extent: Extent of the child on this axis

        Returns:
            Offset of the child from the container's edge
        """
        if alignment == start_alignment:
            return padding_before
        if alignment == end_alignment:
            return padding_before + available - extent
        # CENTER, and any other alignment value, centres the child.
        return padding_before + (available - extent) // 2

    def layout(self):
        """
        Layout the children according to the container's direction and spacing.

        Only children that expose both ``_size`` and ``_origin`` are
        positioned; their ``_origin`` is overwritten with container-local
        coordinates.  Positioned children that are themselves ``Layoutable``
        are laid out recursively.
        """
        if not self._children:
            return

        # Get available space after padding
        padding_top, padding_right, padding_bottom, padding_left = self._padding
        available_width = self._size[0] - padding_left - padding_right
        available_height = self._size[1] - padding_top - padding_bottom

        vertical = self._direction == 'vertical'
        # Running position along the stacking axis.
        main_pos = padding_top if vertical else padding_left

        for child in self._children:
            if not (hasattr(child, '_size') and hasattr(child, '_origin')):
                continue

            child_width, child_height = child._size

            if vertical:
                x_pos = self._aligned_offset(
                    self._halign, Alignment.LEFT, Alignment.RIGHT,
                    padding_left, available_width, child_width
                )
                child._origin = np.array([x_pos, main_pos])
                # Move down for next child
                main_pos += child_height + self._spacing
            else:
                y_pos = self._aligned_offset(
                    self._valign, Alignment.TOP, Alignment.BOTTOM,
                    padding_top, available_height, child_height
                )
                child._origin = np.array([main_pos, y_pos])
                # Move right for next child
                main_pos += child_width + self._spacing

            # Layout the child if it's layoutable
            if isinstance(child, Layoutable):
                child.layout()

    def render(self) -> Image.Image:
        """
        Render the container with all its children.

        Returns:
            The canvas produced by the parent class' ``render()`` with every
            child that has an ``_origin`` pasted onto it
        """
        # Make sure children are laid out
        self.layout()

        # Create base canvas
        canvas = super().render()

        # Render each child and paste it onto the canvas
        for child in self._children:
            if not hasattr(child, '_origin'):
                continue

            child_img = child.render()

            # layout() assigns child origins relative to this container's
            # top-left corner, so they are already canvas coordinates.
            # Subtracting self._origin again would shift the children of any
            # container that is not located at (0, 0).
            position = tuple(int(v) for v in child._origin)

            # Using an image as its own paste mask only works when it has an
            # alpha band; PIL rejects e.g. an RGB image as a mask.
            if child_img.mode in ('RGBA', 'LA', 'RGBa'):
                canvas.paste(child_img, position, child_img)
            else:
                canvas.paste(child_img, position)

        return canvas
|
|
|
|
|
|
class Page(Container):
    """
    Top-level container representing an HTML page.

    A page is a vertical container anchored at (0, 0) that converts abstract
    document blocks (paragraphs, headings, lists, images, ...) into
    renderable children and draws them onto a canvas filled with a
    background colour.
    """

    def __init__(self, size=(800, 600), background_color=(255, 255, 255), mode='RGBA'):
        """
        Initialize a page.

        Args:
            size: Width and height of the page
            background_color: Background color as RGB tuple
            mode: Image mode
        """
        super().__init__(
            origin=(0, 0),
            size=size,
            direction='vertical',
            spacing=10,
            mode=mode,
            halign=Alignment.CENTER,  # Center horizontally to match test expectation
            valign=Alignment.TOP,
            padding=(10, 10, 10, 10)  # Use 10 padding to match test expectation
        )
        self._background_color = background_color
        # Document position this page starts from; see set_start_position().
        self._start_position: Optional[DocumentPosition] = None

    def render_document(self, document, start_block: int = 0, max_blocks: Optional[int] = None) -> 'Page':
        """
        Render blocks from a Document into this page.

        Args:
            document: The Document object to render
            start_block: Which block to start rendering from (for pagination)
            max_blocks: Maximum number of blocks to render (None for all remaining)

        Returns:
            Self for method chaining
        """
        # Get blocks to render
        blocks = document.blocks[start_block:]
        if max_blocks is not None:
            blocks = blocks[:max_blocks]

        # render_blocks() clears the existing children and converts the blocks.
        return self.render_blocks(blocks)

    def render_blocks(self, blocks: List[Block]) -> 'Page':
        """
        Render a list of abstract blocks into this page.

        Any children already on the page are discarded first.  Blocks whose
        conversion yields nothing are skipped.

        Args:
            blocks: List of Block objects to render

        Returns:
            Self for method chaining
        """
        # Clear existing children
        self._children.clear()

        # Convert abstract blocks to renderable objects and add to page
        for block in blocks:
            renderable = self._convert_block_to_renderable(block)
            if renderable:
                self.add_child(renderable)

        return self

    def render_chapter(self, chapter) -> 'Page':
        """
        Render a Chapter into this page.

        Args:
            chapter: The Chapter object to render

        Returns:
            Self for method chaining
        """
        return self.render_blocks(chapter.blocks)

    def render_from_cursor(self, cursor: DocumentCursor, max_height: Optional[int] = None) -> Tuple['Page', DocumentCursor]:
        """
        Render content starting from a document cursor position, filling the page
        and returning the cursor position where the page ends.

        The passed-in cursor is not modified; a copy is advanced instead.  The
        starting position is recorded so that get_position_bookmark() can
        return it afterwards.

        Args:
            cursor: Starting position in the document
            max_height: Maximum height to fill (defaults to page height minus 40)

        Returns:
            Tuple of (self, end_cursor) where end_cursor points to where next page should start
        """
        # Clear existing children
        self._children.clear()

        # Remember where this page starts so it can be bookmarked.
        self.set_start_position(cursor.position.copy())

        if max_height is None:
            # NOTE(review): the page padding is 10 px top and bottom, so 40
            # leaves extra slack - presumably intentional; confirm.
            max_height = self._size[1] - 40

        # NOTE(review): the running height below does not include the spacing
        # that layout() inserts between children - confirm this is acceptable.
        current_height = 0
        end_cursor = DocumentCursor(cursor.document, cursor.position.copy())

        # Keep adding content until we reach the height limit
        while current_height < max_height:
            block = end_cursor.get_current_block()
            if block is None:
                break  # End of document

            renderable = self._convert_block_to_renderable(block)
            if not renderable:
                # Skip blocks that can't be rendered
                if not end_cursor.advance_block():
                    break
                continue

            renderable_height = getattr(renderable, '_size', [0, 0])[1]

            if current_height + renderable_height > max_height:
                # This block would exceed the page - handle partial rendering
                partial_renderable = None
                if isinstance(block, Paragraph):
                    # For paragraphs, we can render partial content
                    partial_renderable = self._render_partial_paragraph(
                        block, max_height - current_height, end_cursor
                    )
                    if partial_renderable:
                        self.add_child(partial_renderable)
                        current_height += getattr(partial_renderable, '_size', [0, 0])[1]

                if not partial_renderable and not self._children:
                    # Nothing has been placed and this block can never fit on
                    # an empty page.  Place it anyway and move on; otherwise
                    # the caller would receive an empty page with an unmoved
                    # cursor and could never make progress.
                    self.add_child(renderable)
                    end_cursor.advance_block()
                break

            # Add the full block
            self.add_child(renderable)
            current_height += renderable_height

            # Move cursor to next block
            if not end_cursor.advance_block():
                break  # End of document

        return self, end_cursor

    def _render_partial_paragraph(self, paragraph: Paragraph, available_height: int, cursor: DocumentCursor) -> Optional[Container]:
        """
        Render part of a paragraph that fits in the available height.
        Updates the cursor to point to the remaining content.

        NOTE(review): lines are always taken from the start of the paragraph;
        an existing ``paragraph_line_index`` on the cursor is not consulted,
        so resuming a split paragraph on the next page looks unsupported -
        confirm against DocumentCursor.

        Args:
            paragraph: The paragraph to partially render
            available_height: Available height for content
            cursor: Cursor to update with new position

        Returns:
            Container with partial paragraph content or None
        """
        base_line_height = 20
        line_spacing = 3

        # Use the paragraph layout system to break into lines
        layout = ParagraphLayout(
            line_width=self._size[0] - 40,  # Account for margins
            line_height=base_line_height,
            word_spacing=(3, 8),
            line_spacing=line_spacing,
            halign=Alignment.LEFT
        )

        # Layout the paragraph into lines
        lines = layout.layout_paragraph(paragraph)
        if not lines:
            return None

        # Calculate how many lines we can fit
        line_height = base_line_height + line_spacing
        max_lines = available_height // line_height
        if max_lines <= 0:
            return None

        # Take only the lines that fit
        lines_to_render = lines[:max_lines]

        # Update cursor position to point to remaining content
        if max_lines < len(lines):
            # We have remaining lines - update cursor to point to next line in paragraph
            cursor.position.paragraph_line_index = max_lines
        else:
            # We rendered the entire paragraph - cursor should advance to next block
            cursor.advance_block()

        # Create container for the partial paragraph
        paragraph_container = Container(
            origin=(0, 0),
            size=(self._size[0], len(lines_to_render) * line_height),
            direction='vertical',
            spacing=0,
            padding=(0, 0, 0, 0)
        )

        # Add the lines we can fit
        for line in lines_to_render:
            paragraph_container.add_child(line)

        return paragraph_container

    def get_position_bookmark(self) -> Optional[DocumentPosition]:
        """
        Get a bookmark position representing the start of content on this page.
        This can be used to return to this exact page later.

        Returns:
            DocumentPosition recorded by set_start_position() (which
            render_from_cursor() calls), or None if none has been recorded
        """
        # getattr keeps this safe for instances created without __init__.
        return getattr(self, '_start_position', None)

    def set_start_position(self, position: DocumentPosition):
        """
        Set the document position that this page starts from.

        Args:
            position: The starting position for this page
        """
        self._start_position = position

    def fill_with_blocks(self, blocks: List[Block], start_index: int = 0) -> Tuple[int, List[Block]]:
        """
        Fill this page with blocks using the external pagination system.

        This method delegates to BlockPaginator.fill_page() instead of the
        page's own pagination logic.

        Args:
            blocks: List of blocks to add to the page
            start_index: Index in blocks list to start from

        Returns:
            Tuple of (next_start_index, remainder_blocks)
            - next_start_index: Index where pagination stopped
            - remainder_blocks: Any partial blocks that need to continue on next page
        """
        # Imported here rather than at module level, as in the original code.
        from pyWebLayout.typesetting.block_pagination import BlockPaginator

        paginator = BlockPaginator()
        return paginator.fill_page(self, blocks, start_index)

    def try_add_block_external(self, block: Block, available_height: Optional[int] = None) -> Tuple[bool, Optional[Block], int]:
        """
        Try to add a single block to this page using external handlers.

        This method uses the BlockPaginator system to determine if a block
        can fit on the page and handle any remainder content.

        Args:
            block: The block to try to add
            available_height: Available height (defaults to remaining page height)

        Returns:
            Tuple of (success, remainder_block, height_used)
            - success: Whether the block was successfully added
            - remainder_block: Any remaining content that couldn't fit
            - height_used: Height consumed by the added content
        """
        from pyWebLayout.typesetting.block_pagination import BlockPaginator

        if available_height is None:
            # Calculate available height based on current content
            current_height = self._calculate_current_content_height()
            max_height = self._size[1] - 40  # Account for padding
            available_height = max_height - current_height

        paginator = BlockPaginator()
        result = paginator.paginate_block(block, self, available_height)

        if result.success and result.renderable:
            self.add_child(result.renderable)
            return True, result.remainder, result.height_used

        # Only report a remainder when the paginator says the block can continue.
        return False, result.remainder if result.can_continue else None, 0

    def _calculate_current_content_height(self) -> int:
        """
        Calculate the height currently used by content on this page.

        Returns:
            The largest bottom edge (origin y + height) among the children
            that have both ``_origin`` and ``_size``, or 0 for an empty page
        """
        if not self._children:
            return 0

        # Trigger layout to ensure positions are calculated
        self.layout()

        max_bottom = 0
        for child in self._children:
            if hasattr(child, '_origin') and hasattr(child, '_size'):
                child_bottom = child._origin[1] + child._size[1]
                max_bottom = max(max_bottom, child_bottom)

        return max_bottom

    def _convert_block_to_renderable(self, block: Block) -> Optional[Renderable]:
        """
        Convert an abstract block to a renderable object.

        Conversion is best-effort: any exception raised by a converter is
        turned into a red error message renderable instead of propagating.

        Args:
            block: Abstract block to convert

        Returns:
            Renderable object, an error Text if conversion raised, or None
            if the converter produced nothing
        """
        try:
            if isinstance(block, Paragraph):
                return self._convert_paragraph(block)
            if isinstance(block, Heading):
                return self._convert_heading(block)
            if isinstance(block, HList):
                return self._convert_list(block)
            if isinstance(block, AbstractImage):
                return self._convert_image(block)
            # For other block types, try to extract text content
            return self._convert_generic_block(block)
        except Exception as e:
            # Return error text for failed conversions
            error_font = Font(colour=(255, 0, 0))
            return Text(f"[Conversion Error: {e}]", error_font)

    def _convert_paragraph(self, paragraph: Paragraph) -> Optional[Container]:
        """
        Convert a paragraph block to a Container with proper Line objects.

        The paragraph text is split on whitespace and packed into justified
        lines as wide as the page's content area.

        Args:
            paragraph: The paragraph to convert

        Returns:
            Container holding one Line per text line, or None when the
            paragraph has no text or no line could be produced
        """
        # Extract text content directly
        text_content = self._extract_text_from_block(paragraph)
        if not text_content:
            return None

        # Use the font of the first word that has one; fall back to a default.
        paragraph_font = Font(font_size=16)
        try:
            for _, word in paragraph.words():
                if hasattr(word, 'font') and word.font:
                    paragraph_font = word.font
                    break
        except Exception:
            # Best effort: keep the default font if extraction fails.  A bare
            # ``except`` would also swallow KeyboardInterrupt / SystemExit.
            pass

        # Calculate available width using the page's padding system
        padding_left = self._padding[3]
        padding_right = self._padding[1]
        available_width = self._size[0] - padding_left - padding_right

        # Split into words
        words = text_content.split()
        if not words:
            return None

        # Import the Line class
        from .text import Line

        lines = []
        line_height = paragraph_font.font_size + 4  # Font size + small line spacing
        word_spacing = (3, 8)  # min, max spacing between words

        word_index = 0
        line_y_offset = 0

        while word_index < len(words):
            # Create a new justified line with proper bounding box
            line = Line(
                spacing=word_spacing,
                origin=(0, line_y_offset),
                size=(available_width, line_height),
                font=paragraph_font,
                halign=Alignment.JUSTIFY
            )

            # Add words to this line until it's full
            while word_index < len(words):
                remaining_text = line.add_word(words[word_index], paragraph_font)

                if remaining_text is None:
                    # Word fit completely
                    word_index += 1
                    continue

                if remaining_text != words[word_index]:
                    # Word was partially fit (hyphenated); carry the rest
                    # over to the next line.
                    words[word_index] = remaining_text
                # Either way this line is full.
                break

            if len(line.renderable_words) > 0:
                lines.append(line)
                line_y_offset += line_height
            else:
                # Not even one word fit on an empty line: skip the word to
                # prevent an infinite loop.
                word_index += 1

        if not lines:
            return None

        # Create a container for the lines
        total_height = len(lines) * line_height
        paragraph_container = Container(
            origin=(0, 0),
            size=(available_width, total_height),
            direction='vertical',
            spacing=0,  # Lines handle their own spacing
            padding=(0, 0, 0, 0)  # No additional padding since page handles it
        )

        # Add each line to the container
        for line in lines:
            paragraph_container.add_child(line)

        return paragraph_container

    def _convert_heading(self, heading: Heading) -> Optional[Text]:
        """
        Convert a heading block to a Text renderable with appropriate font.

        Args:
            heading: The heading to convert

        Returns:
            Bold Text sized by heading level (16 for unknown levels), or
            None when the heading has no words
        """
        # Extract text content
        words = [word.text for _, word in heading.words()]
        if not words:
            return None

        text_content = ' '.join(words)

        # Create heading font based on level
        size_map = {
            HeadingLevel.H1: 24,
            HeadingLevel.H2: 20,
            HeadingLevel.H3: 18,
            HeadingLevel.H4: 16,
            HeadingLevel.H5: 14,
            HeadingLevel.H6: 12
        }
        font_size = size_map.get(heading.level, 16)
        heading_font = Font(font_size=font_size, weight=FontWeight.BOLD)

        return Text(text_content, heading_font)

    def _convert_list(self, hlist: HList) -> Optional[Container]:
        """
        Convert a list block to a Container with list items.

        Args:
            hlist: The list to convert

        Returns:
            Container with one Text per non-empty item, or None when no
            item produced any text
        """
        list_container = Container(
            origin=(0, 0),
            size=(self._size[0] - 40, 100),  # Adjust size as needed
            direction='vertical',
            spacing=5,
            padding=(5, 20, 5, 20)  # Add indentation
        )

        for item in hlist.items():
            # Convert each list item
            item_text = self._extract_text_from_block(item)
            if not item_text:
                continue

            # Add bullet or number prefix
            if hlist.style == ListStyle.UNORDERED:
                prefix = "• "
            else:
                # For ordered lists, we'd need to track the index
                prefix = "- "

            item_font = Font()
            text_renderable = Text(prefix + item_text, item_font)
            list_container.add_child(text_renderable)

        return list_container if list_container._children else None

    def _convert_image(self, image: AbstractImage) -> Optional[Renderable]:
        """
        Convert an image block to a RenderableImage.

        Args:
            image: The abstract image to convert

        Returns:
            RenderableImage limited to 400x300, or a grey placeholder Text
            when the image could not be created
        """
        try:
            # Try to create the image
            return RenderableImage(image, max_width=400, max_height=300)
        except Exception as e:
            print(f"Image rendering failed: {e}")
            # Return placeholder text if image fails.  Prefer the alt text,
            # then the source, then a fixed label; each lookup is guarded so
            # a missing attribute cannot hide the alt text or raise.
            label = getattr(image, 'alt_text', None) or getattr(image, 'src', None) or 'Unknown'
            error_font = Font(colour=(128, 128, 128))
            return Text(f"[Image: {label}]", error_font)

    def _convert_generic_block(self, block: Block) -> Optional[Text]:
        """
        Convert a generic block by extracting its text content.

        Args:
            block: The block to convert

        Returns:
            Text in the default font, or None when no text was extracted
        """
        text_content = self._extract_text_from_block(block)
        if text_content:
            return Text(text_content, Font())
        return None

    def _extract_text_from_block(self, block: Block) -> str:
        """
        Extract plain text content from any block type.

        Args:
            block: The block to read

        Returns:
            The block's words joined by single spaces when it has a callable
            ``words``; otherwise ``str(block.text)`` when it has ``text``;
            otherwise ``str(block)``
        """
        if hasattr(block, 'words') and callable(block.words):
            return ' '.join(word.text for _, word in block.words())
        if hasattr(block, 'text'):
            return str(block.text)
        # Every object supports str(), so no further fallback is reachable.
        return str(block)

    def render(self) -> Image.Image:
        """
        Render the page with all its content.

        Returns:
            A new image of the page size, filled with the background colour,
            with every child that has an ``_origin`` pasted at that origin
        """
        # Make sure children are laid out
        self.layout()

        # Create base canvas with background color
        canvas = Image.new(self._mode, tuple(self._size), self._background_color)
        page_has_alpha = 'A' in self._mode

        # Render each child and paste it onto the canvas
        for child in self._children:
            if not hasattr(child, '_origin'):
                continue

            child_img = child.render()

            # The page sits at (0, 0), so child origins are canvas coordinates.
            rel_pos = tuple(int(v) for v in child._origin)

            # Paste the child onto the canvas with alpha channel if available
            if page_has_alpha and child_img.mode == 'RGBA':
                canvas.paste(child_img, rel_pos, child_img)
            else:
                canvas.paste(child_img, rel_pos)

        return canvas
|