"""Concrete container and page renderables.

Defines ``Container``, a box that stacks child renderables vertically or
horizontally, and ``Page``, a top-level container that converts abstract
document blocks into renderables and draws them onto a PIL image.
"""

from typing import List, Tuple, Optional, Dict, Any
import re
import os
from urllib.parse import urljoin, urlparse

import numpy as np
from PIL import Image

from pyWebLayout.core.base import Renderable, Layoutable
from .box import Box
from pyWebLayout.style.layout import Alignment
from .text import Text
from .image import RenderableImage
from .functional import RenderableLink, RenderableButton
from pyWebLayout.abstract.block import (
    Block,
    Paragraph,
    Heading,
    HList,
    Image as AbstractImage,
    HeadingLevel,
    ListStyle,
)
from pyWebLayout.abstract.inline import Word
from pyWebLayout.abstract.functional import Link, LinkType
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
from pyWebLayout.typesetting.paragraph_layout import ParagraphLayout, ParagraphLayoutResult
from pyWebLayout.io.readers.html_extraction import parse_html_string
from pyWebLayout.typesetting.document_cursor import DocumentCursor, DocumentPosition


# Image modes Pillow accepts as the ``mask`` argument of ``Image.paste``.
# Passing an image in any other mode (e.g. "RGB") as a mask raises ValueError.
_PASTE_MASK_MODES = ("1", "L", "LA", "RGBA")


class Container(Box, Layoutable):
    """
    A container that can hold multiple renderable objects and lay them out.
    """

    def __init__(self, origin, size, direction='vertical', spacing=5, callback=None,
                 sheet=None, mode=None, halign=Alignment.CENTER, valign=Alignment.CENTER,
                 padding: Tuple[int, int, int, int] = (10, 10, 10, 10)):
        """
        Initialize a container.

        Args:
            origin: Top-left corner coordinates
            size: Width and height of the container
            direction: Layout direction ('vertical' or 'horizontal')
            spacing: Space between elements
            callback: Optional callback function
            sheet: Optional image sheet
            mode: Optional image mode
            halign: Horizontal alignment
            valign: Vertical alignment
            padding: Padding as (top, right, bottom, left)
        """
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
        self._children: List[Renderable] = []
        self._direction = direction
        self._spacing = spacing
        self._padding = padding

    def add_child(self, child: Renderable):
        """
        Add a child element to this container.

        Args:
            child: The renderable to append after the existing children

        Returns:
            Self for method chaining
        """
        self._children.append(child)
        return self

    def layout(self):
        """
        Layout the children according to the container's direction and spacing.

        Children are placed one after another along the main axis, starting
        at the leading padding and separated by ``spacing``. On the cross axis
        each child is aligned using ``halign`` (vertical direction) or
        ``valign`` (horizontal direction). Children lacking ``_size`` or
        ``_origin`` are left untouched. Any direction other than 'vertical'
        is treated as horizontal.
        """
        if not self._children:
            return

        # Get available space after padding
        padding_top, padding_right, padding_bottom, padding_left = self._padding
        available_width = self._size[0] - padding_left - padding_right
        available_height = self._size[1] - padding_top - padding_bottom

        if self._direction == 'vertical':
            current_y = padding_top
            for child in self._children:
                if hasattr(child, '_size') and hasattr(child, '_origin'):
                    child_width, child_height = child._size

                    # Calculate horizontal position based on alignment
                    if self._halign == Alignment.LEFT:
                        x_pos = padding_left
                    elif self._halign == Alignment.RIGHT:
                        x_pos = padding_left + available_width - child_width
                    else:  # CENTER
                        x_pos = padding_left + (available_width - child_width) // 2

                    # Set child position
                    child._origin = np.array([x_pos, current_y])

                    # Move down for next child
                    current_y += child_height + self._spacing

                    # Layout the child if it's layoutable
                    if isinstance(child, Layoutable):
                        child.layout()
        else:  # horizontal
            current_x = padding_left
            for child in self._children:
                if hasattr(child, '_size') and hasattr(child, '_origin'):
                    child_width, child_height = child._size

                    # Calculate vertical position based on alignment
                    if self._valign == Alignment.TOP:
                        y_pos = padding_top
                    elif self._valign == Alignment.BOTTOM:
                        y_pos = padding_top + available_height - child_height
                    else:  # CENTER
                        y_pos = padding_top + (available_height - child_height) // 2

                    # Set child position
                    child._origin = np.array([current_x, y_pos])

                    # Move right for next child
                    current_x += child_width + self._spacing

                    # Layout the child if it's layoutable
                    if isinstance(child, Layoutable):
                        child.layout()

    def render(self) -> Image.Image:
        """
        Render the container with all its children.

        Returns:
            The canvas produced by the parent class's ``render`` with every
            child that has an ``_origin`` pasted onto it.
        """
        # Make sure children are laid out
        self.layout()

        # Create base canvas
        canvas = super().render()

        # Render each child and paste it onto the canvas
        for child in self._children:
            if not hasattr(child, '_origin'):
                continue

            child_img = child.render()

            # Calculate child position relative to container.
            # NOTE(review): layout() assigns origins measured from this
            # container's own top-left corner, yet this container's origin is
            # subtracted again here - confirm the intended coordinate
            # convention for containers whose origin is not (0, 0).
            offset = np.asarray(child._origin) - np.asarray(self._origin)
            rel_pos = tuple(int(v) for v in offset)

            # Only use the child as its own mask when its mode is a valid
            # mask mode; otherwise Pillow would raise ValueError.
            if child_img.mode in _PASTE_MASK_MODES:
                canvas.paste(child_img, rel_pos, child_img)
            else:
                canvas.paste(child_img, rel_pos)

        return canvas


class Page(Container):
    """
    Top-level container representing an HTML page.
    """

    def __init__(self, size=(800, 600), background_color=(255, 255, 255), mode='RGBA'):
        """
        Initialize a page.

        Args:
            size: Width and height of the page
            background_color: Background color as RGB tuple
            mode: Image mode
        """
        super().__init__(
            origin=(0, 0),
            size=size,
            direction='vertical',
            spacing=10,
            mode=mode,
            halign=Alignment.CENTER,  # Center horizontally to match test expectation
            valign=Alignment.TOP,
            padding=(10, 10, 10, 10)  # Use 10 padding to match test expectation
        )
        self._background_color = background_color

    def render_document(self, document, start_block: int = 0,
                        max_blocks: Optional[int] = None) -> 'Page':
        """
        Render blocks from a Document into this page.

        Args:
            document: The Document object to render
            start_block: Which block to start rendering from (for pagination)
            max_blocks: Maximum number of blocks to render (None for all remaining)

        Returns:
            Self for method chaining
        """
        # Get blocks to render
        blocks = document.blocks[start_block:]
        if max_blocks is not None:
            blocks = blocks[:max_blocks]

        # Clearing and conversion are shared with render_blocks
        return self.render_blocks(blocks)

    def render_blocks(self, blocks: List[Block]) -> 'Page':
        """
        Render a list of abstract blocks into this page.

        Existing children are discarded first. Blocks whose conversion
        yields a falsy result are skipped.

        Args:
            blocks: List of Block objects to render

        Returns:
            Self for method chaining
        """
        # Clear existing children
        self._children.clear()

        # Convert abstract blocks to renderable objects and add to page
        for block in blocks:
            renderable = self._convert_block_to_renderable(block)
            if renderable:
                self.add_child(renderable)

        return self

    def render_chapter(self, chapter) -> 'Page':
        """
        Render a Chapter into this page.

        Args:
            chapter: The Chapter object to render

        Returns:
            Self for method chaining
        """
        return self.render_blocks(chapter.blocks)

    def render_from_cursor(self, cursor: DocumentCursor,
                           max_height: Optional[int] = None) -> Tuple['Page', DocumentCursor]:
        """
        Render content starting from a document cursor position, filling the
        page and returning the cursor position where the page ends.

        The passed-in cursor is not advanced; a new cursor is created from a
        copy of its position. A copy of the starting position is also stored
        so that ``get_position_bookmark`` can return it afterwards.

        Args:
            cursor: Starting position in the document
            max_height: Maximum height to fill (defaults to page height minus 40)

        Returns:
            Tuple of (self, end_cursor) where end_cursor points to where next
            page should start
        """
        # Clear existing children
        self._children.clear()

        # Remember where this page starts so it can be bookmarked later
        self._start_position = cursor.position.copy()

        if max_height is None:
            # NOTE(review): 40 is larger than the page's actual top + bottom
            # padding (10 + 10) - confirm the intended reserve.
            max_height = self._size[1] - 40

        current_height = 0
        end_cursor = DocumentCursor(cursor.document, cursor.position.copy())

        # Keep adding content until we reach the height limit
        while current_height < max_height:
            block = end_cursor.get_current_block()
            if block is None:
                break  # End of document

            renderable = self._convert_block_to_renderable(block)
            if not renderable:
                # Skip blocks that can't be rendered
                if not end_cursor.advance_block():
                    break
                continue

            renderable_height = getattr(renderable, '_size', [0, 0])[1]

            if current_height + renderable_height > max_height:
                # This block would exceed the page - only paragraphs can be
                # rendered partially; any other block ends the page here.
                if isinstance(block, Paragraph):
                    partial_renderable = self._render_partial_paragraph(
                        block, max_height - current_height, end_cursor
                    )
                    if partial_renderable:
                        self.add_child(partial_renderable)
                        current_height += getattr(partial_renderable, '_size', [0, 0])[1]
                break

            # Add the full block
            self.add_child(renderable)
            current_height += renderable_height

            # Move cursor to next block
            if not end_cursor.advance_block():
                break  # End of document

        return self, end_cursor

    def _render_partial_paragraph(self, paragraph: Paragraph, available_height: int,
                                  cursor: DocumentCursor) -> Optional[Container]:
        """
        Render part of a paragraph that fits in the available height.
        Updates the cursor to point to the remaining content.

        Args:
            paragraph: The paragraph to partially render
            available_height: Available height for content
            cursor: Cursor to update with new position

        Returns:
            Container with partial paragraph content, or None when the layout
            produced no lines or not even one line fits
        """
        line_height = 20
        line_spacing = 3

        # Use the paragraph layout system to break into lines
        layout = ParagraphLayout(
            line_width=self._size[0] - 40,  # Account for margins
            line_height=line_height,
            word_spacing=(3, 8),
            line_spacing=line_spacing,
            halign=Alignment.LEFT
        )

        # Layout the paragraph into lines
        lines = layout.layout_paragraph(paragraph)
        if not lines:
            return None

        # Calculate how many lines we can fit (each row is line + spacing)
        row_height = line_height + line_spacing
        max_lines = available_height // row_height
        if max_lines <= 0:
            return None

        # Take only the lines that fit
        lines_to_render = lines[:max_lines]

        # Update cursor position to point to remaining content
        if max_lines < len(lines):
            # We have remaining lines - record the next line in the paragraph.
            # NOTE(review): nothing in this class reads paragraph_line_index
            # when rendering the following page - verify the consumer.
            cursor.position.paragraph_line_index = max_lines
        else:
            # We rendered the entire paragraph - cursor should advance to next block
            cursor.advance_block()

        # Create container for the partial paragraph
        paragraph_container = Container(
            origin=(0, 0),
            size=(self._size[0], len(lines_to_render) * row_height),
            direction='vertical',
            spacing=0,
            padding=(0, 0, 0, 0)
        )

        # Add the lines we can fit
        for line in lines_to_render:
            paragraph_container.add_child(line)

        return paragraph_container

    def get_position_bookmark(self) -> Optional[DocumentPosition]:
        """
        Get a bookmark position representing the start of content on this page.
        This can be used to return to this exact page later.

        Returns:
            The position stored by ``render_from_cursor`` or
            ``set_start_position``, or None if neither has been called
        """
        return getattr(self, '_start_position', None)

    def set_start_position(self, position: DocumentPosition):
        """
        Set the document position that this page starts from.

        Args:
            position: The starting position for this page
        """
        self._start_position = position

    def fill_with_blocks(self, blocks: List[Block],
                         start_index: int = 0) -> Tuple[int, List[Block]]:
        """
        Fill this page with blocks using the external pagination system.

        This method delegates to BlockPaginator.fill_page and returns its
        result unchanged.

        Args:
            blocks: List of blocks to add to the page
            start_index: Index in blocks list to start from

        Returns:
            Tuple of (next_start_index, remainder_blocks)
            - next_start_index: Index where pagination stopped
            - remainder_blocks: Any partial blocks that need to continue on next page
        """
        # Imported here rather than at module level, as in the original code
        from pyWebLayout.typesetting.block_pagination import BlockPaginator

        paginator = BlockPaginator()
        return paginator.fill_page(self, blocks, start_index)

    def try_add_block_external(self, block: Block,
                               available_height: Optional[int] = None
                               ) -> Tuple[bool, Optional[Block], int]:
        """
        Try to add a single block to this page using external handlers.

        This method uses the BlockPaginator system to determine if a block can
        fit on the page and handle any remainder content.

        Args:
            block: The block to try to add
            available_height: Available height (defaults to page height minus
                40 minus the height already used by content)

        Returns:
            Tuple of (success, remainder_block, height_used)
            - success: Whether the block was successfully added
            - remainder_block: Any remaining content that couldn't fit
            - height_used: Height consumed by the added content
        """
        from pyWebLayout.typesetting.block_pagination import BlockPaginator

        if available_height is None:
            # Calculate available height based on current content
            current_height = self._calculate_current_content_height()
            max_height = self._size[1] - 40  # Account for padding
            available_height = max_height - current_height

        paginator = BlockPaginator()
        result = paginator.paginate_block(block, self, available_height)

        if result.success and result.renderable:
            self.add_child(result.renderable)
            return True, result.remainder, result.height_used

        # Nothing was added; only hand back a remainder the paginator marks
        # as continuable.
        return False, (result.remainder if result.can_continue else None), 0

    def _calculate_current_content_height(self) -> int:
        """
        Calculate the height currently used by content on this page.

        Returns:
            The largest bottom edge (origin y + height) among the laid-out
            children, or 0 when the page has no children
        """
        if not self._children:
            return 0

        # Trigger layout to ensure positions are calculated
        self.layout()

        max_bottom = 0
        for child in self._children:
            if hasattr(child, '_origin') and hasattr(child, '_size'):
                child_bottom = child._origin[1] + child._size[1]
                max_bottom = max(max_bottom, child_bottom)

        return max_bottom

    def _convert_block_to_renderable(self, block: Block) -> Optional[Renderable]:
        """
        Convert an abstract block to a renderable object.

        Conversion is best-effort: any exception raised by a converter is
        caught and replaced by a red error text so one bad block does not
        abort the whole page.

        Args:
            block: Abstract block to convert

        Returns:
            Renderable object, None if the block produced no content, or an
            error Text if conversion raised
        """
        try:
            if isinstance(block, Paragraph):
                return self._convert_paragraph(block)
            elif isinstance(block, Heading):
                return self._convert_heading(block)
            elif isinstance(block, HList):
                return self._convert_list(block)
            elif isinstance(block, AbstractImage):
                return self._convert_image(block)
            else:
                # For other block types, try to extract text content
                return self._convert_generic_block(block)
        except Exception as e:
            # Return error text for failed conversions
            error_font = Font(colour=(255, 0, 0))
            return Text(f"[Conversion Error: {str(e)}]", error_font)

    def _convert_paragraph(self, paragraph: Paragraph) -> Optional[Container]:
        """
        Convert a paragraph block to a Container with proper Line objects.

        Args:
            paragraph: The paragraph to convert

        Returns:
            A vertical Container holding one Line per laid-out row, or None
            when the paragraph has no text or no line could hold a word
        """
        # Extract text content directly
        text_content = self._extract_text_from_block(paragraph)
        if not text_content:
            return None

        # Use the font of the paragraph's first word that has one,
        # falling back to a default font.
        paragraph_font = Font(font_size=16)
        try:
            for _, word in paragraph.words():
                if hasattr(word, 'font') and word.font:
                    paragraph_font = word.font
                    break
        except Exception:
            pass  # Best-effort: keep the default font if extraction fails

        # Calculate available width using the page's padding system
        padding_left = self._padding[3]
        padding_right = self._padding[1]
        available_width = self._size[0] - padding_left - padding_right

        # Split into words
        words = text_content.split()
        if not words:
            return None

        # Import the Line class
        from .text import Line

        lines = []
        line_height = paragraph_font.font_size + 4  # Font size + small line spacing
        word_spacing = (3, 8)  # min, max spacing between words

        # Create lines by adding words until they don't fit
        word_index = 0
        line_y_offset = 0

        while word_index < len(words):
            # Create a new line with proper bounding box,
            # using JUSTIFY alignment for better text flow
            line = Line(
                spacing=word_spacing,
                origin=(0, line_y_offset),
                size=(available_width, line_height),
                font=paragraph_font,
                halign=Alignment.JUSTIFY
            )

            # Add words to this line until it's full
            while word_index < len(words):
                remaining_text = line.add_word(words[word_index], paragraph_font)
                if remaining_text is None:
                    # Word fit completely
                    word_index += 1
                    continue

                # Word didn't fit. If only part of it was placed
                # (hyphenated), carry the remainder over to the next line.
                if remaining_text != words[word_index]:
                    words[word_index] = remaining_text
                break

            if len(line.renderable_words) > 0:
                lines.append(line)
                line_y_offset += line_height
            else:
                # Prevent infinite loop if no words can fit
                word_index += 1

        if not lines:
            return None

        # Create a container for the lines
        total_height = len(lines) * line_height
        paragraph_container = Container(
            origin=(0, 0),
            size=(available_width, total_height),
            direction='vertical',
            spacing=0,  # Lines handle their own spacing
            padding=(0, 0, 0, 0)  # No additional padding since page handles it
        )

        # Add each line to the container
        for line in lines:
            paragraph_container.add_child(line)

        return paragraph_container

    def _convert_heading(self, heading: Heading) -> Optional[Text]:
        """
        Convert a heading block to a Text renderable with appropriate font.

        Args:
            heading: The heading to convert

        Returns:
            Bold Text sized by heading level (16 for unknown levels), or None
            when the heading has no words
        """
        words = [word.text for _, word in heading.words()]
        if not words:
            return None

        text_content = ' '.join(words)

        # Create heading font based on level
        size_map = {
            HeadingLevel.H1: 24,
            HeadingLevel.H2: 20,
            HeadingLevel.H3: 18,
            HeadingLevel.H4: 16,
            HeadingLevel.H5: 14,
            HeadingLevel.H6: 12
        }
        font_size = size_map.get(heading.level, 16)
        heading_font = Font(font_size=font_size, weight=FontWeight.BOLD)

        return Text(text_content, heading_font)

    def _convert_list(self, hlist: HList) -> Optional[Container]:
        """
        Convert a list block to a Container with list items.

        Args:
            hlist: The list to convert

        Returns:
            A vertical Container with one Text per non-empty item, or None
            when no item produced text
        """
        list_container = Container(
            origin=(0, 0),
            size=(self._size[0] - 40, 100),  # Adjust size as needed
            direction='vertical',
            spacing=5,
            padding=(5, 20, 5, 20)  # Add indentation
        )

        for item in hlist.items():
            # Convert each list item
            item_text = self._extract_text_from_block(item)
            if not item_text:
                continue

            # Add bullet or number prefix
            if hlist.style == ListStyle.UNORDERED:
                prefix = "• "
            else:
                # NOTE(review): ordered lists are not numbered yet; that
                # would require tracking the item index.
                prefix = "- "

            list_container.add_child(Text(prefix + item_text, Font()))

        return list_container if list_container._children else None

    def _convert_image(self, image: AbstractImage) -> Optional[Renderable]:
        """
        Convert an image block to a RenderableImage.

        Args:
            image: The abstract image to convert

        Returns:
            A RenderableImage limited to 400x300, or a grey placeholder Text
            naming the image if constructing the renderable raised
        """
        try:
            return RenderableImage(image, max_width=400, max_height=300)
        except Exception as e:
            print(f"Image rendering failed: {e}")
            # Return placeholder text if image fails. Prefer the alt text,
            # then the source, without assuming either attribute exists.
            label = (
                getattr(image, 'alt_text', None)
                or getattr(image, 'src', None)
                or 'Unknown'
            )
            error_font = Font(colour=(128, 128, 128))
            return Text(f"[Image: {label}]", error_font)

    def _convert_generic_block(self, block: Block) -> Optional[Text]:
        """
        Convert a generic block by extracting its text content.

        Args:
            block: The block to convert

        Returns:
            Text in the default font, or None when the block has no text
        """
        text_content = self._extract_text_from_block(block)
        if text_content:
            return Text(text_content, Font())
        return None

    def _extract_text_from_block(self, block: Block) -> str:
        """
        Extract plain text content from any block type.

        Args:
            block: The block to read

        Returns:
            The block's words joined by single spaces when it has a callable
            ``words``; otherwise ``str(block.text)`` when it has ``text``;
            otherwise ``str(block)``
        """
        if hasattr(block, 'words') and callable(block.words):
            return ' '.join(word.text for _, word in block.words())
        if hasattr(block, 'text'):
            return str(block.text)
        # Every object supports str(), so this is the final fallback
        return str(block)

    def render(self) -> Image.Image:
        """
        Render the page with all its content.

        Returns:
            A new image of the page size, filled with the background color,
            with every child that has an ``_origin`` pasted at that origin
        """
        # Make sure children are laid out
        self.layout()

        # Create base canvas with background color
        canvas = Image.new(self._mode, tuple(self._size), self._background_color)

        # Render each child and paste it onto the canvas
        for child in self._children:
            if not hasattr(child, '_origin'):
                continue

            child_img = child.render()

            # Child position relative to page, as plain ints for Pillow
            rel_pos = tuple(int(v) for v in child._origin)

            # Paste the child onto the canvas with alpha channel if available
            if 'A' in self._mode and child_img.mode == 'RGBA':
                canvas.paste(child_img, rel_pos, child_img)
            else:
                canvas.paste(child_img, rel_pos)

        return canvas