pyWebLayout/pyWebLayout/concrete/page.py

from typing import List, Tuple, Optional, Dict, Any
import numpy as np
import re
import os
from urllib.parse import urljoin, urlparse
from PIL import Image

from pyWebLayout.core.base import Renderable, Layoutable
from .box import Box
from pyWebLayout.style.layout import Alignment
from .text import Text
from .image import RenderableImage
from .functional import RenderableLink, RenderableButton
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Image as AbstractImage, HeadingLevel, ListStyle
from pyWebLayout.abstract.inline import Word
from pyWebLayout.abstract.functional import Link, LinkType
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
from pyWebLayout.typesetting.paragraph_layout import ParagraphLayout, ParagraphLayoutResult
from pyWebLayout.io.readers.html_extraction import parse_html_string
from pyWebLayout.typesetting.document_cursor import DocumentCursor, DocumentPosition


class Container(Box, Layoutable):
    """
    A container that can hold multiple renderable objects and lay them out.
    """
    def __init__(self, origin, size, direction='vertical', spacing=5,
                 callback=None, sheet=None, mode=None,
                 halign=Alignment.CENTER, valign=Alignment.CENTER,
                 padding: Tuple[int, int, int, int] = (10, 10, 10, 10)):
        """
        Initialize a container.

        Args:
            origin: Top-left corner coordinates
            size: Width and height of the container
            direction: Layout direction ('vertical' or 'horizontal')
            spacing: Space between elements
            callback: Optional callback function
            sheet: Optional image sheet
            mode: Optional image mode
            halign: Horizontal alignment
            valign: Vertical alignment
            padding: Padding as (top, right, bottom, left)
        """
        super().__init__(origin, size, callback, sheet, mode, halign, valign)
        self._children: List[Renderable] = []
        self._direction = direction
        self._spacing = spacing
        self._padding = padding

    def add_child(self, child: Renderable):
        """Add a child element to this container"""
        self._children.append(child)
        return self

    def layout(self):
        """Layout the children according to the container's direction and spacing"""
        if not self._children:
            return

        # Get available space after padding
        padding_top, padding_right, padding_bottom, padding_left = self._padding
        available_width = self._size[0] - padding_left - padding_right
        available_height = self._size[1] - padding_top - padding_bottom

        # Calculate total content size
        if self._direction == 'vertical':
            total_height = sum(getattr(child, '_size', [0, 0])[1] for child in self._children)
            total_height += self._spacing * (len(self._children) - 1)

            # Position each child
            current_y = padding_top
            for child in self._children:
                if hasattr(child, '_size') and hasattr(child, '_origin'):
                    child_width, child_height = child._size

                    # Calculate horizontal position based on alignment
                    if self._halign == Alignment.LEFT:
                        x_pos = padding_left
                    elif self._halign == Alignment.RIGHT:
                        x_pos = padding_left + available_width - child_width
                    else:  # CENTER
                        x_pos = padding_left + (available_width - child_width) // 2

                    # Set child position
                    child._origin = np.array([x_pos, current_y])

                    # Move down for next child
                    current_y += child_height + self._spacing

                    # Layout the child if it's layoutable
                    if isinstance(child, Layoutable):
                        child.layout()

        else:  # horizontal
            total_width = sum(getattr(child, '_size', [0, 0])[0] for child in self._children)
            total_width += self._spacing * (len(self._children) - 1)

            # Position each child
            current_x = padding_left
            for child in self._children:
                if hasattr(child, '_size') and hasattr(child, '_origin'):
                    child_width, child_height = child._size

                    # Calculate vertical position based on alignment
                    if self._valign == Alignment.TOP:
                        y_pos = padding_top
                    elif self._valign == Alignment.BOTTOM:
                        y_pos = padding_top + available_height - child_height
                    else:  # CENTER
                        y_pos = padding_top + (available_height - child_height) // 2

                    # Set child position
                    child._origin = np.array([current_x, y_pos])

                    # Move right for next child
                    current_x += child_width + self._spacing

                    # Layout the child if it's layoutable
                    if isinstance(child, Layoutable):
                        child.layout()

    def render(self) -> Image:
        """Render the container with all its children"""
        # Make sure children are laid out
        self.layout()

        # Create base canvas
        canvas = super().render()

        # Render each child and paste it onto the canvas
        for child in self._children:
            if hasattr(child, '_origin'):
                child_img = child.render()
                # Calculate child position relative to container
                rel_pos = tuple(child._origin - self._origin)
                # Paste the child onto the canvas
                canvas.paste(child_img, rel_pos, child_img)

        return canvas


class Page(Container):
    """
    Top-level container representing an HTML page.
    """
    def __init__(self, size=(800, 600), background_color=(255, 255, 255), mode='RGBA'):
        """
        Initialize a page.

        Args:
            size: Width and height of the page
            background_color: Background color as RGB tuple
            mode: Image mode
        """
        super().__init__(
            origin=(0, 0),
            size=size,
            direction='vertical',
            spacing=10,
            mode=mode,
            halign=Alignment.CENTER,  # Center horizontally to match test expectation
            valign=Alignment.TOP,
            padding=(10, 10, 10, 10)  # Use 10 padding to match test expectation
        )
        self._background_color = background_color

    def render_document(self, document, start_block: int = 0, max_blocks: Optional[int] = None) -> 'Page':
        """
        Render blocks from a Document into this page.

        Args:
            document: The Document object to render
            start_block: Which block to start rendering from (for pagination)
            max_blocks: Maximum number of blocks to render (None for all remaining)

        Returns:
            Self for method chaining
        """
        # Clear existing children
        self._children.clear()

        # Get blocks to render
        blocks = document.blocks[start_block:]
        if max_blocks is not None:
            blocks = blocks[:max_blocks]

        # Convert abstract blocks to renderable objects and add to page
        for block in blocks:
            renderable = self._convert_block_to_renderable(block)
            if renderable:
                self.add_child(renderable)

        return self

    def render_blocks(self, blocks: List[Block]) -> 'Page':
        """
        Render a list of abstract blocks into this page.

        Args:
            blocks: List of Block objects to render

        Returns:
            Self for method chaining
        """
        # Clear existing children
        self._children.clear()

        # Convert abstract blocks to renderable objects and add to page
        for block in blocks:
            renderable = self._convert_block_to_renderable(block)
            if renderable:
                self.add_child(renderable)

        return self

    def render_chapter(self, chapter) -> 'Page':
        """
        Render a Chapter into this page.

        Args:
            chapter: The Chapter object to render

        Returns:
            Self for method chaining
        """
        return self.render_blocks(chapter.blocks)

    def render_from_cursor(self, cursor: DocumentCursor, max_height: Optional[int] = None) -> Tuple['Page', DocumentCursor]:
        """
        Render content starting from a document cursor position, filling the page
        and returning the cursor position where the page ends.

        Args:
            cursor: Starting position in the document
            max_height: Maximum height to fill (defaults to page height minus padding)

        Returns:
            Tuple of (self, end_cursor) where end_cursor points to where next page should start
        """
        # Clear existing children
        self._children.clear()

        if max_height is None:
            max_height = self._size[1] - 40  # Account for top/bottom padding

        current_height = 0
        end_cursor = DocumentCursor(cursor.document, cursor.position.copy())

        # Keep adding content until we reach the height limit
        while current_height < max_height:
            # Get current block
            block = end_cursor.get_current_block()
            if block is None:
                break  # End of document

            # Convert block to renderable
            renderable = self._convert_block_to_renderable(block)
            if renderable:
                # Check if adding this renderable would exceed height
                renderable_height = getattr(renderable, '_size', [0, 0])[1]

                if current_height + renderable_height > max_height:
                    # This block would exceed the page - handle partial rendering
                    if isinstance(block, Paragraph):
                        # For paragraphs, we can render partial content
                        partial_renderable = self._render_partial_paragraph(
                            block, max_height - current_height, end_cursor
                        )
                        if partial_renderable:
                            self.add_child(partial_renderable)
                            current_height += getattr(partial_renderable, '_size', [0, 0])[1]
                    break
                else:
                    # Add the full block
                    self.add_child(renderable)
                    current_height += renderable_height

                    # Move cursor to next block
                    if not end_cursor.advance_block():
                        break  # End of document
            else:
                # Skip blocks that can't be rendered
                if not end_cursor.advance_block():
                    break

        return self, end_cursor

    def _render_partial_paragraph(self, paragraph: Paragraph, available_height: int, cursor: DocumentCursor) -> Optional[Container]:
        """
        Render part of a paragraph that fits in the available height.
        Updates the cursor to point to the remaining content.

        Args:
            paragraph: The paragraph to partially render
            available_height: Available height for content
            cursor: Cursor to update with new position

        Returns:
            Container with partial paragraph content or None
        """
        # Use the paragraph layout system to break into lines
        layout = ParagraphLayout(
            line_width=self._size[0] - 40,  # Account for margins
            line_height=20,
            word_spacing=(3, 8),
            line_spacing=3,
            halign=Alignment.LEFT
        )

        # Layout the paragraph into lines
        lines = layout.layout_paragraph(paragraph)

        if not lines:
            return None

        # Calculate how many lines we can fit
        line_height = 23  # 20 + 3 spacing
        max_lines = available_height // line_height

        if max_lines <= 0:
            return None

        # Take only the lines that fit
        lines_to_render = lines[:max_lines]

        # Update cursor position to point to remaining content
        if max_lines < len(lines):
            # We have remaining lines - update cursor to point to next line in paragraph
            cursor.position.paragraph_line_index = max_lines
        else:
            # We rendered the entire paragraph - cursor should advance to next block
            cursor.advance_block()

        # Create container for the partial paragraph
        paragraph_container = Container(
            origin=(0, 0),
            size=(self._size[0], len(lines_to_render) * line_height),
            direction='vertical',
            spacing=0,
            padding=(0, 0, 0, 0)
        )

        # Add the lines we can fit
        for line in lines_to_render:
            paragraph_container.add_child(line)

        return paragraph_container

    def get_position_bookmark(self) -> Optional[DocumentPosition]:
        """
        Get a bookmark position representing the start of content on this page.
        This can be used to return to this exact page later.

        Returns:
            DocumentPosition that can be used to recreate this page
        """
        # This would be set by render_from_cursor method
        return getattr(self, '_start_position', None)

    def set_start_position(self, position: DocumentPosition):
        """
        Set the document position that this page starts from.

        Args:
            position: The starting position for this page
        """
        self._start_position = position

    def fill_with_blocks(self, blocks: List[Block], start_index: int = 0) -> Tuple[int, List[Block]]:
        """
        Fill this page with blocks using the external pagination system.

        This method uses the new BlockPaginator system to handle different
        block types with appropriate handlers. It replaces the internal
        pagination logic and provides better support for partial content
        and remainders.

        Args:
            blocks: List of blocks to add to the page
            start_index: Index in blocks list to start from

        Returns:
            Tuple of (next_start_index, remainder_blocks)
            - next_start_index: Index where pagination stopped
            - remainder_blocks: Any partial blocks that need to continue on next page
        """
        from pyWebLayout.typesetting.block_pagination import BlockPaginator

        paginator = BlockPaginator()
        return paginator.fill_page(self, blocks, start_index)

    def try_add_block_external(self, block: Block, available_height: Optional[int] = None) -> Tuple[bool, Optional[Block], int]:
        """
        Try to add a single block to this page using external handlers.

        This method uses the BlockPaginator system to determine if a block
        can fit on the page and handle any remainder content.

        Args:
            block: The block to try to add
            available_height: Available height (defaults to remaining page height)

        Returns:
            Tuple of (success, remainder_block, height_used)
            - success: Whether the block was successfully added
            - remainder_block: Any remaining content that couldn't fit
            - height_used: Height consumed by the added content
        """
        from pyWebLayout.typesetting.block_pagination import BlockPaginator

        if available_height is None:
            # Calculate available height based on current content
            current_height = self._calculate_current_content_height()
            max_height = self._size[1] - 40  # Account for padding
            available_height = max_height - current_height

        paginator = BlockPaginator()
        result = paginator.paginate_block(block, self, available_height)

        if result.success and result.renderable:
            self.add_child(result.renderable)
            return True, result.remainder, result.height_used
        else:
            return False, result.remainder if result.can_continue else None, 0

    def _calculate_current_content_height(self) -> int:
        """Calculate the height currently used by content on this page."""
        if not self._children:
            return 0

        # Trigger layout to ensure positions are calculated
        self.layout()

        max_bottom = 0
        for child in self._children:
            if hasattr(child, '_origin') and hasattr(child, '_size'):
                child_bottom = child._origin[1] + child._size[1]
                max_bottom = max(max_bottom, child_bottom)

        return max_bottom

    def _convert_block_to_renderable(self, block: Block) -> Optional[Renderable]:
        """
        Convert an abstract block to a renderable object.

        Args:
            block: Abstract block to convert

        Returns:
            Renderable object or None if conversion failed
        """
        try:
            if isinstance(block, Paragraph):
                return self._convert_paragraph(block)
            elif isinstance(block, Heading):
                return self._convert_heading(block)
            elif isinstance(block, HList):
                return self._convert_list(block)
            elif isinstance(block, AbstractImage):
                return self._convert_image(block)
            else:
                # For other block types, try to extract text content
                return self._convert_generic_block(block)
        except Exception as e:
            # Return error text for failed conversions
            error_font = Font(colour=(255, 0, 0))
            return Text(f"[Conversion Error: {str(e)}]", error_font)

    def _convert_paragraph(self, paragraph: Paragraph) -> Optional[Container]:
        """Convert a paragraph block to a Container with proper Line objects."""
        # Extract text content directly
        text_content = self._extract_text_from_block(paragraph)
        if not text_content:
            return None

        # Get the original font from the paragraph's first word
        paragraph_font = Font(font_size=16)  # Default fallback

        # Try to extract font from the paragraph's words
        try:
            for _, word in paragraph.words():
                if hasattr(word, 'font') and word.font:
                    paragraph_font = word.font
                    break
        except:
            pass  # Use default if extraction fails

        # Calculate available width using the page's padding system
        padding_left = self._padding[3]  # Left padding
        padding_right = self._padding[1]  # Right padding
        available_width = self._size[0] - padding_left - padding_right

        # Split into words
        words = text_content.split()
        if not words:
            return None

        # Import the Line class
        from .text import Line

        # Create lines using the proper Line class with justified alignment
        lines = []
        line_height = paragraph_font.font_size + 4  # Font size + small line spacing
        word_spacing = (3, 8)  # min, max spacing between words

        # Create lines by adding words until they don't fit
        word_index = 0
        line_y_offset = 0

        while word_index < len(words):
            # Create a new line with proper bounding box
            line_origin = (0, line_y_offset)
            line_size = (available_width, line_height)

            # Use JUSTIFY alignment for better text flow
            line = Line(
                spacing=word_spacing,
                origin=line_origin,
                size=line_size,
                font=paragraph_font,
                halign=Alignment.JUSTIFY
            )

            # Add words to this line until it's full
            while word_index < len(words):
                remaining_text = line.add_word(words[word_index], paragraph_font)

                if remaining_text is None:
                    # Word fit completely
                    word_index += 1
                else:
                    # Word didn't fit, move to next line
                    # Check if the remaining text is the same as the original word
                    if remaining_text == words[word_index]:
                        # Word couldn't fit at all, skip to next line
                        break
                    else:
                        # Word was partially fit (hyphenated), update the word
                        words[word_index] = remaining_text
                        break

            # Add the line if it has any words
            if len(line.renderable_words) > 0:
                lines.append(line)
                line_y_offset += line_height
            else:
                # Prevent infinite loop if no words can fit
                word_index += 1

        if not lines:
            return None

        # Create a container for the lines
        total_height = len(lines) * line_height
        paragraph_container = Container(
            origin=(0, 0),
            size=(available_width, total_height),
            direction='vertical',
            spacing=0,  # Lines handle their own spacing
            padding=(0, 0, 0, 0)  # No additional padding since page handles it
        )

        # Add each line to the container
        for line in lines:
            paragraph_container.add_child(line)

        return paragraph_container

    def _convert_heading(self, heading: Heading) -> Optional[Text]:
        """Convert a heading block to a Text renderable with appropriate font."""
        # Extract text content
        words = []
        for _, word in heading.words():
            words.append(word.text)

        if words:
            text_content = ' '.join(words)
            # Create heading font based on level
            size_map = {
                HeadingLevel.H1: 24,
                HeadingLevel.H2: 20,
                HeadingLevel.H3: 18,
                HeadingLevel.H4: 16,
                HeadingLevel.H5: 14,
                HeadingLevel.H6: 12
            }

            font_size = size_map.get(heading.level, 16)
            heading_font = Font(font_size=font_size, weight=FontWeight.BOLD)

            return Text(text_content, heading_font)
        return None

    def _convert_list(self, hlist: HList) -> Optional[Container]:
        """Convert a list block to a Container with list items."""
        list_container = Container(
            origin=(0, 0),
            size=(self._size[0] - 40, 100),  # Adjust size as needed
            direction='vertical',
            spacing=5,
            padding=(5, 20, 5, 20)  # Add indentation
        )

        for item in hlist.items():
            # Convert each list item
            item_text = self._extract_text_from_block(item)
            if item_text:
                # Add bullet or number prefix
                if hlist.style == ListStyle.UNORDERED:
                    prefix = "• "
                else:
                    # For ordered lists, we'd need to track the index
                    prefix = "- "

                item_font = Font()
                full_text = prefix + item_text
                text_renderable = Text(full_text, item_font)
                list_container.add_child(text_renderable)

        return list_container if list_container._children else None

    def _convert_image(self, image: AbstractImage) -> Optional[Renderable]:
        """Convert an image block to a RenderableImage."""
        try:
            # Try to create the image
            renderable_image = RenderableImage(image, max_width=400, max_height=300)
            return renderable_image
        except Exception as e:
            print(f"Image rendering failed: {e}")
            # Return placeholder text if image fails
            error_font = Font(colour=(128, 128, 128))
            return Text(f"[Image: {image.alt_text or image.src if hasattr(image, 'src') else 'Unknown'}]", error_font)

    def _convert_generic_block(self, block: Block) -> Optional[Text]:
        """Convert a generic block by extracting its text content."""
        text_content = self._extract_text_from_block(block)
        if text_content:
            return Text(text_content, Font())
        return None

    def _extract_text_from_block(self, block: Block) -> str:
        """Extract plain text content from any block type."""
        if hasattr(block, 'words') and callable(block.words):
            words = []
            for _, word in block.words():
                words.append(word.text)
            return ' '.join(words)
        elif hasattr(block, 'text'):
            return str(block.text)
        elif hasattr(block, '__str__'):
            return str(block)
        else:
            return ""

    def render(self) -> Image:
        """Render the page with all its content"""
        # Make sure children are laid out
        self.layout()

        # Create base canvas with background color
        canvas = Image.new(self._mode, tuple(self._size), self._background_color)

        # Render each child and paste it onto the canvas
        for child in self._children:
            if hasattr(child, '_origin'):
                child_img = child.render()
                # Calculate child position relative to page
                rel_pos = tuple(child._origin)
                # Paste the child onto the canvas with alpha channel if available
                if 'A' in self._mode and child_img.mode == 'RGBA':
                    canvas.paste(child_img, rel_pos, child_img)
                else:
                    canvas.paste(child_img, rel_pos)

        return canvas