# pyWebLayout/html_browser.py

#!/usr/bin/env python3
"""
Simple HTML Browser using pyWebLayout

This browser can render basic HTML content using the pyWebLayout concrete objects.
It supports text, images, links, forms, and basic styling.
"""

import re
import tkinter as tk
from tkinter import ttk, messagebox, filedialog, simpledialog
from PIL import Image, ImageTk, ImageDraw
from typing import Dict, List, Optional, Tuple, Any
import webbrowser
import os
from urllib.parse import urljoin, urlparse
import requests
from io import BytesIO
import pyperclip

# Import pyWebLayout components
from pyWebLayout.concrete import (
    Page, Container, Box, Text, RenderableImage,
    RenderableLink, RenderableButton, RenderableForm, RenderableFormField
)
from pyWebLayout.abstract.functional import (
    Link, Button, Form, FormField, LinkType, FormFieldType
)
from pyWebLayout.abstract.block import Paragraph
from pyWebLayout.abstract.inline import Word
from pyWebLayout.style.fonts import Font, FontWeight, FontStyle, TextDecoration
from pyWebLayout.style.layout import Alignment
from pyWebLayout.typesetting.paragraph_layout import ParagraphLayout, ParagraphLayoutResult


class HTMLParser:
    """Simple HTML parser that converts HTML to pyWebLayout objects"""

    def __init__(self):
        """Initialise the parser state.

        ``font_stack`` holds the fonts currently in effect, innermost last;
        its first entry is the default 14pt font.  ``base_url`` is set by
        ``parse_html_string`` and is used to resolve relative image sources.
        """
        self.font_stack = [Font(font_size=14)]  # Default font
        self.current_container = None
        # Defined here so tag handlers can read it even when they are used
        # before parse_html_string() has assigned a real base location.
        self.base_url = ""

    def parse_html_string(self, html_content: str, base_url: str = "") -> Page:
        """Parse an HTML string and return the populated Page.

        Args:
            html_content: Raw HTML markup.
            base_url: Location used later to resolve relative image sources
                (a URL, or a directory path for local files).

        Returns:
            A new 800x10000 white Page containing the rendered body content.
            If a ``<title>`` element is found its text is stored on
            ``page.title``.
        """
        page = Page(size=(800, 10000), background_color=(255, 255, 255))
        self.current_container = page
        self.base_url = base_url

        # Regex-based clean-up (not production-ready, but enough for a demo).
        # HTML tag names are case-insensitive, so <SCRIPT>/<Style> must be
        # stripped as well; \b keeps e.g. <scripted> from matching.
        any_case = re.DOTALL | re.IGNORECASE
        html_content = re.sub(r'<!--.*?-->', '', html_content, flags=re.DOTALL)
        html_content = re.sub(r'<script\b.*?</script\s*>', '', html_content, flags=any_case)
        html_content = re.sub(r'<style\b.*?</style\s*>', '', html_content, flags=any_case)

        # Title: allow attributes on the tag and text that spans lines, then
        # collapse the whitespace so the title is a single clean line.
        title_match = re.search(r'<title\b[^>]*>(.*?)</title\s*>', html_content, any_case)
        if title_match:
            page.title = ' '.join(title_match.group(1).split())

        # Use the body if there is one, otherwise treat everything as content.
        body_match = re.search(r'<body[^>]*>(.*?)</body>', html_content, any_case)
        body_content = body_match.group(1) if body_match else html_content

        self._parse_content(body_content, page)

        return page

    def parse_html_file(self, file_path: str) -> Page:
        """Read a UTF-8 HTML file from disk and parse it into a Page.

        The directory containing the file is passed on as the base location.
        Any exception raised while reading or parsing is reported on a
        substitute page showing the error message in red.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as source:
                markup = source.read()
            containing_dir = os.path.dirname(os.path.abspath(file_path))
            return self.parse_html_string(markup, containing_dir)
        except Exception as exc:
            # Best effort: show the failure instead of propagating it
            failure_page = Page(size=(800, 10000), background_color=(255, 255, 255))
            red_font = Font(font_size=16, colour=(255, 0, 0))
            failure_page.add_child(Text(f"Error loading file: {str(exc)}", red_font))
            return failure_page

    def _parse_content(self, content: str, container: Container):
        """Parse HTML content and add elements to container"""
        # Simple token-based parsing
        tokens = self._tokenize_html(content)

        # Group tokens into paragraphs and other elements
        self._process_tokens_into_elements(tokens, container)

    def _process_tokens_into_elements(self, tokens: List[Dict], container: Container):
        """Process tokens and create appropriate elements (paragraphs, images, etc.)"""
        i = 0
        current_paragraph_content = []

        while i < len(tokens):
            token = tokens[i]

            if token['type'] == 'text':
                if token['content'].strip():  # Only add non-empty text
                    current_paragraph_content.append((token['content'].strip(), self.font_stack[-1]))

            elif token['type'] == 'tag':
                tag_name = token['name']
                is_closing = token['closing']

                # Handle block-level elements that should end the current paragraph
                if tag_name in ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'br', 'img'] and not is_closing:
                    # Finalize any pending paragraph content
                    if current_paragraph_content:
                        self._create_and_add_paragraph(current_paragraph_content, container)
                        current_paragraph_content = []

                    # Handle the block element
                    if tag_name == 'p':
                        # Start a new paragraph
                        i = self._handle_paragraph_tag(token, tokens, i, container)
                        continue
                    elif tag_name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                        # Handle header
                        i = self._handle_header_tag(token, tokens, i, container)
                        continue
                    elif tag_name == 'br':
                        # Add line break
                        spacer = Box((0, 0), (1, 10))
                        container.add_child(spacer)
                    elif tag_name == 'img':
                        # Handle image
                        self._handle_tag(token, container)
                    elif tag_name == 'div':
                        # Continue processing div content
                        pass

                # Handle inline elements or continue processing
                elif tag_name in ['b', 'strong', 'i', 'em', 'u', 'a']:
                    i = self._handle_inline_tag_with_content(token, tokens, i, current_paragraph_content)
                    continue
                else:
                    # Handle other tags normally
                    self._handle_tag(token, container)

            i += 1

        # Finalize any remaining paragraph content
        if current_paragraph_content:
            self._create_and_add_paragraph(current_paragraph_content, container)

    def _create_and_add_paragraph(self, content_list: List[Tuple[str, Font]], container: Container):
        """Create a paragraph from content and add it to the container using proper layout"""
        if not content_list:
            return

        # Create a paragraph object
        paragraph = Paragraph(style=content_list[0][1])  # Use first font as paragraph style

        # Add words to the paragraph
        for text_content, font in content_list:
            words = text_content.split()
            for word_text in words:
                if word_text.strip():
                    word = Word(word_text.strip(), font)
                    paragraph.add_word(word)

        # Use paragraph layout to break into lines
        layout = ParagraphLayout(
            line_width=750,  # Page width minus margins
            line_height=20,
            word_spacing=(3, 8),
            line_spacing=3,
            halign=Alignment.LEFT
        )

        # Layout the paragraph into lines
        lines = layout.layout_paragraph(paragraph)

        # Add each line to the container
        for line in lines:
            container.add_child(line)

        # Add some space after the paragraph
        spacer = Box((0, 0), (1, 5))
        container.add_child(spacer)

    def _handle_paragraph_tag(self, token, tokens, current_index, container):
        """Handle paragraph tags with proper text flow"""
        content_start = current_index + 1
        content_end = self._find_matching_closing_tag(tokens, current_index, 'p')

        # Collect content within the paragraph
        paragraph_content = []

        i = content_start
        while i < content_end:
            content_token = tokens[i]
            if content_token['type'] == 'text':
                if content_token['content'].strip():
                    paragraph_content.append((content_token['content'].strip(), self.font_stack[-1]))
            elif content_token['type'] == 'tag' and not content_token['closing']:
                # Handle inline formatting within paragraph
                if content_token['name'] in ['b', 'strong', 'i', 'em', 'u', 'a']:
                    i = self._handle_inline_tag_with_content(content_token, tokens, i, paragraph_content)
                    continue
            i += 1

        # Create and add the paragraph
        if paragraph_content:
            self._create_and_add_paragraph(paragraph_content, container)

        return content_end + 1 if content_end < len(tokens) else len(tokens)

    def _handle_header_tag(self, token, tokens, current_index, container):
        """Collect the content of an ``<h1>``..``<h6>`` element and emit it as a paragraph.

        The header font (size by level, bold) is pushed onto the font stack
        while the content is collected and popped afterwards.  Inline
        formatting tags, including ``<a>`` as in paragraphs, are delegated
        to the inline handler.  A non-empty header is followed by an extra
        10-unit spacer.

        Returns the index of the token following the matching closing tag, or
        ``len(tokens)`` when the header is never closed.
        """
        tag_name = token['name']

        # Push header font onto stack
        size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
        font = self.font_stack[-1].with_size(size_map[tag_name]).with_weight(FontWeight.BOLD)
        self.font_stack.append(font)

        content_start = current_index + 1
        content_end = self._find_matching_closing_tag(tokens, current_index, tag_name)

        # Collect header content
        header_content = []

        i = content_start
        while i < content_end:
            content_token = tokens[i]
            if content_token['type'] == 'text':
                text = content_token['content'].strip()
                if text:
                    header_content.append((text, self.font_stack[-1]))
            elif content_token['type'] == 'tag' and not content_token['closing']:
                # Same inline set as paragraphs, so linked headers get link styling
                if content_token['name'] in ('b', 'strong', 'i', 'em', 'u', 'a'):
                    i = self._handle_inline_tag_with_content(content_token, tokens, i, header_content)
                    continue
            i += 1

        # Pop the header font
        if len(self.font_stack) > 1:
            self.font_stack.pop()

        # Create and add the header paragraph with extra spacing
        if header_content:
            self._create_and_add_paragraph(header_content, container)
            # Add extra space after headers
            spacer = Box((0, 0), (1, 10))
            container.add_child(spacer)

        return content_end + 1 if content_end < len(tokens) else len(tokens)

    def _handle_inline_tag_with_content(self, token, tokens, current_index, paragraph_content):
        """Handle inline formatting tags and collect their content"""
        tag_name = token['name']

        # Push formatted font onto stack
        if tag_name in ['b', 'strong']:
            font = self.font_stack[-1].with_weight(FontWeight.BOLD)
            self.font_stack.append(font)
        elif tag_name in ['i', 'em']:
            font = self.font_stack[-1].with_style(FontStyle.ITALIC)
            self.font_stack.append(font)
        elif tag_name == 'u':
            font = self.font_stack[-1].with_decoration(TextDecoration.UNDERLINE)
            self.font_stack.append(font)
        elif tag_name == 'a':
            font = self.font_stack[-1].with_colour((0, 0, 255)).with_decoration(TextDecoration.UNDERLINE)
            self.font_stack.append(font)

        content_start = current_index + 1
        content_end = self._find_matching_closing_tag(tokens, current_index, tag_name)

        # Collect content with the formatting applied
        i = content_start
        while i < content_end:
            content_token = tokens[i]
            if content_token['type'] == 'text':
                if content_token['content'].strip():
                    paragraph_content.append((content_token['content'].strip(), self.font_stack[-1]))
            elif content_token['type'] == 'tag' and not content_token['closing']:
                # Handle nested inline formatting
                if content_token['name'] in ['b', 'strong', 'i', 'em', 'u']:
                    i = self._handle_inline_tag_with_content(content_token, tokens, i, paragraph_content)
                    continue
            i += 1

        # Pop the formatting font
        if len(self.font_stack) > 1:
            self.font_stack.pop()

        return content_end + 1 if content_end < len(tokens) else len(tokens)

    def _handle_tag_with_content(self, token, tokens, current_index, container):
        """Handle tags and their content, returning the new index position"""
        tag_name = token['name']
        is_closing = token['closing']

        if is_closing:
            # Handle closing tags
            if tag_name in ['b', 'strong', 'i', 'em', 'u', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                if len(self.font_stack) > 1:  # Don't pop the last font
                    self.font_stack.pop()
            return current_index + 1

        # For opening tags that affect text styling, parse their content with the new style
        if tag_name in ['b', 'strong', 'i', 'em', 'u', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
            # Push new font onto stack
            self._handle_tag(token, container)

            # Find the matching closing tag and parse content in between
            content_start = current_index + 1
            content_end = self._find_matching_closing_tag(tokens, current_index, tag_name)

            if content_end > content_start:
                # Parse content between opening and closing tags with current font style
                for j in range(content_start, content_end):
                    content_token = tokens[j]
                    if content_token['type'] == 'text':
                        if content_token['content'].strip():
                            text_obj = Text(content_token['content'].strip(), self.font_stack[-1])
                            container.add_child(text_obj)
                    elif content_token['type'] == 'tag' and not content_token['closing']:
                        # Handle nested tags
                        self._handle_tag(content_token, container)

            # Pop the font from stack
            if len(self.font_stack) > 1:
                self.font_stack.pop()

            return content_end + 1 if content_end < len(tokens) else len(tokens)

        else:
            # Handle other tags normally
            self._handle_tag(token, container)
            return current_index + 1

    def _find_matching_closing_tag(self, tokens, start_index, tag_name):
        """Find the index of the matching closing tag"""
        open_count = 1
        i = start_index + 1

        while i < len(tokens) and open_count > 0:
            token = tokens[i]
            if token['type'] == 'tag' and token['name'] == tag_name:
                if token['closing']:
                    open_count -= 1
                else:
                    open_count += 1
            i += 1

        return i - 1 if open_count == 0 else len(tokens)

    def _tokenize_html(self, content: str) -> List[Dict]:
        """Simple HTML tokenizer"""
        tokens = []
        tag_pattern = r'<(/?)([^>]+)>'

        last_end = 0
        for match in re.finditer(tag_pattern, content):
            # Add text before tag
            text_content = content[last_end:match.start()]
            if text_content:
                tokens.append({'type': 'text', 'content': text_content})

            # Add tag
            is_closing = bool(match.group(1))
            tag_content = match.group(2)
            tag_parts = tag_content.split()
            tag_name = tag_parts[0].lower()

            # Parse attributes
            attributes = {}
            if len(tag_parts) > 1:
                attr_text = ' '.join(tag_parts[1:])
                attr_pattern = r'(\w+)=(?:"([^"]*)"|\'([^\']*)\'|([^\s>]+))'
                for attr_match in re.finditer(attr_pattern, attr_text):
                    attr_name = attr_match.group(1).lower()
                    attr_value = attr_match.group(2) or attr_match.group(3) or attr_match.group(4)
                    attributes[attr_name] = attr_value

            tokens.append({
                'type': 'tag',
                'name': tag_name,
                'closing': is_closing,
                'attributes': attributes,
                'content': tag_content
            })

            last_end = match.end()

        # Add remaining text
        if last_end < len(content):
            text_content = content[last_end:]
            if text_content:
                tokens.append({'type': 'text', 'content': text_content})

        return tokens

    def _handle_tag(self, token: Dict, container: Container):
        """Handle HTML tags"""
        tag_name = token['name']
        is_closing = token['closing']
        attributes = token['attributes']

        if is_closing:
            # Handle closing tags
            if tag_name in ['b', 'strong']:
                self.font_stack.pop()
            elif tag_name in ['i', 'em']:
                self.font_stack.pop()
            elif tag_name == 'u':
                self.font_stack.pop()
            return

        # Handle opening tags
        if tag_name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
            # Headers
            size_map = {'h1': 24, 'h2': 20, 'h3': 18, 'h4': 16, 'h5': 14, 'h6': 12}
            font = self.font_stack[-1].with_size(size_map[tag_name]).with_weight(FontWeight.BOLD)
            self.font_stack.append(font)

        elif tag_name in ['b', 'strong']:
            # Bold text
            font = self.font_stack[-1].with_weight(FontWeight.BOLD)
            self.font_stack.append(font)

        elif tag_name in ['i', 'em']:
            # Italic text
            font = self.font_stack[-1].with_style(FontStyle.ITALIC)
            self.font_stack.append(font)

        elif tag_name == 'u':
            # Underlined text
            font = self.font_stack[-1].with_decoration(TextDecoration.UNDERLINE)
            self.font_stack.append(font)

        elif tag_name == 'a':
            # Links
            href = attributes.get('href', '#')
            title = attributes.get('title', href)

            # Determine link type
            if href.startswith('http'):
                link_type = LinkType.EXTERNAL
            elif href.startswith('#'):
                link_type = LinkType.INTERNAL
            else:
                link_type = LinkType.INTERNAL

            # Create link callback
            def link_callback(location, **kwargs):
                return f"Navigate to: {location}"

            link = Link(href, link_type, link_callback, title=title)
            link_font = self.font_stack[-1].with_colour((0, 0, 255)).with_decoration(TextDecoration.UNDERLINE)

            # For now, just add the link text with link styling
            link_text = attributes.get('title', href)
            renderable_link = RenderableLink(link, link_text, link_font)
            container.add_child(renderable_link)

        elif tag_name == 'img':
            # Images
            src = attributes.get('src', '')
            alt = attributes.get('alt', 'Image')
            width = attributes.get('width')
            height = attributes.get('height')

            if src:
                # Resolve relative URLs
                if self.base_url and not src.startswith(('http://', 'https://')):
                    if os.path.isdir(self.base_url):
                        src = os.path.join(self.base_url, src)
                    else:
                        src = urljoin(self.base_url, src)

                try:
                    # Create abstract image
                    from pyWebLayout.abstract.block import Image as AbstractImage
                    abstract_img = AbstractImage(src, alt)

                    # Parse dimensions if provided
                    max_width = int(width) if width and width.isdigit() else None
                    max_height = int(height) if height and height.isdigit() else None

                    renderable_img = RenderableImage(abstract_img, max_width, max_height)
                    container.add_child(renderable_img)

                except Exception as e:
                    # Add error text if image fails to load
                    error_text = Text(f"[Image Error: {alt}]", Font(colour=(255, 0, 0)))
                    container.add_child(error_text)

        elif tag_name == 'br':
            # Line breaks - add some vertical space
            spacer = Box((0, 0), (1, 10))
            container.add_child(spacer)

        elif tag_name == 'p':
            # Paragraphs - add some vertical space
            spacer = Box((0, 0), (1, 5))
            container.add_child(spacer)

        elif tag_name in ['div', 'span']:
            # Generic containers - just continue parsing
            pass


class BrowserWindow:
    """Main browser window using Tkinter"""

    def __init__(self):
        """Create the top-level window, reset browser state and build the UI."""
        window = tk.Tk()
        window.title("pyWebLayout HTML Browser")
        window.geometry("900x700")
        self.root = window

        # Page and navigation state
        self.current_page = None
        self.history = []
        self.history_index = -1

        # Text selection state
        self.selection_start = None
        self.selection_end = None
        self.is_selecting = False
        self.selected_text = ""
        self.text_elements = []  # Text elements with their positions
        self.selection_overlay = None  # Canvas item id of the selection highlight

        # Must run last: building the UI reads the attributes set above
        self.setup_ui()

    def setup_ui(self):
        """Build the widget tree, bind input events and load the welcome page.

        Layout, top to bottom inside one padded main frame:
          * navigation row: back / forward / refresh buttons, URL entry with
            a "Go" button, and an "Open File" button;
          * content area: a white canvas with vertical and horizontal
            scrollbars;
          * status bar bound to ``self.status_var``.

        Widgets that other methods use are stored on ``self`` (the buttons,
        ``url_var``, ``url_entry``, ``canvas``, ``status_var``).  Several of
        the callbacks bound here (``go_back``, ``go_forward``, ``refresh``,
        ``open_file``, ``on_click``, ``on_mouse_move``) are defined outside
        the part of the class covered by this method.
        """
        # Create main frame
        main_frame = ttk.Frame(self.root)
        main_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

        # Navigation frame
        nav_frame = ttk.Frame(main_frame)
        nav_frame.pack(fill=tk.X, pady=(0, 5))

        # Navigation buttons; back/forward start out disabled
        self.back_btn = ttk.Button(nav_frame, text="←", command=self.go_back, state=tk.DISABLED)
        self.back_btn.pack(side=tk.LEFT, padx=(0, 5))

        self.forward_btn = ttk.Button(nav_frame, text="→", command=self.go_forward, state=tk.DISABLED)
        self.forward_btn.pack(side=tk.LEFT, padx=(0, 5))

        self.refresh_btn = ttk.Button(nav_frame, text="⟳", command=self.refresh)
        self.refresh_btn.pack(side=tk.LEFT, padx=(0, 10))

        # Address bar; pressing Return navigates just like the "Go" button
        ttk.Label(nav_frame, text="URL:").pack(side=tk.LEFT)
        self.url_var = tk.StringVar()
        self.url_entry = ttk.Entry(nav_frame, textvariable=self.url_var, width=50)
        self.url_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(5, 5))
        self.url_entry.bind('<Return>', self.navigate_to_url)

        self.go_btn = ttk.Button(nav_frame, text="Go", command=self.navigate_to_url)
        self.go_btn.pack(side=tk.LEFT, padx=(0, 10))

        # File operations
        self.open_btn = ttk.Button(nav_frame, text="Open File", command=self.open_file)
        self.open_btn.pack(side=tk.LEFT)

        # Content frame with scrollbars
        content_frame = ttk.Frame(main_frame)
        content_frame.pack(fill=tk.BOTH, expand=True)

        # Create canvas with scrollbars
        self.canvas = tk.Canvas(content_frame, bg='white')

        v_scrollbar = ttk.Scrollbar(content_frame, orient=tk.VERTICAL, command=self.canvas.yview)
        h_scrollbar = ttk.Scrollbar(content_frame, orient=tk.HORIZONTAL, command=self.canvas.xview)

        # Link both directions: scrollbars drive the canvas view, and the
        # canvas reports its view back to the scrollbars
        self.canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)

        # Scrollbars are packed before the canvas so they claim their edges first
        v_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        h_scrollbar.pack(side=tk.BOTTOM, fill=tk.X)
        self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        # Status bar
        self.status_var = tk.StringVar(value="Ready")
        status_bar = ttk.Label(main_frame, textvariable=self.status_var, relief=tk.SUNKEN)
        status_bar.pack(fill=tk.X, pady=(5, 0))

        # Bind mouse events: click, drag-to-select, release, and hover
        self.canvas.bind('<Button-1>', self.on_click)
        self.canvas.bind('<B1-Motion>', self.on_drag)
        self.canvas.bind('<ButtonRelease-1>', self.on_release)
        self.canvas.bind('<Motion>', self.on_mouse_move)

        # Keyboard shortcuts, bound on the root window
        self.root.bind('<Control-c>', self.copy_selection)
        self.root.bind('<Control-a>', self.select_all)

        # Context menu
        self.setup_context_menu()

        # Make canvas focusable
        self.canvas.config(highlightthickness=1)
        self.canvas.focus_set()

        # Load default page
        self.load_default_page()

    def setup_context_menu(self):
        """Create the Copy / Select All popup menu and bind it to right-click on the canvas."""
        self.context_menu = tk.Menu(self.root, tearoff=0)

        entries = (
            ("Copy", self.copy_selection),
            ("Select All", self.select_all),
        )
        for label, action in entries:
            self.context_menu.add_command(label=label, command=action)

        # Right mouse button opens the menu
        self.canvas.bind('<Button-3>', self.show_context_menu)

    def show_context_menu(self, event):
        """Pop up the context menu at the pointer's screen position."""
        menu = self.context_menu
        try:
            menu.tk_popup(event.x_root, event.y_root)
        finally:
            # Always release the grab, even if the popup call fails
            menu.grab_release()

    def on_drag(self, event):
        """Track a mouse drag as a text selection.

        The first drag event anchors the selection; every event moves its
        end point to the pointer position (in canvas coordinates), redraws
        the highlight and updates the status bar.
        """
        pointer = (self.canvas.canvasx(event.x), self.canvas.canvasy(event.y))

        if not self.is_selecting:
            # First motion of this drag: anchor the selection here
            self.is_selecting = True
            self.selection_start = pointer
        self.selection_end = pointer

        self.update_selection_visual()
        self.status_var.set("Selecting text...")

    def on_release(self, event):
        """Finish a drag selection when the mouse button is released.

        Does nothing unless a selection is in progress.  Otherwise the end
        point is set to the release position (canvas coordinates), the text
        inside the selection is extracted and the status bar reports either
        the number of selected characters or that nothing was selected; in
        the latter case the selection is cleared.
        """
        if not self.is_selecting:
            return

        self.selection_end = (self.canvas.canvasx(event.x), self.canvas.canvasy(event.y))

        # Extract selected text
        self.extract_selected_text()

        if self.selected_text:
            self.status_var.set(f"Selected: {len(self.selected_text)} characters")
        else:
            # Clear first: clear_selection() writes its own status message,
            # which would otherwise replace the one set here.
            self.clear_selection()
            self.status_var.set("No text selected")

    def update_selection_visual(self):
        """Redraw the selection highlight rectangle on the canvas.

        Any previous highlight item is deleted; a new stippled blue
        rectangle spanning the two selection corners is drawn when both are
        set.
        """
        if self.selection_overlay:
            self.canvas.delete(self.selection_overlay)

        if not (self.selection_start and self.selection_end):
            return

        (x1, y1), (x2, y2) = self.selection_start, self.selection_end

        # Normalise so the rectangle runs top-left to bottom-right
        left, right = min(x1, x2), max(x1, x2)
        top, bottom = min(y1, y2), max(y1, y2)

        # The stipple pattern stands in for transparency
        self.selection_overlay = self.canvas.create_rectangle(
            left, top, right, bottom,
            fill='blue', stipple='gray50', outline='blue', width=1
        )

    def extract_selected_text(self):
        """Store the text lying inside the selection rectangle in ``self.selected_text``.

        Text pieces are gathered from the current page, ordered top to
        bottom and then left to right, and joined with single spaces.
        Without a complete selection the result is the empty string.
        """
        if not self.selection_start or not self.selection_end:
            self.selected_text = ""
            return

        # Normalise the two corners into a bounding box
        (x1, y1), (x2, y2) = self.selection_start, self.selection_end
        left, right = min(x1, x2), max(x1, x2)
        top, bottom = min(y1, y2), max(y1, y2)

        # Each hit is a (text, x, y) tuple
        hits = []
        self._collect_text_in_area(self.current_page, (0, 0), left, top, right, bottom, hits)

        # Reading order: by y first, then by x
        ordered = sorted(hits, key=lambda hit: (hit[2], hit[1]))
        self.selected_text = " ".join(hit[0] for hit in ordered)

    def _collect_text_in_area(self, container, offset, left, top, right, bottom, collected):
        """Append ``(text, x, y)`` for every text piece that overlaps the given rectangle.

        Walks ``container._children`` recursively.  *offset* is the absolute
        position of *container*; children that lack ``_origin`` or ``_size``
        and children that do not overlap the rectangle are skipped.  Words
        of a line are all recorded at the line's position.
        """
        if not hasattr(container, '_children'):
            return

        for node in container._children:
            if not (hasattr(node, '_origin') and hasattr(node, '_size')):
                continue

            # Origin/size may be array-like; normalise to plain tuples
            origin = tuple(node._origin) if hasattr(node._origin, '__iter__') else node._origin
            size = tuple(node._size) if hasattr(node._size, '__iter__') else node._size

            node_x = offset[0] + origin[0]
            node_y = offset[1] + origin[1]
            node_w = size[0]
            node_h = size[1]

            # Rectangle overlap test against the selection area
            if not (node_x < right and node_x + node_w > left and
                    node_y < bottom and node_y + node_h > top):
                continue

            if isinstance(node, Text):
                # Stand-alone text element
                content = getattr(node, '_text', '')
                if content.strip():
                    collected.append((content.strip(), node_x, node_y))
            elif hasattr(node, '_words'):
                # A laid-out line: take the text of each word
                for word in node._words:
                    if not hasattr(word, 'text'):
                        continue
                    piece = word.text
                    if piece.strip():
                        collected.append((piece.strip(), node_x, node_y))

            # Descend into nested containers
            if hasattr(node, '_children'):
                self._collect_text_in_area(node, (node_x, node_y), left, top, right, bottom, collected)

    def copy_selection(self, event=None):
        """Copy the selected text to the clipboard and report the outcome in the status bar."""
        if not self.selected_text:
            self.status_var.set("No text selected to copy")
            return

        try:
            pyperclip.copy(self.selected_text)
            self.status_var.set(f"Copied {len(self.selected_text)} characters to clipboard")
        except Exception as e:
            # Clipboard access can fail; report it instead of raising
            self.status_var.set(f"Error copying to clipboard: {str(e)}")

    def select_all(self, event=None):
        """Select all text on the page.

        The selection rectangle covers the bounding box of everything drawn
        on the canvas, not just the visible viewport, so content that needs
        scrolling is included.  The visible widget size is used as a
        minimum and as a fallback when the canvas is empty.  Does nothing
        when no page is loaded.
        """
        if not self.current_page:
            return

        left, top = 0, 0
        right = self.canvas.winfo_width()
        bottom = self.canvas.winfo_height()

        # bbox("all") is None for an empty canvas
        content_bounds = self.canvas.bbox("all")
        if content_bounds:
            left = min(left, content_bounds[0])
            top = min(top, content_bounds[1])
            right = max(right, content_bounds[2])
            bottom = max(bottom, content_bounds[3])

        self.selection_start = (left, top)
        self.selection_end = (right, bottom)
        self.is_selecting = True

        # Extract all text
        self.extract_selected_text()

        # Update visual
        self.update_selection_visual()

        if self.selected_text:
            self.status_var.set(f"Selected all text: {len(self.selected_text)} characters")
        else:
            self.status_var.set("No text found to select")

    def clear_selection(self):
        """Drop the current selection: reset its state, remove the highlight, update the status bar."""
        self.selection_start = self.selection_end = None
        self.is_selecting = False
        self.selected_text = ""

        # Remove the highlight rectangle, if one is drawn
        overlay = self.selection_overlay
        if overlay:
            self.canvas.delete(overlay)
            self.selection_overlay = None

        self.status_var.set("Selection cleared")

    def load_default_page(self):
        """Parse and display the built-in welcome page."""
        welcome_markup = """
        <html>
        <head><title>pyWebLayout Browser - Welcome</title></head>
        <body>
            <h1>Welcome to pyWebLayout Browser</h1>
            <p>This is a simple HTML browser built using pyWebLayout components.</p>

            <h2>Features:</h2>
            <ul>
                <li>Basic HTML rendering</li>
                <li>Text formatting (bold, italic, underline)</li>
                <li>Headers (H1-H6)</li>
                <li>Links (clickable)</li>
                <li>Images</li>
                <li>Forms (basic support)</li>
            </ul>

            <h2>Try these features:</h2>
            <p><b>Bold text</b>, <i>italic text</i>, and <u>underlined text</u></p>

            <p>Sample link: <a href="https://www.example.com" title="External link">Visit Example.com</a></p>

            <h3>File Operations</h3>
            <p>Use the "Open File" button to load local HTML files.</p>

            <p>Or enter a URL in the address bar above.</p>
        </body>
        </html>
        """

        # A fresh parser per page keeps font state from leaking between loads
        self.current_page = HTMLParser().parse_html_string(welcome_markup)
        self.render_page()
        self.status_var.set("Welcome page loaded")

    def navigate_to_url(self, event=None):
        """Load whatever the address bar points at and display it.

        The address is handled in this order:

        1. ``http://`` / ``https://`` addresses are fetched with ``requests``
           (10 second timeout) and parsed with the URL as base.
        2. An existing local file path is parsed directly.
        3. Anything else is treated as a ``file://`` location.  A plain path
           is made absolute and prefixed with ``file://``; an explicit
           ``file://`` URL has only its leading scheme removed and, when the
           literal path does not exist, is percent-decoded (so
           ``file:///tmp/My%20Page.html`` resolves to ``/tmp/My Page.html``).

        On success the address is recorded in history, the page is rendered
        and the status bar is updated.  Any failure is reported in the status
        bar and in an error dialog; no exception propagates to the caller.

        Args:
            event: Accepted but never read.
        """
        # Local import: the module-level urllib.parse import only brings in
        # urljoin and urlparse.
        from urllib.parse import unquote

        url = self.url_var.get().strip()
        if not url:
            return

        self.status_var.set(f"Loading {url}...")
        self.root.update()

        try:
            if url.startswith(('http://', 'https://')):
                # Web URL
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                self.current_page = HTMLParser().parse_html_string(response.text, url)

            elif os.path.isfile(url):
                # Local file given as a plain path
                self.current_page = HTMLParser().parse_html_file(url)

            else:
                file_scheme = 'file://'
                if url.startswith(file_scheme):
                    # Strip only the leading scheme; a later "file://" inside
                    # the path is part of the path and must be kept.
                    raw_path = url[len(file_scheme):]
                    # Prefer the literal path for backward compatibility and
                    # fall back to the percent-decoded form.
                    if os.path.isfile(raw_path):
                        file_path = raw_path
                    else:
                        file_path = unquote(raw_path)
                else:
                    file_path = os.path.abspath(url)
                    url = file_scheme + file_path

                if not os.path.isfile(file_path):
                    raise FileNotFoundError(f"File not found: {file_path}")
                self.current_page = HTMLParser().parse_html_file(file_path)

            # Add to history
            self.add_to_history(url)
            self.render_page()
            self.status_var.set(f"Loaded {url}")

        except Exception as e:
            # UI boundary: surface every failure to the user instead of
            # letting it escape into the Tk event loop.
            self.status_var.set(f"Error loading {url}: {str(e)}")
            messagebox.showerror("Error", f"Failed to load {url}:\n{str(e)}")

    def open_file(self):
        """Ask the user for a local HTML file and navigate to it.

        If the dialog returns an empty result nothing happens; otherwise the
        chosen path is written to the address bar and loaded through
        ``navigate_to_url``.
        """
        chosen_path = filedialog.askopenfilename(
            filetypes=[("HTML files", "*.html *.htm"), ("All files", "*.*")],
            title="Open HTML File",
        )
        if not chosen_path:
            return

        self.url_var.set(chosen_path)
        self.navigate_to_url()

    def render_page(self):
        """Draw the current page on the canvas and index its clickable parts.

        Does nothing when no page is loaded.  Otherwise the canvas is wiped,
        the page is rendered to an image and placed at the canvas origin, the
        scroll region is set to the bounding box of everything on the canvas,
        and ``self.page_elements`` is rebuilt for click/hover handling.
        """
        page = self.current_page
        if not page:
            return

        canvas = self.canvas
        canvas.delete("all")

        # The PhotoImage is stored on the instance (presumably so the image
        # stays referenced while the canvas displays it).
        rendered = page.render()
        self.photo = ImageTk.PhotoImage(rendered)
        canvas.create_image(0, 0, anchor=tk.NW, image=self.photo)

        # Scrollable area follows whatever is now on the canvas.
        canvas.configure(scrollregion=canvas.bbox("all"))

        # Cache interactive elements for on_click / on_mouse_move.
        self.page_elements = self._get_clickable_elements(page)

    def _get_clickable_elements(self, container, offset=(0, 0)) -> List[Tuple]:
        """Get list of clickable elements with their positions"""
        elements = []

        if hasattr(container, '_children'):
            for child in container._children:
                if hasattr(child, '_origin'):
                    # Convert numpy arrays to tuples for consistent coordinate handling
                    child_origin = tuple(child._origin) if hasattr(child._origin, '__iter__') else child._origin
                    child_size = tuple(child._size) if hasattr(child._size, '__iter__') else child._size

                    child_offset = (offset[0] + child_origin[0], offset[1] + child_origin[1])

                    # Check if element is clickable
                    if isinstance(child, (RenderableLink, RenderableButton)):
                        elements.append((child, child_offset, child_size))

                    # Recursively check children
                    if hasattr(child, '_children'):
                        elements.extend(self._get_clickable_elements(child, child_offset))

        return elements

    def on_click(self, event):
        """Dispatch a canvas click to the first clickable element under it.

        The event position is converted to canvas coordinates and tested
        (edges inclusive) against each entry of ``self.page_elements`` in
        order; only the first hit is handled.

        * Link: its callback is invoked; a truthy result is shown in the
          status bar and, for external links, the location is also opened in
          the system browser.
        * Button: its callback is invoked; a truthy result is shown in the
          status bar with a "Button clicked:" prefix.

        Args:
            event: Tk mouse event providing ``x`` and ``y``.
        """
        click_x = self.canvas.canvasx(event.x)
        click_y = self.canvas.canvasy(event.y)

        for target, (left, top), (width, height) in self.page_elements:
            hit = (left <= click_x <= left + width
                   and top <= click_y <= top + height)
            if not hit:
                continue

            if isinstance(target, RenderableLink):
                message = target._callback()
                if message:
                    self.status_var.set(message)
                    # External links are handed to the system browser.
                    if target._link.link_type == LinkType.EXTERNAL:
                        webbrowser.open(target._link.location)

            elif isinstance(target, RenderableButton):
                message = target._callback()
                if message:
                    self.status_var.set(f"Button clicked: {message}")

            # Only the first element under the pointer is handled.
            break

    def on_mouse_move(self, event):
        """Switch the canvas cursor depending on what the pointer is over.

        The cursor becomes ``"hand2"`` while the pointer (in canvas
        coordinates, edges inclusive) lies inside any entry of
        ``self.page_elements`` and ``"arrow"`` otherwise.

        Args:
            event: Tk mouse event providing ``x`` and ``y``.
        """
        pointer_x = self.canvas.canvasx(event.x)
        pointer_y = self.canvas.canvasy(event.y)

        # any() stops at the first element containing the pointer.
        over_clickable = any(
            left <= pointer_x <= left + width and top <= pointer_y <= top + height
            for _, (left, top), (width, height) in self.page_elements
        )

        self.canvas.configure(cursor="hand2" if over_clickable else "arrow")

    def add_to_history(self, url):
        """Record ``url`` as the newest visit in the navigation history.

        If ``url`` is already the entry at the current history position
        (a reload, or a back/forward step that re-enters through
        ``navigate_to_url``), the history is left untouched so forward
        entries are not discarded and no duplicate is appended.  Otherwise
        any forward entries are dropped, ``url`` is appended and becomes the
        current position.  The navigation buttons are refreshed either way.

        Args:
            url: Address that has just been loaded.
        """
        position = self.history_index
        already_current = (
            0 <= position < len(self.history) and self.history[position] == url
        )

        if not already_current:
            # Remove any forward history, then append the new visit.
            self.history = self.history[:position + 1]
            self.history.append(url)
            self.history_index = len(self.history) - 1

        # Update navigation buttons
        self.update_nav_buttons()

    def update_nav_buttons(self):
        """Enable or disable Back/Forward to match the history position.

        Back is enabled when there is an entry before the current index;
        Forward is enabled when there is an entry after it.
        """
        can_go_back = self.history_index > 0
        can_go_forward = self.history_index < len(self.history) - 1

        back_state = tk.NORMAL if can_go_back else tk.DISABLED
        forward_state = tk.NORMAL if can_go_forward else tk.DISABLED

        self.back_btn.configure(state=back_state)
        self.forward_btn.configure(state=forward_state)

    def go_back(self):
        """Step one entry back in the navigation history.

        Does nothing when already at the first entry.  The previous entry is
        put in the address bar and reloaded through ``navigate_to_url``.

        ``navigate_to_url`` records every successful load via
        ``add_to_history``, which can rewrite the history list.  Moving
        through history must not change it, so the list is snapshotted
        before the reload and restored afterwards, leaving the forward
        entries intact and the index on the entry just stepped to.
        """
        if self.history_index <= 0:
            return

        target_index = self.history_index - 1
        saved_history = list(self.history)

        self.history_index = target_index
        self.url_var.set(saved_history[target_index])
        self.navigate_to_url()

        # Undo any history rewrite done while reloading, then resync buttons.
        self.history = saved_history
        self.history_index = target_index
        self.update_nav_buttons()

    def go_forward(self):
        """Step one entry forward in the navigation history.

        Does nothing when already at the newest entry.  The next entry is put
        in the address bar and reloaded through ``navigate_to_url``.

        ``navigate_to_url`` records every successful load via
        ``add_to_history``, which can rewrite the history list.  Moving
        through history must not change it, so the list is snapshotted
        before the reload and restored afterwards, keeping any further
        forward entries and leaving the index on the entry just stepped to.
        """
        if self.history_index >= len(self.history) - 1:
            return

        target_index = self.history_index + 1
        saved_history = list(self.history)

        self.history_index = target_index
        self.url_var.set(saved_history[target_index])
        self.navigate_to_url()

        # Undo any history rewrite done while reloading, then resync buttons.
        self.history = saved_history
        self.history_index = target_index
        self.update_nav_buttons()

    def refresh(self):
        """Reload whatever is currently displayed.

        Nothing happens when no page is loaded.  With a non-empty address bar
        the address is navigated to again; with an empty one the built-in
        welcome page is reloaded.
        """
        if not self.current_page:
            return

        if self.url_var.get():
            self.navigate_to_url()
        else:
            self.load_default_page()

    def run(self):
        """Enter the Tk main loop of the browser's root window.

        Returns only once ``mainloop()`` on ``self.root`` returns.
        """
        window = self.root
        window.mainloop()


def main():
    """Create the browser window and run it until it is closed.

    Any exception raised while constructing or running the window is caught,
    reported on stdout, and followed by a printed traceback.
    """
    print("Starting pyWebLayout HTML Browser...")

    try:
        BrowserWindow().run()
    except Exception as err:
        # Top-level boundary: report the failure with a traceback rather
        # than letting it propagate.
        print(f"Error starting browser: {err}")
        import traceback
        traceback.print_exc()


# Launch the browser only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()