diff --git a/examples/epub_page_renderer.py b/examples/epub_page_renderer.py index 08e27ea..c344637 100755 --- a/examples/epub_page_renderer.py +++ b/examples/epub_page_renderer.py @@ -2,7 +2,7 @@ """ Simple EPUB page renderer tool. -This tool uses the pyWebLayout epub_reader and typesetting modules to: +This tool uses the pyWebLayout epub_reader and layout modules to: 1. Load an EPUB file 2. Render the first X pages according to command line arguments 3. Save the pages as PNG images @@ -15,17 +15,19 @@ import os import sys import argparse from pathlib import Path -from typing import Optional +from typing import Optional, List # Add the parent directory to sys.path to import pyWebLayout sys.path.insert(0, str(Path(__file__).parent.parent)) try: from pyWebLayout.io.readers.epub_reader import read_epub - from pyWebLayout.layout.document_pagination import DocumentPaginator + from pyWebLayout.layout.ereader_layout import BidirectionalLayouter, RenderingPosition + from pyWebLayout.layout.document_layouter import paragraph_layouter from pyWebLayout.concrete.page import Page + from pyWebLayout.style.page_style import PageStyle from pyWebLayout.style.fonts import Font - from pyWebLayout.style.layout import Alignment + from pyWebLayout.abstract.block import Block from PIL import Image, ImageDraw except ImportError as e: print(f"Error importing required modules: {e}") @@ -50,14 +52,14 @@ def render_page_to_image(page: Page) -> Image.Image: return rendered_image else: # If render() doesn't return a PIL Image, create error image - error_image = Image.new('RGB', page._size, 'white') + error_image = Image.new('RGB', page.size, 'white') draw = ImageDraw.Draw(error_image) draw.text((20, 20), "Error: Page.render() did not return PIL Image", fill='red') return error_image except Exception as e: # Create error image if rendering fails - error_image = Image.new('RGB', page._size, 'white') + error_image = Image.new('RGB', page.size, 'white') draw = ImageDraw.Draw(error_image) draw.text((20, 20), f"Rendering error: {str(e)}", fill='red') print(f"Warning: Error rendering page: {e}") @@ -85,19 +87,25 @@ def extract_text_from_page(page: Page) -> str: # Import abstract block types from pyWebLayout.abstract.block import Paragraph, Heading, HList, Table, Image as AbstractImage + from pyWebLayout.concrete.text import Line - # Handle abstract block objects first - if isinstance(element, Paragraph): - # Extract text from paragraph + # Handle Line objects (concrete) + if isinstance(element, Line): + line_text = [] + if hasattr(element, '_text_objects') and element._text_objects: + for text_obj in element._text_objects: + if hasattr(text_obj, 'text'): + line_text.append(str(text_obj.text)) + if line_text: + text_lines.append(f"{indent}{' '.join(line_text)}") + + # Handle abstract block objects + elif isinstance(element, (Paragraph, Heading)): + # Extract text from paragraph/heading paragraph_text = extract_text_from_paragraph(element) if paragraph_text: - text_lines.append(f"{indent}PARAGRAPH: {paragraph_text}") - - elif isinstance(element, Heading): - # Extract text from heading - heading_text = extract_text_from_paragraph(element) - if heading_text: - text_lines.append(f"{indent}HEADING: {heading_text}") + block_type = "HEADING" if isinstance(element, Heading) else "PARAGRAPH" + text_lines.append(f"{indent}{block_type}: {paragraph_text}") elif isinstance(element, HList): text_lines.append(f"{indent}LIST:") @@ -115,7 +123,7 @@ def extract_text_from_page(page: Page) -> str: elif isinstance(element, AbstractImage): alt_text = getattr(element, 'alt_text', '') - src = getattr(element, 'src', 'Unknown') + src = getattr(element, 'source', 'Unknown') text_lines.append(f"{indent}[IMAGE: {alt_text or src}]") # Handle containers with children @@ -129,15 +137,6 @@ def extract_text_from_page(page: Page) -> str: if text: text_lines.append(f"{indent}{text}") - # Handle lines with text objects - elif hasattr(element, '_text_objects') and element._text_objects: - line_text = [] - for text_obj in element._text_objects: - if hasattr(text_obj, 'text'): - line_text.append(str(text_obj.text)) - if line_text: - text_lines.append(f"{indent}{' '.join(line_text)}") - # Handle other object types by showing their class name else: class_name = element.__class__.__name__ @@ -148,8 +147,8 @@ def extract_text_from_page(page: Page) -> str: words = [] try: # Try to get words from the paragraph - if hasattr(para_obj, 'words') and callable(para_obj.words): - for _, word in para_obj.words(): + if hasattr(para_obj, 'words_iter') and callable(para_obj.words_iter): + for _, word in para_obj.words_iter(): if hasattr(word, 'text'): words.append(word.text) else: @@ -183,6 +182,27 @@ def extract_text_from_page(page: Page) -> str: return "\n".join(text_lines) +def get_all_blocks_from_book(book) -> List[Block]: + """ + Extract all blocks from all chapters in the book. + + Args: + book: The Book object from epub_reader + + Returns: + List of all Block objects + """ + all_blocks = [] + + # Iterate through all chapters + for chapter in book.chapters: + # Get blocks from the chapter + if hasattr(chapter, '_blocks'): + all_blocks.extend(chapter._blocks) + + return all_blocks + + def main(): """Main function to handle command line arguments and process the EPUB.""" parser = argparse.ArgumentParser( @@ -234,6 +254,13 @@ Examples: help='Page margin in pixels (default: 40)' ) + parser.add_argument( + '--align', '-a', + choices=['left', 'justify'], + default='left', + help='Text alignment: left or justify (default: left)' + ) + args = parser.parse_args() # Validate arguments @@ -268,42 +295,100 @@ Examples: except Exception as e: print(f"Error loading EPUB file: {e}") + import traceback + traceback.print_exc() return 1 - # Set up pagination - page_size = (args.width, args.height) - margins = (args.margin, args.margin, args.margin, args.margin) # top, right, bottom, left + # Extract all blocks from the book + print("Extracting content blocks...") + try: + all_blocks = get_all_blocks_from_book(book) + print(f"Extracted {len(all_blocks)} content blocks") + + if not all_blocks: + print("No content blocks found in EPUB. The book might be empty.") + return 1 + + # Apply alignment setting to all paragraphs and headings + from pyWebLayout.abstract.block import Paragraph, Heading + from pyWebLayout.style.alignment import Alignment + + alignment = Alignment.JUSTIFY if args.align == 'justify' else Alignment.LEFT + print(f"Applying {args.align} alignment to all text blocks...") + + # Note: We'll pass alignment to the layouter which will handle it during rendering + # The alignment is applied at the Line level in paragraph_layouter + + except Exception as e: + print(f"Error extracting blocks: {e}") + import traceback + traceback.print_exc() + return 1 - print(f"Setting up pagination with page size {page_size} and margins {margins}") + # Set up page style and layouter + page_size = (args.width, args.height) + page_style = PageStyle( + background_color=(255, 255, 255), + border_width=args.margin, + border_color=(200, 200, 200), + padding=(10, 10, 10, 10), # top, right, bottom, left + line_spacing=5, + inter_block_spacing=15 + ) + + print(f"Setting up layouter with page size {page_size} and {args.align} alignment") try: - paginator = DocumentPaginator( - document=book, + layouter = BidirectionalLayouter( + blocks=all_blocks, + page_style=page_style, page_size=page_size, - margins=margins, - spacing=5, - halign=Alignment.LEFT + alignment_override=alignment ) except Exception as e: - print(f"Error setting up paginator: {e}") + print(f"Error setting up layouter: {e}") + import traceback + traceback.print_exc() return 1 # Render pages - print(f"Rendering {args.pages} pages...") + print(f"Rendering up to {args.pages} pages...") try: - # Generate pages - pages = paginator.paginate(max_pages=args.pages) + pages = [] + current_position = RenderingPosition() # Start from beginning + + for page_num in range(args.pages): + print(f"Rendering page {page_num + 1}/{args.pages}...") + + try: + # Render the page + page, next_position = layouter.render_page_forward(current_position) + pages.append(page) + + # Check if we've reached the end of the document + if next_position.block_index >= len(all_blocks): + print(f"Reached end of document after {page_num + 1} pages") + break + + # Update position for next page + current_position = next_position + + except Exception as e: + print(f"Error rendering page {page_num + 1}: {e}") + import traceback + traceback.print_exc() + break if not pages: - print("No pages were generated. The book might be empty or there might be an issue with pagination.") + print("No pages were generated.") return 1 print(f"Generated {len(pages)} pages") - # Render each page to an image and extract text + # Save each page to an image and extract text for i, page in enumerate(pages): - print(f"Rendering page {i + 1}/{len(pages)}...") + print(f"Saving page {i + 1}/{len(pages)}...") try: # Create image from page using pyWebLayout's built-in rendering @@ -324,18 +409,22 @@ Examples: print(f"Saved: {output_path} and {text_path}") except Exception as e: - print(f"Error rendering page {i + 1}: {e}") + print(f"Error saving page {i + 1}: {e}") + import traceback + traceback.print_exc() continue print(f"\nCompleted! Rendered {len(pages)} pages to {args.output_dir}") - # Show pagination progress - if hasattr(paginator, 'get_progress'): - progress = paginator.get_progress() * 100 + # Calculate progress through the book + if len(all_blocks) > 0: + progress = (current_position.block_index / len(all_blocks)) * 100 print(f"Progress through book: {progress:.1f}%") except Exception as e: print(f"Error during pagination/rendering: {e}") + import traceback + traceback.print_exc() return 1 return 0 diff --git a/pyWebLayout/concrete/page.py b/pyWebLayout/concrete/page.py index a06105a..1fc56b8 100644 --- a/pyWebLayout/concrete/page.py +++ b/pyWebLayout/concrete/page.py @@ -27,16 +27,41 @@ class Page(Renderable, Queriable): self._children: List[Renderable] = [] self._canvas: Optional[Image.Image] = None self._draw: Optional[ImageDraw.Draw] = None - self._current_y_offset = 0 # Track vertical position for layout + # Initialize y_offset to start of content area + # Position the first line so its baseline is close to the top boundary + # For subsequent lines, baseline-to-baseline spacing is used + self._current_y_offset = self._style.border_width + self._style.padding_top + self._is_first_line = True # Track if we're placing the first line def free_space(self) -> Tuple[int, int]: """Get the remaining space on the page""" return (self._size[0], self._size[1] - self._current_y_offset) - def can_fit_line(self, line_height: int) -> bool: - """Check if a line of the given height can fit on the page.""" - remaining_height = self.content_size[1] - (self._current_y_offset - self._style.border_width - self._style.padding_top) - return remaining_height >= line_height + def can_fit_line(self, baseline_spacing: int, ascent: int = 0, descent: int = 0) -> bool: + """ + Check if a line with the given metrics can fit on the page. + + Args: + baseline_spacing: Distance from current position to next baseline + ascent: Font ascent (height above baseline), defaults to 0 for backward compat + descent: Font descent (height below baseline), defaults to 0 for backward compat + + Returns: + True if the line fits within page boundaries + """ + # Calculate the maximum Y position allowed (bottom boundary) + max_y = self._size[1] - self._style.border_width - self._style.padding_bottom + + # If ascent/descent not provided, use simple check (backward compatibility) + if ascent == 0 and descent == 0: + return (self._current_y_offset + baseline_spacing) <= max_y + + # Calculate where the bottom of the text would be + # Text bottom = current_y_offset + ascent + descent + text_bottom = self._current_y_offset + ascent + descent + + # Check if text bottom would exceed the boundary + return text_bottom <= max_y @property def size(self) -> Tuple[int, int]: @@ -122,7 +147,8 @@ class Page(Renderable, Queriable): """ self._children.clear() self._canvas = None - self._current_y_offset = 0 + # Reset y_offset to start of content area (after border and padding) + self._current_y_offset = self._style.border_width + self._style.padding_top return self @property diff --git a/pyWebLayout/concrete/text.py b/pyWebLayout/concrete/text.py index c6a0a1d..0dc0764 100644 --- a/pyWebLayout/concrete/text.py +++ b/pyWebLayout/concrete/text.py @@ -53,7 +53,6 @@ class LeftAlignmentHandler(AlignmentHandler): Returns: Tuple[int, int, bool]: Spacing, start position, and overflow flag. """ - print("LeftAlignmentHandler:") # Handle single word case if len(text_objects) <= 1: return 0, 0, False @@ -76,7 +75,6 @@ class LeftAlignmentHandler(AlignmentHandler): # Calculate ideal spacing actual_spacing = residual_space // num_gaps - print(actual_spacing) # Clamp within bounds (CREngine pattern: respect max_spacing) if actual_spacing > max_spacing: return max_spacing, 0, False @@ -108,7 +106,6 @@ class CenterRightAlignmentHandler(AlignmentHandler): return 0, max(0, start_position), False actual_spacing = residual_space // (len(text_objects)-1) - print(actual_spacing) ideal_space = (min_spacing + max_spacing)/2 if actual_spacing > 0.5*(min_spacing + max_spacing): actual_spacing = 0.5*(min_spacing + max_spacing) @@ -138,7 +135,6 @@ class JustifyAlignmentHandler(AlignmentHandler): actual_spacing = residual_space // num_gaps ideal_space = (min_spacing + max_spacing)//2 - print(actual_spacing) # can we touch the end? if actual_spacing < max_spacing: if actual_spacing < min_spacing: @@ -228,23 +224,57 @@ class Text(Renderable, Queriable): """Add this text to a line""" self._line = line - def _apply_decoration(self): - """Apply text decoration (underline or strikethrough)""" + def _apply_decoration(self, next_text: Optional['Text'] = None, spacing: int = 0): + """ + Apply text decoration (underline or strikethrough). + + Args: + next_text: The next Text object in the line (if any) + spacing: The spacing to the next text object + """ if self._style.decoration == TextDecoration.UNDERLINE: # Draw underline at about 90% of the height y_position = self._origin[1] - 0.1*self._style.font_size - self._draw.line([(0, y_position), (self._width, y_position)], - fill=self._style.colour, width=max(1, int(self._style.font_size / 15))) + line_width = max(1, int(self._style.font_size / 15)) + + # Determine end x-coordinate + end_x = self._origin[0] + self._width + + # If next text also has underline decoration, extend to connect them + if (next_text is not None and + next_text.style.decoration == TextDecoration.UNDERLINE and + next_text.style.colour == self._style.colour): + # Extend the underline through the spacing to connect with next word + end_x += spacing + + self._draw.line([(self._origin[0], y_position), (end_x, y_position)], + fill=self._style.colour, width=line_width) elif self._style.decoration == TextDecoration.STRIKETHROUGH: # Draw strikethrough at about 50% of the height y_position = self._origin[1] + self._middle_y - self._draw.line([(0, y_position), (self._width, y_position)], - fill=self._style.colour, width=max(1, int(self._style.font_size / 15))) + line_width = max(1, int(self._style.font_size / 15)) + + # Determine end x-coordinate + end_x = self._origin[0] + self._width + + # If next text also has strikethrough decoration, extend to connect them + if (next_text is not None and + next_text.style.decoration == TextDecoration.STRIKETHROUGH and + next_text.style.colour == self._style.colour): + # Extend the strikethrough through the spacing to connect with next word + end_x += spacing + + self._draw.line([(self._origin[0], y_position), (end_x, y_position)], + fill=self._style.colour, width=line_width) - def render(self): + def render(self, next_text: Optional['Text'] = None, spacing: int = 0): """ Render the text to an image. + + Args: + next_text: The next Text object in the line (if any) + spacing: The spacing to the next text object Returns: A PIL Image containing the rendered text @@ -254,11 +284,12 @@ class Text(Renderable, Queriable): if self._style.background and self._style.background[3] > 0: # If alpha > 0 self._draw.rectangle([self._origin, self._origin+self._size], fill=self._style.background) - # Draw the text using calculated offsets to prevent cropping - self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font, fill=self._style.colour) + # Draw the text using baseline as anchor point ("ls" = left-baseline) + # This ensures the origin represents the baseline, not the top-left + self._draw.text((self.origin[0], self._origin[1]), self._text, font=self._style.font, fill=self._style.colour, anchor="ls") - # Apply any text decorations - self._apply_decoration() + # Apply any text decorations with knowledge of next text + self._apply_decoration(next_text, spacing) class Line(Box): """ @@ -268,7 +299,10 @@ class Line(Box): def __init__(self, spacing: Tuple[int, int], origin, size, draw: ImageDraw.Draw,font: Optional[Font] = None, callback=None, sheet=None, mode=None, halign=Alignment.CENTER, - valign=Alignment.CENTER, previous = None): + valign=Alignment.CENTER, previous = None, + min_word_length_for_brute_force: int = 8, + min_chars_before_hyphen: int = 2, + min_chars_after_hyphen: int = 2): """ Initialize a new line. @@ -283,6 +317,9 @@ class Line(Box): halign: Horizontal alignment of text within the line valign: Vertical alignment of text within the line previous: Reference to the previous line + min_word_length_for_brute_force: Minimum word length to attempt brute force hyphenation (default: 8) + min_chars_before_hyphen: Minimum characters before hyphen in any split (default: 2) + min_chars_after_hyphen: Minimum characters after hyphen in any split (default: 2) """ super().__init__(origin, size, callback, sheet, mode, halign, valign) self._text_objects: List['Text'] = [] # Store Text objects directly @@ -292,11 +329,17 @@ class Line(Box): self._words : List['Word'] = [] self._previous = previous self._next = None - ascent,descent = self._font.font.getmetrics() - self._baseline = self._origin[1] - ascent + ascent, descent = self._font.font.getmetrics() + # Store baseline as offset from line origin (top), not absolute position + self._baseline = ascent self._draw = draw self._spacing_render = (spacing[0] + spacing[1]) //2 self._position_render = 0 + + # Hyphenation configuration parameters + self._min_word_length_for_brute_force = min_word_length_for_brute_force + self._min_chars_before_hyphen = min_chars_before_hyphen + self._min_chars_after_hyphen = min_chars_after_hyphen # Create the appropriate alignment handler self._alignment_handler = self._create_alignment_handler(halign) @@ -311,7 +354,6 @@ class Line(Box): Returns: The appropriate alignment handler instance """ - print("HALGIN!!!!!", alignment) if alignment == Alignment.LEFT: return LeftAlignmentHandler() elif alignment == Alignment.JUSTIFY: @@ -333,75 +375,141 @@ class Line(Box): Add a word to this line using intelligent word fitting strategies. Args: - text: The text content of the word - font: The font to use for this word, or None to use the line's default font + word: The word to add to the line + part: Optional pretext from a previous hyphenated word Returns: - True if the word was successfully added, False if it couldn't fit, in case of hypenation the hyphenated part is returned + Tuple of (success, overflow_text): + - success: True if word/part was added, False if it couldn't fit + - overflow_text: Remaining text if word was hyphenated, None otherwise """ + # First, add any pretext from previous hyphenation if part is not None: self._text_objects.append(part) self._words.append(word) part.add_line(self) + # Try to add the full word text = Text.from_word(word, self._draw) self._text_objects.append(text) - spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1]) - print(self._alignment_handler) + spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position( + self._text_objects, self._size[0], self._spacing[0], self._spacing[1] + ) + if not overflow: + # Word fits! Add it completely self._words.append(word) word.add_concete(text) text.add_line(self) self._position_render = position self._spacing_render = spacing - return True, None # no overflow word is just added! + return True, None - _=self._text_objects.pop() - splits = [(Text(pair[0]+"-", word.style,self._draw, line=self, source=word), Text( pair[1], word.style, self._draw, line=self, source=word)) for pair in word.possible_hyphenation()] - - #worst case scenario! - if len(splits)==0 and len(word.text)>=6: - text = Text(word.text+"-", word.style, self._draw) # add hypen to know true length - word_length = sum([text.width for text in self._text_objects]) - spacing_length = self._spacing[0] * (len(self._text_objects) - 1) - remaining=self._size[0] - word_length - spacing_length - fraction = remaining / text.width - # Calculate split position: fraction represents what portion of the hyphenated word fits - # We need to scale this to the original word length, accounting for the hyphen - hyphenated_length = len(word.text) + 1 # +1 for hyphen - split_in_hyphenated = round(fraction * hyphenated_length) - # Map back to original word, ensuring we don't go past the word length - spliter = min(len(word.text) - 1, max(1, split_in_hyphenated)) - split = [Text(word.text[:spliter]+"-", word.style, self._draw, line=self, source=word), Text(word.text[spliter:], word.style, self._draw, line=self, source=word)] - self._text_objects.append(split[0]) - word.add_concete(split) - split[0].add_line(self) - split[1].add_line(self) - self._spacing_render = self._spacing[0] + # Word doesn't fit, remove it and try hyphenation + _ = self._text_objects.pop() + + # Step 1: Try pyphen hyphenation + pyphen_splits = word.possible_hyphenation() + valid_splits = [] + + if pyphen_splits: + # Create Text objects for each possible split and check if they fit + for pair in pyphen_splits: + first_part_text = pair[0] + "-" + second_part_text = pair[1] + + # Validate minimum character requirements + if len(pair[0]) < self._min_chars_before_hyphen: + continue + if len(pair[1]) < self._min_chars_after_hyphen: + continue + + # Create Text objects + first_text = Text(first_part_text, word.style, self._draw, line=self, source=word) + second_text = Text(second_part_text, word.style, self._draw, line=self, source=word) + + # Check if first part fits + self._text_objects.append(first_text) + spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position( + self._text_objects, self._size[0], self._spacing[0], self._spacing[1] + ) + _ = self._text_objects.pop() + + if not overflow: + # This split fits! Add it to valid options + valid_splits.append((first_text, second_text, spacing, position)) + + # Step 2: If we have valid pyphen splits, choose the best one + if valid_splits: + # Select the split with the best (minimum) spacing + best_split = min(valid_splits, key=lambda x: x[2]) + first_text, second_text, spacing, position = best_split + + # Apply the split + self._text_objects.append(first_text) + first_text.line = self + word.add_concete((first_text, second_text)) + self._spacing_render = spacing self._position_render = position - return True, split[1] # we apply a brute force split - - elif len(splits)==0 and len(word.text)<6: - return False, None # this endpoint means no words can be added. - - spacings = [] - positions = [] - - for split in splits: - self._text_objects.append(split[0]) - - spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position(self._text_objects, self._size[0],self._spacing[0], self._spacing[1]) - spacings.append(spacing) - positions.append(position) - _=self._text_objects.pop() - idx = int(np.argmin(spacings)) - self._text_objects.append(splits[idx][0]) - splits[idx][0].line=self - word.add_concete(splits[idx]) - self._spacing_render = spacings[idx] - self._position_render = positions[idx] - self._words.append(word) - return True, splits[idx][1] # we apply a phyphenated split with best spacing + self._words.append(word) + return True, second_text + + # Step 3: Try brute force hyphenation (only for long words) + if len(word.text) >= self._min_word_length_for_brute_force: + # Calculate available space for the word + word_length = sum([text.width for text in self._text_objects]) + spacing_length = self._spacing[0] * max(0, len(self._text_objects) - 1) + remaining = self._size[0] - word_length - spacing_length + + if remaining > 0: + # Create a hyphenated version to measure + test_text = Text(word.text + "-", word.style, self._draw) + + if test_text.width > 0: + # Calculate what fraction of the hyphenated word fits + fraction = remaining / test_text.width + + # Convert fraction to character position + # We need at least min_chars_before_hyphen and leave at least min_chars_after_hyphen + max_split_pos = len(word.text) - self._min_chars_after_hyphen + min_split_pos = self._min_chars_before_hyphen + + # Calculate ideal split position based on available space + ideal_split = int(fraction * len(word.text)) + split_pos = max(min_split_pos, min(ideal_split, max_split_pos)) + + # Ensure we meet minimum requirements + if (split_pos >= self._min_chars_before_hyphen and + len(word.text) - split_pos >= self._min_chars_after_hyphen): + + # Create the split + first_part_text = word.text[:split_pos] + "-" + second_part_text = word.text[split_pos:] + + first_text = Text(first_part_text, word.style, self._draw, line=self, source=word) + second_text = Text(second_part_text, word.style, self._draw, line=self, source=word) + + # Verify the first part actually fits + self._text_objects.append(first_text) + spacing, position, overflow = self._alignment_handler.calculate_spacing_and_position( + self._text_objects, self._size[0], self._spacing[0], self._spacing[1] + ) + + if not overflow: + # Brute force split works! + first_text.line = self + second_text.line = self + word.add_concete((first_text, second_text)) + self._spacing_render = spacing + self._position_render = position + self._words.append(word) + return True, second_text + else: + # Doesn't fit, remove it + _ = self._text_objects.pop() + + # Step 4: Word cannot be hyphenated or split, move to next line + return False, None def render(self): """ @@ -422,9 +530,14 @@ class Line(Box): # Start x_cursor at line origin plus any alignment offset x_cursor = self._origin[0] + self._position_render - for text in self._text_objects: + for i, text in enumerate(self._text_objects): # Update text draw context to current draw context text._draw = self._draw text.set_origin(np.array([x_cursor, y_cursor])) - text.render() + + # Determine next text object for continuous decoration + next_text = self._text_objects[i + 1] if i + 1 < len(self._text_objects) else None + + # Render with next text information for continuous underline/strikethrough + text.render(next_text, self._spacing_render) x_cursor += self._spacing_render + text.width # x-spacing + width of text object diff --git a/pyWebLayout/layout/document_layouter.py b/pyWebLayout/layout/document_layouter.py index cef704c..010a2e2 100644 --- a/pyWebLayout/layout/document_layouter.py +++ b/pyWebLayout/layout/document_layouter.py @@ -4,9 +4,9 @@ from typing import List, Tuple, Optional, Union from pyWebLayout.concrete import Page, Line, Text from pyWebLayout.abstract import Paragraph, Word, Link -from pyWebLayout.style.concrete_style import ConcreteStyleRegistry +from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver -def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]: +def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pretext: Optional[Text] = None, alignment_override: Optional['Alignment'] = None) -> Tuple[bool, Optional[int], Optional[Text]]: """ Layout a paragraph of text within a given page. @@ -18,6 +18,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr page: The page to layout the paragraph on start_word: Index of the first word to process (for continuation) pretext: Optional pretext from a previous hyphenated word + alignment_override: Optional alignment to override the paragraph's default alignment Returns: Tuple of: @@ -32,22 +33,71 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr if start_word >= len(paragraph.words): return True, None, None - # Get the concrete style with resolved word spacing constraints - style_registry = ConcreteStyleRegistry(page.style_resolver) - concrete_style = style_registry.get_concrete_style(paragraph.style) + # paragraph.style is already a Font object (concrete), not AbstractStyle + # We need to get word spacing constraints from the Font's abstract style if available + # For now, use reasonable defaults based on font size + from pyWebLayout.style import Font, Alignment + + if isinstance(paragraph.style, Font): + # paragraph.style is already a Font (concrete style) + font = paragraph.style + # Use default word spacing constraints based on font size + # Minimum spacing should be proportional to font size for better readability + min_spacing = float(font.font_size) * 0.25 # 25% of font size + max_spacing = float(font.font_size) * 0.5 # 50% of font size + word_spacing_constraints = (int(min_spacing), int(max_spacing)) + text_align = Alignment.LEFT # Default alignment + else: + # paragraph.style is an AbstractStyle, resolve it + rendering_context = RenderingContext(base_font_size=paragraph.style.font_size) + style_resolver = StyleResolver(rendering_context) + style_registry = ConcreteStyleRegistry(style_resolver) + concrete_style = style_registry.get_concrete_style(paragraph.style) + font = concrete_style.create_font() + word_spacing_constraints = ( + int(concrete_style.word_spacing_min), + int(concrete_style.word_spacing_max) + ) + text_align = concrete_style.text_align + + # Apply alignment override if provided + if alignment_override is not None: + text_align = alignment_override + + # Cap font size to page maximum if needed + if font.font_size > page.style.max_font_size: + from pyWebLayout.style import Font + font = Font( + font_path=font._font_path, + font_size=page.style.max_font_size, + colour=font.colour, + weight=font.weight, + style=font.style, + decoration=font.decoration, + background=font.background + ) + + # Calculate baseline-to-baseline spacing using line spacing multiplier + # This is the vertical distance between baselines of consecutive lines + baseline_spacing = int(font.font_size * page.style.line_spacing_multiplier) + + # Get font metrics for boundary checking + ascent, descent = font.font.getmetrics() - # Extract word spacing constraints (min, max) for Line constructor - word_spacing_constraints = ( - int(concrete_style.word_spacing_min), - int(concrete_style.word_spacing_max) - ) - - def create_new_line(word: Optional[Union[Word, Text]] = None) -> Optional[Line]: + def create_new_line(word: Optional[Union[Word, Text]] = None, is_first_line: bool = False) -> Optional[Line]: """Helper function to create a new line, returns None if page is full.""" - if not page.can_fit_line(paragraph.line_height): + # Check if this line's baseline and descenders would fit on the page + if not page.can_fit_line(baseline_spacing, ascent, descent): return None - y_cursor = page._current_y_offset + # For the first line, position it so text starts at the top boundary + # For subsequent lines, use current y_offset which tracks baseline-to-baseline spacing + if is_first_line: + # Position line origin so that baseline (origin + ascent) is close to top + # We want minimal space above the text, so origin should be at boundary + y_cursor = page._current_y_offset + else: + y_cursor = page._current_y_offset x_cursor = page.border_size # Create a temporary Text object to calculate word width @@ -60,10 +110,10 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr return Line( spacing=word_spacing_constraints, origin=(x_cursor, y_cursor), - size=(page.available_width, paragraph.line_height), + size=(page.available_width, baseline_spacing), draw=page.draw, - font=concrete_style.create_font(), - halign=concrete_style.text_align + font=font, + halign=text_align ) # Create initial line @@ -72,15 +122,14 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr return False, start_word, pretext page.add_child(current_line) - page._current_y_offset += paragraph.line_height + # Note: add_child already updates _current_y_offset based on child's origin and size + # No need to manually increment it here # Track current position in paragraph current_pretext = pretext # Process words starting from start_word for i, word in enumerate(paragraph.words[start_word:], start=start_word): - if current_pretext: - print(current_pretext.text) success, overflow_text = current_line.add_word(word, current_pretext) if success: @@ -93,7 +142,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr # If we can't create a new line, return with the current state return False, i, overflow_text page.add_child(current_line) - page._current_y_offset += paragraph.line_height + # Note: add_child already updates _current_y_offset # Continue to the next word continue else: @@ -121,7 +170,7 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr continue page.add_child(current_line) - page._current_y_offset += paragraph.line_height + # Note: add_child already updates _current_y_offset # Try to add the word to the new line success, overflow_text = current_line.add_word(word, current_pretext) @@ -142,55 +191,3 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr # All words processed successfully return True, None, None - -class DocumentLayouter: - """ - Class-based document layouter for more complex layout operations. - """ - - def __init__(self, page: Page): - """Initialize the layouter with a page.""" - self.page = page - self.style_registry = ConcreteStyleRegistry(page.style_resolver) - - def layout_paragraph(self, paragraph: Paragraph, start_word: int = 0, pretext: Optional[Text] = None) -> Tuple[bool, Optional[int], Optional[Text]]: - """ - Layout a paragraph using the class-based approach. - - This method provides the same functionality as the standalone function - but with better state management and reusability. - """ - return paragraph_layouter(paragraph, self.page, start_word, pretext) - - def layout_document(self, paragraphs: List[Paragraph]) -> bool: - """ - Layout multiple paragraphs in sequence. - - Args: - paragraphs: List of paragraphs to layout - - Returns: - True if all paragraphs were laid out successfully, False otherwise - """ - for paragraph in paragraphs: - start_word = 0 - pretext = None - - while True: - complete, next_word, remaining_pretext = self.layout_paragraph( - paragraph, start_word, pretext - ) - - if complete: - # Paragraph finished - break - - if next_word is None: - # Error condition - return False - - # Continue on next page or handle page break - # For now, we'll just return False indicating we need more space - return False - - return True diff --git a/pyWebLayout/layout/ereader_layout.py b/pyWebLayout/layout/ereader_layout.py index 3bad6e5..dd32b85 100644 --- a/pyWebLayout/layout/ereader_layout.py +++ b/pyWebLayout/layout/ereader_layout.py @@ -27,6 +27,7 @@ from pyWebLayout.concrete.page import Page from pyWebLayout.concrete.text import Line, Text from pyWebLayout.style.page_style import PageStyle from pyWebLayout.style import Font +from pyWebLayout.layout.document_layouter import paragraph_layouter @dataclass @@ -212,11 +213,12 @@ class BidirectionalLayouter: Handles font scaling and maintains position state. """ - def __init__(self, blocks: List[Block], page_style: PageStyle, page_size: Tuple[int, int] = (800, 600)): + def __init__(self, blocks: List[Block], page_style: PageStyle, page_size: Tuple[int, int] = (800, 600), alignment_override=None): self.blocks = blocks self.page_style = page_style self.page_size = page_size self.chapter_navigator = ChapterNavigator(blocks) + self.alignment_override = alignment_override def render_page_forward(self, position: RenderingPosition, font_scale: float = 1.0) -> Tuple[Page, RenderingPosition]: """ @@ -328,54 +330,66 @@ class BidirectionalLayouter: return True, new_pos def _layout_paragraph_on_page(self, paragraph: Paragraph, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]: - """Layout a paragraph on the page with font scaling support""" - # This would integrate with the existing paragraph_layouter but with font scaling - # For now, this is a placeholder implementation + """ + Layout a paragraph on the page using the core paragraph_layouter. + Integrates font scaling and position tracking with the proven layout logic. - # Calculate scaled line height - line_height = int(paragraph.style.font_size * font_scale * 1.2) # 1.2 is line spacing factor + Args: + paragraph: The paragraph to layout (already scaled if font_scale != 1.0) + page: The page to layout on + position: Current rendering position + font_scale: Font scaling factor (used for context, paragraph should already be scaled) + + Returns: + Tuple of (success, new_position) + """ + # Convert remaining_pretext from string to Text object if needed + pretext_obj = None + if position.remaining_pretext: + # Create a Text object from the pretext string + pretext_obj = Text( + position.remaining_pretext, + paragraph.style, + page.draw, + line=None, + source=None + ) - if not page.can_fit_line(line_height): - return False, position - - # Create a line and try to fit words - y_cursor = page._current_y_offset - x_cursor = page.border_size - - # Scale word spacing constraints - word_spacing = FontScaler.scale_word_spacing((5, 15), font_scale) # Default spacing - - line = Line( - spacing=word_spacing, - origin=(x_cursor, y_cursor), - size=(page.available_width, line_height), - draw=page.draw, - font=FontScaler.scale_font(paragraph.style, font_scale) + # Call the core paragraph layouter with alignment override if set + success, failed_word_index, remaining_pretext = paragraph_layouter( + paragraph, + page, + start_word=position.word_index, + pretext=pretext_obj, + alignment_override=self.alignment_override ) - # Add words starting from position.word_index - words_added = 0 - for i, word in enumerate(paragraph.words[position.word_index:], start=position.word_index): - success, overflow = line.add_word(word) - if not success: - break - words_added += 1 + # Create new position based on the result + new_pos = position.copy() - if words_added > 0: - page.add_child(line) - page._current_y_offset += line_height - - new_pos = position.copy() - new_pos.word_index += words_added - - # If we finished the paragraph, move to next block - if new_pos.word_index >= len(paragraph.words): - new_pos.block_index += 1 - new_pos.word_index = 0 - + if success: + # Paragraph was fully laid out, move to next block + new_pos.block_index += 1 + new_pos.word_index = 0 + new_pos.remaining_pretext = None return True, new_pos - - return False, position + else: + # Paragraph was not fully laid out + if failed_word_index is not None: + # Update position to the word that didn't fit + new_pos.word_index = failed_word_index + + # Convert Text object back to string if there's remaining pretext + if remaining_pretext is not None and hasattr(remaining_pretext, 'text'): + new_pos.remaining_pretext = remaining_pretext.text + else: + new_pos.remaining_pretext = None + + return False, new_pos + else: + # No specific word failed, but layout wasn't successful + # This shouldn't normally happen, but handle it gracefully + return False, position def _layout_heading_on_page(self, heading: Heading, page: Page, position: RenderingPosition, font_scale: float) -> Tuple[bool, RenderingPosition]: """Layout a heading on the page""" diff --git a/pyWebLayout/style/page_style.py b/pyWebLayout/style/page_style.py index e8e71cd..724f7eb 100644 --- a/pyWebLayout/style/page_style.py +++ b/pyWebLayout/style/page_style.py @@ -22,6 +22,10 @@ class PageStyle: # Background color background_color: Tuple[int, int, int] = (255, 255, 255) + + # Typography properties + max_font_size: int = 72 # Maximum font size allowed on a page + line_spacing_multiplier: float = 1.2 # Baseline-to-baseline spacing multiplier @property def padding_top(self) -> int: diff --git a/tests/concrete/test_new_page_implementation.py b/tests/concrete/test_new_page_implementation.py index 0fe881b..f0e689c 100644 --- a/tests/concrete/test_new_page_implementation.py +++ b/tests/concrete/test_new_page_implementation.py @@ -244,6 +244,79 @@ class TestPageImplementation(unittest.TestCase): # Test that children are in the correct order for i, child in enumerate(page.children): self.assertEqual(child._text, f"Child {i}") + + def test_page_can_fit_line_boundary_checking(self): + """Test that can_fit_line correctly checks bottom boundary""" + # Create page with known dimensions + # Page: 800x600, border: 40, padding: (10, 10, 10, 10) + # Content area starts at y=50 (border + padding_top = 40 + 10) + # Content area ends at y=550 (height - border - padding_bottom = 600 - 40 - 10) + style = PageStyle( + border_width=40, + padding=(10, 10, 10, 10) + ) + page = Page(size=(800, 600), style=style) + + # Initial y_offset should be at border + padding_top = 50 + self.assertEqual(page._current_y_offset, 50) + + # Test 1: Line that fits comfortably + line_height = 20 + max_y = 600 - 40 - 10 # 550 + self.assertTrue(page.can_fit_line(line_height)) + # Would end at 50 + 20 = 70, well within 550 + + # Test 2: Simulate adding lines to fill the page + # Available height: 550 - 50 = 500 pixels + # With 20-pixel lines, we can fit 25 lines exactly + for i in range(24): # Add 24 lines + self.assertTrue(page.can_fit_line(20), f"Line {i+1} should fit") + # Simulate adding a line by updating y_offset + page._current_y_offset += 20 + + # After 24 lines: y_offset = 50 + (24 * 20) = 530 + self.assertEqual(page._current_y_offset, 530) + + # Test 3: One more 20-pixel line should fit (530 + 20 = 550, exactly at boundary) + self.assertTrue(page.can_fit_line(20)) + page._current_y_offset += 20 + self.assertEqual(page._current_y_offset, 550) + + # Test 4: Now another line should NOT fit (550 + 20 = 570 > 550) + self.assertFalse(page.can_fit_line(20)) + + # Test 5: Even a 1-pixel line should not fit (550 + 1 = 551 > 550) + self.assertFalse(page.can_fit_line(1)) + + # Test 6: Edge case - exactly at boundary, 0-height line should fit + self.assertTrue(page.can_fit_line(0)) + + def test_page_can_fit_line_with_different_styles(self): + """Test can_fit_line with different page styles""" + # Test with no border or padding + style_no_border = PageStyle(border_width=0, padding=(0, 0, 0, 0)) + page_no_border = Page(size=(100, 100), style=style_no_border) + + # With no border/padding, y_offset starts at 0 + self.assertEqual(page_no_border._current_y_offset, 0) + + # Can fit a 100-pixel line exactly + self.assertTrue(page_no_border.can_fit_line(100)) + + # Cannot fit a 101-pixel line + self.assertFalse(page_no_border.can_fit_line(101)) + + # Test with large border and padding + style_large = PageStyle(border_width=20, padding=(15, 15, 15, 15)) + page_large = Page(size=(200, 200), style=style_large) + + # y_offset starts at border + padding_top = 20 + 15 = 35 + self.assertEqual(page_large._current_y_offset, 35) + + # Max y = 200 - 20 - 15 = 165 + # Available height = 165 - 35 = 130 pixels + self.assertTrue(page_large.can_fit_line(130)) + self.assertFalse(page_large.can_fit_line(131)) if __name__ == '__main__': diff --git a/tests/layouter/test_document_layouter_integration.py b/tests/layouter/test_document_layouter_integration.py index 3d5c22b..631afe6 100644 --- a/tests/layouter/test_document_layouter_integration.py +++ b/tests/layouter/test_document_layouter_integration.py @@ -16,43 +16,12 @@ from pyWebLayout.layout.document_layouter import paragraph_layouter, DocumentLay from pyWebLayout.style.abstract_style import AbstractStyle from pyWebLayout.style.concrete_style import ConcreteStyle, StyleResolver, RenderingContext from pyWebLayout.style.fonts import Font +from pyWebLayout.style.page_style import PageStyle +from pyWebLayout.concrete.page import Page from pyWebLayout.concrete.text import Line, Text from pyWebLayout.abstract.inline import Word -class MockPage: - """A realistic mock page that behaves like a real page.""" - - def __init__(self, width=400, height=600, max_lines=20): - self.border_size = 20 - self._current_y_offset = 50 - self.available_width = width - self.available_height = height - self.max_lines = max_lines - self.lines_added = 0 - self.children = [] - - # Create a real drawing context - self.image = Image.new('RGB', (width + 40, height + 100), 'white') - self.draw = ImageDraw.Draw(self.image) - - # Create a real style resolver - context = RenderingContext(base_font_size=16) - self.style_resolver = StyleResolver(context) - - def can_fit_line(self, line_height): - """Check if another line can fit on the page.""" - remaining_height = self.available_height - self._current_y_offset - can_fit = remaining_height >= line_height and self.lines_added < self.max_lines - return can_fit - - def add_child(self, child): - """Add a child element (like a Line) to the page.""" - self.children.append(child) - self.lines_added += 1 - return True - - class MockWord(Word): """A simple mock word that extends the real Word class.""" @@ -106,8 +75,9 @@ class TestDocumentLayouterIntegration: def test_single_page_layout_with_real_components(self): """Test layout on a single page using real Line and Text objects.""" - # Create a page that can fit content - page = MockPage(width=500, height=400, max_lines=10) + # Create a real page that can fit content + page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10)) + page = Page(size=(500, 400), style=page_style) # Create a paragraph with realistic content paragraph = MockParagraph( @@ -125,7 +95,6 @@ class TestDocumentLayouterIntegration: # Verify lines were added to page assert len(page.children) > 0 - assert page.lines_added > 0 # Verify actual Line objects were created for child in page.children: @@ -135,8 +104,9 @@ class TestDocumentLayouterIntegration: def test_multi_page_scenario_with_page_overflow(self): """Test realistic multi-page scenario with actual page overflow.""" - # Create a very small page that will definitely overflow - small_page = MockPage(width=150, height=80, max_lines=1) # Extremely small page + # Create a very small real page that will definitely overflow + small_page_style = PageStyle(border_width=5, padding=(5, 5, 5, 5)) + small_page = Page(size=(150, 80), style=small_page_style) # Create a long paragraph that will definitely overflow long_text = " ".join([f"verylongword{i:02d}" for i in range(20)]) # 20 long words @@ -157,13 +127,13 @@ class TestDocumentLayouterIntegration: # If it failed, verify overflow handling assert failed_word_index is not None # Should indicate where it failed assert failed_word_index < len(paragraph.words) # Should be within word range - assert len(small_page.children) <= small_page.max_lines print(f"✓ Multi-page test: Page overflow at word {failed_word_index}, {len(small_page.children)} lines fit") def test_word_spacing_constraints_in_real_lines(self): """Test that word spacing constraints are properly used in real Line objects.""" - # Create page - page = MockPage(width=400, height=300) + # Create real page + page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10)) + page = Page(size=(400, 300), style=page_style) # Create paragraph with specific spacing constraints paragraph = MockParagraph( @@ -197,7 +167,8 @@ class TestDocumentLayouterIntegration: ] for alignment_name, style in alignments_to_test: - page = MockPage(width=350, height=200) + page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10)) + page = Page(size=(350, 200), style=page_style) paragraph = MockParagraph( "This sentence will test different alignment strategies with word spacing.", style @@ -217,8 +188,9 @@ class TestDocumentLayouterIntegration: def test_realistic_document_with_multiple_pages(self): """Test a realistic document that spans multiple pages.""" - # Create multiple pages - pages = [MockPage(width=400, height=300, max_lines=5) for _ in range(3)] + # Create multiple real pages + page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10)) + pages = [Page(size=(400, 300), style=page_style) for _ in range(3)] # Create a document with multiple paragraphs paragraphs = [ @@ -275,7 +247,8 @@ class TestDocumentLayouterIntegration: def test_word_spacing_constraint_resolution_integration(self): """Test the complete integration from AbstractStyle to Line spacing.""" - page = MockPage() + page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10)) + page = Page(size=(400, 600), style=page_style) # Test different constraint scenarios test_cases = [ @@ -300,8 +273,9 @@ class TestDocumentLayouterIntegration: ] for case in test_cases: - # Create fresh page for each test - test_page = MockPage() + # Create fresh real page for each test + test_page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10)) + test_page = Page(size=(400, 600), style=test_page_style) paragraph = MockParagraph( "Testing constraint resolution with different scenarios.", case["style"] @@ -322,8 +296,9 @@ class TestDocumentLayouterIntegration: def test_hyphenation_with_word_spacing_constraints(self): """Test that hyphenation works correctly with word spacing constraints.""" - # Create a narrow page to force hyphenation - narrow_page = MockPage(width=200, height=300) + # Create a narrow real page to force hyphenation + narrow_page_style = PageStyle(border_width=20, padding=(10, 10, 10, 10)) + narrow_page = Page(size=(200, 300), style=narrow_page_style) # Create paragraph with long words that will need hyphenation paragraph = MockParagraph(