From d542f08dd0c9692305982a881e14e2bf67d41032 Mon Sep 17 00:00:00 2001
From: Duncan Tourolle
Date: Tue, 4 Nov 2025 22:59:58 +0100
Subject: [PATCH] new page per chapter

---
 pyWebLayout/abstract/block.py                 | 40 +++++++++++++++++++
 pyWebLayout/io/readers/epub_reader.py         |  7 ++++
 pyWebLayout/layout/document_layouter.py       | 22 +++++++++-
 .../epub_page_renderer_documentlayouter.py    | 17 +++++++-
 4 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/pyWebLayout/abstract/block.py b/pyWebLayout/abstract/block.py
index c946914..98536e3 100644
--- a/pyWebLayout/abstract/block.py
+++ b/pyWebLayout/abstract/block.py
@@ -23,6 +23,7 @@ class BlockType(Enum):
     HORIZONTAL_RULE = 10
     LINE_BREAK = 11
     IMAGE = 12
+    PAGE_BREAK = 13
 
 
 class Block:
@@ -1586,3 +1587,42 @@ class HorizontalRule(Block):
             raise AttributeError(f"Container {type(container).__name__} must have an 'add_block' method")
 
         return hr
+
+
+class PageBreak(Block):
+    """
+    A page break element that forces content to start on a new page.
+
+    When encountered during layout, this block signals that all subsequent
+    content should be placed on a new page, even if the current page has
+    available space.
+    """
+
+    def __init__(self):
+        """Initialize a page break element."""
+        super().__init__(BlockType.PAGE_BREAK)
+
+    @classmethod
+    def create_and_add_to(cls, container) -> 'PageBreak':
+        """
+        Create a new PageBreak and add it to a container.
+
+        Args:
+            container: The container to add the page break to (must have add_block method)
+
+        Returns:
+            The newly created PageBreak object
+
+        Raises:
+            AttributeError: If the container doesn't have the required add_block method
+        """
+        # Create the new page break
+        page_break = cls()
+
+        # Add the page break to the container
+        if hasattr(container, 'add_block'):
+            container.add_block(page_break)
+        else:
+            raise AttributeError(f"Container {type(container).__name__} must have an 'add_block' method")
+
+        return page_break
diff --git a/pyWebLayout/io/readers/epub_reader.py b/pyWebLayout/io/readers/epub_reader.py
index 0f85977..239aa51 100644
--- a/pyWebLayout/io/readers/epub_reader.py
+++ b/pyWebLayout/io/readers/epub_reader.py
@@ -15,6 +15,7 @@ import urllib.parse
 from PIL import Image as PILImage, ImageOps
 
 from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
+from pyWebLayout.abstract.block import PageBreak
 from pyWebLayout.io.readers.html_extraction import parse_html_string
 
 
@@ -515,6 +516,10 @@ class EPUBReader:
                 for block in blocks:
                     chapter.add_block(block)
 
+                # Add a PageBreak after the chapter to ensure next chapter starts on new page
+                # This helps maintain chapter boundaries during pagination
+                chapter.add_block(PageBreak())
+
             except Exception as e:
                 print(f"Error parsing chapter {i+1}: {str(e)}")
                 # Add an error message block
@@ -526,6 +531,8 @@ class EPUBReader:
                 default_font = Font()
                 error_para.add_word(Word(f"Error loading chapter: {str(e)}", default_font))
                 chapter.add_block(error_para)
+                # Still add PageBreak even after error
+                chapter.add_block(PageBreak())
 
 
 def read_epub(epub_path: str) -> Book:
diff --git a/pyWebLayout/layout/document_layouter.py b/pyWebLayout/layout/document_layouter.py
index e666026..a72b614 100644
--- a/pyWebLayout/layout/document_layouter.py
+++ b/pyWebLayout/layout/document_layouter.py
@@ -6,7 +6,7 @@ from pyWebLayout.concrete import Page, Line, Text
 from pyWebLayout.concrete.image import RenderableImage
 from pyWebLayout.concrete.functional import LinkText
 from pyWebLayout.abstract import Paragraph, Word, Link
-from pyWebLayout.abstract.block import Image as AbstractImage
+from pyWebLayout.abstract.block import Image as AbstractImage, PageBreak
 from pyWebLayout.abstract.inline import LinkedWord
 from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
 from pyWebLayout.style import Font, Alignment
@@ -78,7 +78,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
 
     # Cap font size to page maximum if needed
     if font.font_size > page.style.max_font_size:
-        from pyWebLayout.style import Font
         font = Font(
             font_path=font._font_path,
             font_size=page.style.max_font_size,
@@ -211,6 +210,25 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
     return True, None, None
 
 
+def pagebreak_layouter(page_break: PageBreak, page: Page) -> bool:
+    """
+    Handle a page break element.
+
+    A page break signals that all subsequent content should start on a new page.
+    This function always returns False to indicate that the current page is complete
+    and a new page should be created for subsequent content.
+
+    Args:
+        page_break: The PageBreak block
+        page: The current page (not used, but kept for consistency)
+
+    Returns:
+        bool: Always False to force creation of a new page
+    """
+    # Page break always forces a new page
+    return False
+
+
 def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] = None,
                    max_height: Optional[int] = None) -> bool:
     """
diff --git a/scripts/epub_page_renderer_documentlayouter.py b/scripts/epub_page_renderer_documentlayouter.py
index 217f9eb..9d9439e 100644
--- a/scripts/epub_page_renderer_documentlayouter.py
+++ b/scripts/epub_page_renderer_documentlayouter.py
@@ -23,12 +23,12 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 
 try:
     from pyWebLayout.io.readers.epub_reader import read_epub
-    from pyWebLayout.layout.document_layouter import DocumentLayouter, paragraph_layouter, image_layouter
+    from pyWebLayout.layout.document_layouter import DocumentLayouter, paragraph_layouter, image_layouter, pagebreak_layouter
     from pyWebLayout.concrete.page import Page
     from pyWebLayout.style.page_style import PageStyle
     from pyWebLayout.style.fonts import Font
     from pyWebLayout.style.alignment import Alignment
-    from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Table, Image as AbstractImage
+    from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Table, Image as AbstractImage, PageBreak
     from pyWebLayout.style.concrete_style import RenderingContext, StyleResolver
     from PIL import Image, ImageDraw
 except ImportError as e:
@@ -179,6 +179,19 @@ def layout_blocks_on_pages(blocks: List[Block], page_style: PageStyle,
                     print(f"Warning: Error processing list: {e}")
                 current_block_index += 1
 
+            elif isinstance(block, PageBreak):
+                # PageBreak forces a new page
+                success = pagebreak_layouter(block, page)
+
+                # Mark that we've seen this block
+                current_block_index += 1
+                continuation_word_index = 0
+                continuation_pretext = None
+
+                # PageBreak always returns False to force new page
+                # Break to create a new page for subsequent content
+                break
+
             elif isinstance(block, Table):
                 # Skip tables for now (not implemented)
                 print(f"Warning: Skipping table (not yet implemented)")