new page per chapter

This commit is contained in:
Duncan Tourolle 2025-11-04 22:59:58 +01:00
parent 562b4cf5d0
commit d542f08dd0
4 changed files with 82 additions and 4 deletions

View File

@ -23,6 +23,7 @@ class BlockType(Enum):
HORIZONTAL_RULE = 10
LINE_BREAK = 11
IMAGE = 12
PAGE_BREAK = 13
class Block:
@ -1586,3 +1587,42 @@ class HorizontalRule(Block):
raise AttributeError(f"Container {type(container).__name__} must have an 'add_block' method")
return hr
class PageBreak(Block):
    """
    Block that marks a forced page boundary.

    The block carries no content of its own. It is constructed with
    ``BlockType.PAGE_BREAK`` so that, when it is encountered during layout,
    everything after it is placed on a new page even if the current page
    still has room.
    """

    def __init__(self):
        """Create the block, tagging it as ``BlockType.PAGE_BREAK``."""
        super().__init__(BlockType.PAGE_BREAK)

    @classmethod
    def create_and_add_to(cls, container) -> 'PageBreak':
        """
        Build a PageBreak and append it to ``container``.

        Args:
            container: Target object; it must expose an ``add_block`` method.

        Returns:
            The PageBreak instance that was appended.

        Raises:
            AttributeError: If ``container`` has no ``add_block`` attribute.
        """
        # The instance is built before the container is inspected, so a
        # failing constructor surfaces ahead of the container check.
        new_break = cls()

        # Guard clause: reject containers that cannot accept blocks.
        if not hasattr(container, 'add_block'):
            message = f"Container {type(container).__name__} must have an 'add_block' method"
            raise AttributeError(message)

        container.add_block(new_break)
        return new_break

View File

@ -15,6 +15,7 @@ import urllib.parse
from PIL import Image as PILImage, ImageOps
from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
from pyWebLayout.abstract.block import PageBreak
from pyWebLayout.io.readers.html_extraction import parse_html_string
@ -515,6 +516,10 @@ class EPUBReader:
for block in blocks:
chapter.add_block(block)
# Add a PageBreak after the chapter to ensure next chapter starts on new page
# This helps maintain chapter boundaries during pagination
chapter.add_block(PageBreak())
except Exception as e:
print(f"Error parsing chapter {i+1}: {str(e)}")
# Add an error message block
@ -526,6 +531,8 @@ class EPUBReader:
default_font = Font()
error_para.add_word(Word(f"Error loading chapter: {str(e)}", default_font))
chapter.add_block(error_para)
# Still add PageBreak even after error
chapter.add_block(PageBreak())
def read_epub(epub_path: str) -> Book:

View File

@ -6,7 +6,7 @@ from pyWebLayout.concrete import Page, Line, Text
from pyWebLayout.concrete.image import RenderableImage
from pyWebLayout.concrete.functional import LinkText
from pyWebLayout.abstract import Paragraph, Word, Link
from pyWebLayout.abstract.block import Image as AbstractImage
from pyWebLayout.abstract.block import Image as AbstractImage, PageBreak
from pyWebLayout.abstract.inline import LinkedWord
from pyWebLayout.style.concrete_style import ConcreteStyleRegistry, RenderingContext, StyleResolver
from pyWebLayout.style import Font, Alignment
@ -78,7 +78,6 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
# Cap font size to page maximum if needed
if font.font_size > page.style.max_font_size:
from pyWebLayout.style import Font
font = Font(
font_path=font._font_path,
font_size=page.style.max_font_size,
@ -211,6 +210,25 @@ def paragraph_layouter(paragraph: Paragraph, page: Page, start_word: int = 0, pr
return True, None, None
def pagebreak_layouter(page_break: PageBreak, page: Page) -> bool:
    """
    Lay out a page break element.

    A page break means that whatever follows it must begin on a new page.
    The function therefore reports failure unconditionally: the False
    result tells the caller that the current page is finished and a new
    one has to be created for the remaining content.

    Args:
        page_break: The PageBreak block being processed (not inspected).
        page: The page currently being filled (not inspected; accepted
            for consistency).

    Returns:
        bool: Always False, to force creation of a new page.
    """
    # A break never "fits" on the current page, whatever space is left.
    placed_on_current_page = False
    return placed_on_current_page
def image_layouter(image: AbstractImage, page: Page, max_width: Optional[int] = None,
max_height: Optional[int] = None) -> bool:
"""

View File

@ -23,12 +23,12 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
try:
from pyWebLayout.io.readers.epub_reader import read_epub
from pyWebLayout.layout.document_layouter import DocumentLayouter, paragraph_layouter, image_layouter
from pyWebLayout.layout.document_layouter import DocumentLayouter, paragraph_layouter, image_layouter, pagebreak_layouter
from pyWebLayout.concrete.page import Page
from pyWebLayout.style.page_style import PageStyle
from pyWebLayout.style.fonts import Font
from pyWebLayout.style.alignment import Alignment
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Table, Image as AbstractImage
from pyWebLayout.abstract.block import Block, Paragraph, Heading, HList, Table, Image as AbstractImage, PageBreak
from pyWebLayout.style.concrete_style import RenderingContext, StyleResolver
from PIL import Image, ImageDraw
except ImportError as e:
@ -179,6 +179,19 @@ def layout_blocks_on_pages(blocks: List[Block], page_style: PageStyle,
print(f"Warning: Error processing list: {e}")
current_block_index += 1
elif isinstance(block, PageBreak):
# PageBreak forces a new page
success = pagebreak_layouter(block, page)
# Mark that we've seen this block
current_block_index += 1
continuation_word_index = 0
continuation_pretext = None
# PageBreak always returns False to force new page
# Break to create a new page for subsequent content
break
elif isinstance(block, Table):
# Skip tables for now (not implemented)
print(f"Warning: Skipping table (not yet implemented)")