Faster pdf export and test support
Some checks failed
Lint / lint (push) Successful in 21s
Tests / test (3.11) (push) Successful in 19s
Tests / test (3.12) (push) Successful in 16s
Tests / test (3.13) (push) Successful in 16s
Tests / test (3.14) (push) Successful in 17s
Python CI / test (push) Failing after 1m44s

This commit is contained in:
Duncan Tourolle 2026-04-09 22:30:36 +02:00
parent f96200c799
commit e112437372
3 changed files with 132 additions and 51 deletions

View File

@ -41,7 +41,7 @@ jobs:
id: pytest id: pytest
continue-on-error: true continue-on-error: true
run: | run: |
xvfb-run -a pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing
env: env:
QT_QPA_PLATFORM: offscreen QT_QPA_PLATFORM: offscreen

View File

@ -5,9 +5,10 @@ Uses multiprocessing to pre-process images in parallel for faster exports.
""" """
import os import os
import threading
from typing import Any, List, Tuple, Optional, Union, Dict from typing import Any, List, Tuple, Optional, Union, Dict
from dataclasses import dataclass, field from dataclasses import dataclass, field
from concurrent.futures import ProcessPoolExecutor, as_completed from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
import multiprocessing import multiprocessing
import io import io
from reportlab.lib.pagesizes import A4 from reportlab.lib.pagesizes import A4
@ -192,15 +193,15 @@ class PDFExporter:
self.warnings: List[str] = [] self.warnings: List[str] = []
self.current_pdf_page = 1 self.current_pdf_page = 1
self.max_workers = max_workers or multiprocessing.cpu_count() self.max_workers = max_workers or multiprocessing.cpu_count()
# Cache for pre-processed images: task_id -> PIL Image
self._processed_images: Dict[str, Image.Image] = {} self._processed_images: Dict[str, Image.Image] = {}
def export(self, output_path: str, progress_callback=None) -> Tuple[bool, List[str]]: def export(self, output_path: str, progress_callback=None) -> Tuple[bool, List[str]]:
""" """
Export the project to PDF. Export the project to PDF.
Uses multiprocessing to pre-process all images in parallel before Uses multiprocessing to pre-process all images in parallel, then renders
assembling the PDF sequentially. each page to its own PDF buffer in parallel (via threads), and finally
merges the per-page PDFs into the output file.
Args: Args:
output_path: Path where PDF should be saved output_path: Path where PDF should be saved
@ -210,7 +211,6 @@ class PDFExporter:
Tuple of (success: bool, warnings: List[str]) Tuple of (success: bool, warnings: List[str])
""" """
self.warnings = [] self.warnings = []
self.current_pdf_page = 1
self._processed_images = {} self._processed_images = {}
try: try:
@ -221,70 +221,157 @@ class PDFExporter:
# Get page dimensions from project (in mm) # Get page dimensions from project (in mm)
page_width_mm, page_height_mm = self.project.page_size_mm page_width_mm, page_height_mm = self.project.page_size_mm
# Bleed expands each page on all sides
bleed_mm = self.project.page_bleed_mm bleed_mm = self.project.page_bleed_mm
bleed_pt = bleed_mm * self.MM_TO_POINTS bleed_pt = bleed_mm * self.MM_TO_POINTS
# Convert to PDF points (base page = cut/trim size)
page_width_pt = page_width_mm * self.MM_TO_POINTS page_width_pt = page_width_mm * self.MM_TO_POINTS
page_height_pt = page_height_mm * self.MM_TO_POINTS page_height_pt = page_height_mm * self.MM_TO_POINTS
# Expanded page size includes bleed on all sides # Phase 1: parallel image pre-processing (unchanged)
expanded_width_pt = page_width_pt + 2 * bleed_pt
expanded_height_pt = page_height_pt + 2 * bleed_pt
# Phase 1: Collect all image tasks and process in parallel
if progress_callback: if progress_callback:
progress_callback(0, total_pages, "Collecting images for processing...") progress_callback(0, total_pages, "Collecting images for processing...")
image_tasks = self._collect_image_tasks(page_width_pt, page_height_pt) image_tasks = self._collect_image_tasks(page_width_pt, page_height_pt)
if image_tasks: if image_tasks:
if progress_callback: if progress_callback:
progress_callback(0, total_pages, f"Processing {len(image_tasks)} images in parallel...") progress_callback(0, total_pages, f"Processing {len(image_tasks)} images in parallel...")
self._preprocess_images_parallel(image_tasks, progress_callback, total_pages) self._preprocess_images_parallel(image_tasks, progress_callback, total_pages)
# Phase 2: Build PDF using pre-processed images # Phase 2: determine ordered page sequence (inserts blank pages for spread alignment)
c = canvas.Canvas(output_path, pagesize=(expanded_width_pt, expanded_height_pt)) page_sequence = self._compute_page_sequence()
pages_processed = 0
for page in self.project.pages:
page_name = self.project.get_page_display_name(page)
# Phase 3: render each page to its own PDF bytes in parallel
n = len(page_sequence)
if progress_callback: if progress_callback:
progress_callback(pages_processed, total_pages, f"Assembling {page_name}...") progress_callback(0, total_pages, f"Rendering {n} pages in parallel...")
pdf_bytes_list: List[Optional[bytes]] = [None] * n
if page.is_cover: with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
self._export_cover(c, page, page_width_pt, page_height_pt) future_to_idx = {
pages_processed += 1 executor.submit(
elif page.is_double_spread: self._render_item_to_bytes, item, page_width_pt, page_height_pt, bleed_pt
if self.current_pdf_page % 2 == 1: ): i
c.showPage() for i, item in enumerate(page_sequence)
self.current_pdf_page += 1 }
completed = 0
for future in as_completed(future_to_idx):
i = future_to_idx[future]
try:
pdf_bytes_list[i] = future.result()
except Exception as e:
self.warnings.append(f"Error rendering page: {str(e)}")
pdf_bytes_list[i] = self._make_blank_page_bytes(page_width_pt, page_height_pt, bleed_pt)
completed += 1
if progress_callback: if progress_callback:
progress_callback(pages_processed, total_pages, "Inserting blank page for alignment...") progress_callback(completed, n, f"Rendering pages: {completed}/{n}...")
self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt) # Phase 4: merge all per-page PDFs into the output file
pages_processed += 2 if progress_callback:
else: progress_callback(n, total_pages, "Merging pages...")
self._export_single_page(c, page, page_width_pt, page_height_pt, bleed_pt) self._merge_page_pdfs(pdf_bytes_list, output_path)
pages_processed += 1
c.save()
# Clean up processed images cache
self._processed_images = {} self._processed_images = {}
if progress_callback: if progress_callback:
progress_callback(total_pages, total_pages, "Export complete!") progress_callback(total_pages, total_pages, "Export complete!")
return True, self.warnings return True, self.warnings
except Exception as e: except Exception as e:
self.warnings.append(f"Export failed: {str(e)}") self.warnings.append(f"Export failed: {str(e)}")
return False, self.warnings return False, self.warnings
def _compute_page_sequence(self) -> List[Tuple[str, Any]]:
"""
Build an ordered list of (page_type, page) items to render.
Inserts ('blank', None) entries before double-page spreads that would
otherwise start on an odd-numbered PDF page (spreads must start on even pages).
"""
sequence: List[Tuple[str, Any]] = []
pdf_page_num = 1
for page in self.project.pages:
if page.is_cover:
sequence.append(("cover", page))
pdf_page_num += 1
elif page.is_double_spread:
if pdf_page_num % 2 == 1:
sequence.append(("blank", None))
pdf_page_num += 1
sequence.append(("spread", page))
pdf_page_num += 2
else:
sequence.append(("single", page))
pdf_page_num += 1
return sequence
def _render_item_to_bytes(
self,
item: Tuple[str, Any],
page_width_pt: float,
page_height_pt: float,
bleed_pt: float,
) -> bytes:
"""
Render a single page item to a self-contained PDF (as bytes).
Each call creates its own Canvas / BytesIO so pages can be rendered
concurrently without sharing state.
"""
page_type, page = item
expanded_width_pt = page_width_pt + 2 * bleed_pt
expanded_height_pt = page_height_pt + 2 * bleed_pt
buf = io.BytesIO()
if page_type == "blank":
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
c.showPage()
c.save()
elif page_type == "cover":
cover_width_mm, cover_height_mm = page.layout.size
cover_width_pt = cover_width_mm * self.MM_TO_POINTS
cover_height_pt = cover_height_mm * self.MM_TO_POINTS
c = canvas.Canvas(buf, pagesize=(cover_width_pt, cover_height_pt))
for element in sorted(page.layout.elements, key=lambda x: x.z_index):
self._render_element(c, element, 0, cover_width_pt, cover_height_pt, "Cover")
c.showPage()
c.save()
elif page_type == "single":
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
for element in sorted(page.layout.elements, key=lambda x: x.z_index):
self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt)
c.showPage()
c.save()
elif page_type == "spread":
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt)
c.save()
return buf.getvalue()
def _make_blank_page_bytes(
    self, page_width_pt: float, page_height_pt: float, bleed_pt: float
) -> bytes:
    """
    Build a PDF document consisting of one empty page and return its bytes.

    The page measures the trim size plus ``bleed_pt`` on every side. Used as
    a stand-in when a real page cannot be rendered.
    """
    full_width_pt = page_width_pt + 2 * bleed_pt
    full_height_pt = page_height_pt + 2 * bleed_pt

    out = io.BytesIO()
    blank = canvas.Canvas(out, pagesize=(full_width_pt, full_height_pt))
    blank.showPage()  # emit the (empty) page
    blank.save()
    return out.getvalue()
def _merge_page_pdfs(self, pdf_bytes_list: List[Optional[bytes]], output_path: str):
    """
    Concatenate per-item PDF documents into a single file at ``output_path``.

    Entries are appended in list order and ``None`` entries are skipped.
    Every page of each entry is copied, so an entry holding more than one
    page contributes all of them.
    """
    from pypdf import PdfWriter, PdfReader

    merged = PdfWriter()
    present = (raw for raw in pdf_bytes_list if raw is not None)
    for raw in present:
        source = PdfReader(io.BytesIO(raw))
        for pdf_page in source.pages:
            merged.add_page(pdf_page)

    with open(output_path, "wb") as out_file:
        merged.write(out_file)
def _make_task_id( def _make_task_id(
self, self,
element: ImageData, element: ImageData,
@ -530,10 +617,6 @@ class PDFExporter:
c.showPage() # Finish cover page c.showPage() # Finish cover page
self.current_pdf_page += 1
# Reset page size for content pages
c.setPageSize((page_width_pt, page_height_pt))
def _export_single_page( def _export_single_page(
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0 self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
@ -548,7 +631,6 @@ class PDFExporter:
self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt) self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt)
c.showPage() # Finish this page c.showPage() # Finish this page
self.current_pdf_page += 1
def _export_spread( def _export_spread(
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0 self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
@ -596,7 +678,6 @@ class PDFExporter:
self._render_split_element(params, bleed_pt) self._render_split_element(params, bleed_pt)
c.showPage() # Finish left page c.showPage() # Finish left page
self.current_pdf_page += 1
# Process elements for right page # Process elements for right page
c.setPageSize((expanded_width_pt, expanded_height_pt)) c.setPageSize((expanded_width_pt, expanded_height_pt))
@ -625,7 +706,6 @@ class PDFExporter:
self._render_split_element(params, bleed_pt) self._render_split_element(params, bleed_pt)
c.showPage() # Finish right page c.showPage() # Finish right page
self.current_pdf_page += 1
def _render_element( def _render_element(
self, self,

View File

@ -31,6 +31,7 @@ dependencies = [
"Pillow>=8.0.0", "Pillow>=8.0.0",
"reportlab>=3.5.0", "reportlab>=3.5.0",
"lxml>=4.6.0", "lxml>=4.6.0",
"pypdf>=4.0.0",
] ]
[project.optional-dependencies] [project.optional-dependencies]