Faster pdf export and test support
Some checks failed
Lint / lint (push) Successful in 21s
Tests / test (3.11) (push) Successful in 19s
Tests / test (3.12) (push) Successful in 16s
Tests / test (3.13) (push) Successful in 16s
Tests / test (3.14) (push) Successful in 17s
Python CI / test (push) Failing after 1m44s
Some checks failed
Lint / lint (push) Successful in 21s
Tests / test (3.11) (push) Successful in 19s
Tests / test (3.12) (push) Successful in 16s
Tests / test (3.13) (push) Successful in 16s
Tests / test (3.14) (push) Successful in 17s
Python CI / test (push) Failing after 1m44s
This commit is contained in:
parent
f96200c799
commit
e112437372
@ -41,7 +41,7 @@ jobs:
|
||||
id: pytest
|
||||
continue-on-error: true
|
||||
run: |
|
||||
xvfb-run -a pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing
|
||||
pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing
|
||||
env:
|
||||
QT_QPA_PLATFORM: offscreen
|
||||
|
||||
|
||||
@ -5,9 +5,10 @@ Uses multiprocessing to pre-process images in parallel for faster exports.
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
from typing import Any, List, Tuple, Optional, Union, Dict
|
||||
from dataclasses import dataclass, field
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
||||
import multiprocessing
|
||||
import io
|
||||
from reportlab.lib.pagesizes import A4
|
||||
@ -192,15 +193,15 @@ class PDFExporter:
|
||||
self.warnings: List[str] = []
|
||||
self.current_pdf_page = 1
|
||||
self.max_workers = max_workers or multiprocessing.cpu_count()
|
||||
# Cache for pre-processed images: task_id -> PIL Image
|
||||
self._processed_images: Dict[str, Image.Image] = {}
|
||||
|
||||
def export(self, output_path: str, progress_callback=None) -> Tuple[bool, List[str]]:
|
||||
"""
|
||||
Export the project to PDF.
|
||||
|
||||
Uses multiprocessing to pre-process all images in parallel before
|
||||
assembling the PDF sequentially.
|
||||
Uses multiprocessing to pre-process all images in parallel, then renders
|
||||
each page to its own PDF buffer in parallel (via threads), and finally
|
||||
merges the per-page PDFs into the output file.
|
||||
|
||||
Args:
|
||||
output_path: Path where PDF should be saved
|
||||
@ -210,7 +211,6 @@ class PDFExporter:
|
||||
Tuple of (success: bool, warnings: List[str])
|
||||
"""
|
||||
self.warnings = []
|
||||
self.current_pdf_page = 1
|
||||
self._processed_images = {}
|
||||
|
||||
try:
|
||||
@ -221,70 +221,157 @@ class PDFExporter:
|
||||
|
||||
# Get page dimensions from project (in mm)
|
||||
page_width_mm, page_height_mm = self.project.page_size_mm
|
||||
|
||||
# Bleed expands each page on all sides
|
||||
bleed_mm = self.project.page_bleed_mm
|
||||
bleed_pt = bleed_mm * self.MM_TO_POINTS
|
||||
|
||||
# Convert to PDF points (base page = cut/trim size)
|
||||
page_width_pt = page_width_mm * self.MM_TO_POINTS
|
||||
page_height_pt = page_height_mm * self.MM_TO_POINTS
|
||||
|
||||
# Expanded page size includes bleed on all sides
|
||||
expanded_width_pt = page_width_pt + 2 * bleed_pt
|
||||
expanded_height_pt = page_height_pt + 2 * bleed_pt
|
||||
|
||||
# Phase 1: Collect all image tasks and process in parallel
|
||||
# Phase 1: parallel image pre-processing (unchanged)
|
||||
if progress_callback:
|
||||
progress_callback(0, total_pages, "Collecting images for processing...")
|
||||
|
||||
image_tasks = self._collect_image_tasks(page_width_pt, page_height_pt)
|
||||
|
||||
if image_tasks:
|
||||
if progress_callback:
|
||||
progress_callback(0, total_pages, f"Processing {len(image_tasks)} images in parallel...")
|
||||
self._preprocess_images_parallel(image_tasks, progress_callback, total_pages)
|
||||
|
||||
# Phase 2: Build PDF using pre-processed images
|
||||
c = canvas.Canvas(output_path, pagesize=(expanded_width_pt, expanded_height_pt))
|
||||
|
||||
pages_processed = 0
|
||||
for page in self.project.pages:
|
||||
page_name = self.project.get_page_display_name(page)
|
||||
# Phase 2: determine ordered page sequence (inserts blank pages for spread alignment)
|
||||
page_sequence = self._compute_page_sequence()
|
||||
|
||||
# Phase 3: render each page to its own PDF bytes in parallel
|
||||
n = len(page_sequence)
|
||||
if progress_callback:
|
||||
progress_callback(pages_processed, total_pages, f"Assembling {page_name}...")
|
||||
progress_callback(0, total_pages, f"Rendering {n} pages in parallel...")
|
||||
pdf_bytes_list: List[Optional[bytes]] = [None] * n
|
||||
|
||||
if page.is_cover:
|
||||
self._export_cover(c, page, page_width_pt, page_height_pt)
|
||||
pages_processed += 1
|
||||
elif page.is_double_spread:
|
||||
if self.current_pdf_page % 2 == 1:
|
||||
c.showPage()
|
||||
self.current_pdf_page += 1
|
||||
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
future_to_idx = {
|
||||
executor.submit(
|
||||
self._render_item_to_bytes, item, page_width_pt, page_height_pt, bleed_pt
|
||||
): i
|
||||
for i, item in enumerate(page_sequence)
|
||||
}
|
||||
completed = 0
|
||||
for future in as_completed(future_to_idx):
|
||||
i = future_to_idx[future]
|
||||
try:
|
||||
pdf_bytes_list[i] = future.result()
|
||||
except Exception as e:
|
||||
self.warnings.append(f"Error rendering page: {str(e)}")
|
||||
pdf_bytes_list[i] = self._make_blank_page_bytes(page_width_pt, page_height_pt, bleed_pt)
|
||||
completed += 1
|
||||
if progress_callback:
|
||||
progress_callback(pages_processed, total_pages, "Inserting blank page for alignment...")
|
||||
progress_callback(completed, n, f"Rendering pages: {completed}/{n}...")
|
||||
|
||||
self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt)
|
||||
pages_processed += 2
|
||||
else:
|
||||
self._export_single_page(c, page, page_width_pt, page_height_pt, bleed_pt)
|
||||
pages_processed += 1
|
||||
# Phase 4: merge all per-page PDFs into the output file
|
||||
if progress_callback:
|
||||
progress_callback(n, total_pages, "Merging pages...")
|
||||
self._merge_page_pdfs(pdf_bytes_list, output_path)
|
||||
|
||||
c.save()
|
||||
|
||||
# Clean up processed images cache
|
||||
self._processed_images = {}
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(total_pages, total_pages, "Export complete!")
|
||||
|
||||
return True, self.warnings
|
||||
|
||||
except Exception as e:
|
||||
self.warnings.append(f"Export failed: {str(e)}")
|
||||
return False, self.warnings
|
||||
|
||||
def _compute_page_sequence(self) -> List[Tuple[str, Any]]:
|
||||
"""
|
||||
Build an ordered list of (page_type, page) items to render.
|
||||
|
||||
Inserts ('blank', None) entries before double-page spreads that would
|
||||
otherwise start on an odd-numbered PDF page (spreads must start on even pages).
|
||||
"""
|
||||
sequence: List[Tuple[str, Any]] = []
|
||||
pdf_page_num = 1
|
||||
for page in self.project.pages:
|
||||
if page.is_cover:
|
||||
sequence.append(("cover", page))
|
||||
pdf_page_num += 1
|
||||
elif page.is_double_spread:
|
||||
if pdf_page_num % 2 == 1:
|
||||
sequence.append(("blank", None))
|
||||
pdf_page_num += 1
|
||||
sequence.append(("spread", page))
|
||||
pdf_page_num += 2
|
||||
else:
|
||||
sequence.append(("single", page))
|
||||
pdf_page_num += 1
|
||||
return sequence
|
||||
|
||||
def _render_item_to_bytes(
|
||||
self,
|
||||
item: Tuple[str, Any],
|
||||
page_width_pt: float,
|
||||
page_height_pt: float,
|
||||
bleed_pt: float,
|
||||
) -> bytes:
|
||||
"""
|
||||
Render a single page item to a self-contained PDF (as bytes).
|
||||
|
||||
Each call creates its own Canvas / BytesIO so pages can be rendered
|
||||
concurrently without sharing state.
|
||||
"""
|
||||
page_type, page = item
|
||||
expanded_width_pt = page_width_pt + 2 * bleed_pt
|
||||
expanded_height_pt = page_height_pt + 2 * bleed_pt
|
||||
buf = io.BytesIO()
|
||||
|
||||
if page_type == "blank":
|
||||
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
|
||||
c.showPage()
|
||||
c.save()
|
||||
|
||||
elif page_type == "cover":
|
||||
cover_width_mm, cover_height_mm = page.layout.size
|
||||
cover_width_pt = cover_width_mm * self.MM_TO_POINTS
|
||||
cover_height_pt = cover_height_mm * self.MM_TO_POINTS
|
||||
c = canvas.Canvas(buf, pagesize=(cover_width_pt, cover_height_pt))
|
||||
for element in sorted(page.layout.elements, key=lambda x: x.z_index):
|
||||
self._render_element(c, element, 0, cover_width_pt, cover_height_pt, "Cover")
|
||||
c.showPage()
|
||||
c.save()
|
||||
|
||||
elif page_type == "single":
|
||||
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
|
||||
for element in sorted(page.layout.elements, key=lambda x: x.z_index):
|
||||
self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt)
|
||||
c.showPage()
|
||||
c.save()
|
||||
|
||||
elif page_type == "spread":
|
||||
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
|
||||
self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt)
|
||||
c.save()
|
||||
|
||||
return buf.getvalue()
|
||||
|
||||
def _make_blank_page_bytes(
    self, page_width_pt: float, page_height_pt: float, bleed_pt: float
) -> bytes:
    """
    Build an in-memory PDF holding one empty page and return its bytes.

    The page measures the given width and height plus bleed_pt added on
    each side. Used as a placeholder when a page cannot be rendered.
    """
    full_width_pt = page_width_pt + 2 * bleed_pt
    full_height_pt = page_height_pt + 2 * bleed_pt

    stream = io.BytesIO()
    blank = canvas.Canvas(stream, pagesize=(full_width_pt, full_height_pt))
    blank.showPage()  # emit the single empty page
    blank.save()
    return stream.getvalue()
|
||||
|
||||
def _merge_page_pdfs(self, pdf_bytes_list: List[Optional[bytes]], output_path: str):
    """
    Concatenate in-memory PDF documents, in list order, into output_path.

    Entries that are None are skipped. Every page of each remaining
    document is appended, so an entry may contribute more than one page.
    The output file is opened in binary write mode.
    """
    from pypdf import PdfWriter, PdfReader

    merged = PdfWriter()
    rendered = (data for data in pdf_bytes_list if data is not None)
    for data in rendered:
        source = PdfReader(io.BytesIO(data))
        for pdf_page in source.pages:
            merged.add_page(pdf_page)

    with open(output_path, "wb") as out_file:
        merged.write(out_file)
|
||||
|
||||
def _make_task_id(
|
||||
self,
|
||||
element: ImageData,
|
||||
@ -530,10 +617,6 @@ class PDFExporter:
|
||||
|
||||
|
||||
c.showPage() # Finish cover page
|
||||
self.current_pdf_page += 1
|
||||
|
||||
# Reset page size for content pages
|
||||
c.setPageSize((page_width_pt, page_height_pt))
|
||||
|
||||
def _export_single_page(
|
||||
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
|
||||
@ -548,7 +631,6 @@ class PDFExporter:
|
||||
self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt)
|
||||
|
||||
c.showPage() # Finish this page
|
||||
self.current_pdf_page += 1
|
||||
|
||||
def _export_spread(
|
||||
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
|
||||
@ -596,7 +678,6 @@ class PDFExporter:
|
||||
self._render_split_element(params, bleed_pt)
|
||||
|
||||
c.showPage() # Finish left page
|
||||
self.current_pdf_page += 1
|
||||
|
||||
# Process elements for right page
|
||||
c.setPageSize((expanded_width_pt, expanded_height_pt))
|
||||
@ -625,7 +706,6 @@ class PDFExporter:
|
||||
self._render_split_element(params, bleed_pt)
|
||||
|
||||
c.showPage() # Finish right page
|
||||
self.current_pdf_page += 1
|
||||
|
||||
def _render_element(
|
||||
self,
|
||||
|
||||
@ -31,6 +31,7 @@ dependencies = [
|
||||
"Pillow>=8.0.0",
|
||||
"reportlab>=3.5.0",
|
||||
"lxml>=4.6.0",
|
||||
"pypdf>=4.0.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user