Faster pdf export and test support
Some checks failed
Lint / lint (push) Successful in 21s
Tests / test (3.11) (push) Successful in 19s
Tests / test (3.12) (push) Successful in 16s
Tests / test (3.13) (push) Successful in 16s
Tests / test (3.14) (push) Successful in 17s
Python CI / test (push) Failing after 1m44s
Some checks failed
Lint / lint (push) Successful in 21s
Tests / test (3.11) (push) Successful in 19s
Tests / test (3.12) (push) Successful in 16s
Tests / test (3.13) (push) Successful in 16s
Tests / test (3.14) (push) Successful in 17s
Python CI / test (push) Failing after 1m44s
This commit is contained in:
parent
f96200c799
commit
e112437372
@ -41,7 +41,7 @@ jobs:
|
|||||||
id: pytest
|
id: pytest
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: |
|
run: |
|
||||||
xvfb-run -a pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing
|
pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing
|
||||||
env:
|
env:
|
||||||
QT_QPA_PLATFORM: offscreen
|
QT_QPA_PLATFORM: offscreen
|
||||||
|
|
||||||
|
|||||||
@ -5,9 +5,10 @@ Uses multiprocessing to pre-process images in parallel for faster exports.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
from typing import Any, List, Tuple, Optional, Union, Dict
|
from typing import Any, List, Tuple, Optional, Union, Dict
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import io
|
import io
|
||||||
from reportlab.lib.pagesizes import A4
|
from reportlab.lib.pagesizes import A4
|
||||||
@ -192,15 +193,15 @@ class PDFExporter:
|
|||||||
self.warnings: List[str] = []
|
self.warnings: List[str] = []
|
||||||
self.current_pdf_page = 1
|
self.current_pdf_page = 1
|
||||||
self.max_workers = max_workers or multiprocessing.cpu_count()
|
self.max_workers = max_workers or multiprocessing.cpu_count()
|
||||||
# Cache for pre-processed images: task_id -> PIL Image
|
|
||||||
self._processed_images: Dict[str, Image.Image] = {}
|
self._processed_images: Dict[str, Image.Image] = {}
|
||||||
|
|
||||||
def export(self, output_path: str, progress_callback=None) -> Tuple[bool, List[str]]:
|
def export(self, output_path: str, progress_callback=None) -> Tuple[bool, List[str]]:
|
||||||
"""
|
"""
|
||||||
Export the project to PDF.
|
Export the project to PDF.
|
||||||
|
|
||||||
Uses multiprocessing to pre-process all images in parallel before
|
Uses multiprocessing to pre-process all images in parallel, then renders
|
||||||
assembling the PDF sequentially.
|
each page to its own PDF buffer in parallel (via threads), and finally
|
||||||
|
merges the per-page PDFs into the output file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
output_path: Path where PDF should be saved
|
output_path: Path where PDF should be saved
|
||||||
@ -210,7 +211,6 @@ class PDFExporter:
|
|||||||
Tuple of (success: bool, warnings: List[str])
|
Tuple of (success: bool, warnings: List[str])
|
||||||
"""
|
"""
|
||||||
self.warnings = []
|
self.warnings = []
|
||||||
self.current_pdf_page = 1
|
|
||||||
self._processed_images = {}
|
self._processed_images = {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -221,70 +221,157 @@ class PDFExporter:
|
|||||||
|
|
||||||
# Get page dimensions from project (in mm)
|
# Get page dimensions from project (in mm)
|
||||||
page_width_mm, page_height_mm = self.project.page_size_mm
|
page_width_mm, page_height_mm = self.project.page_size_mm
|
||||||
|
|
||||||
# Bleed expands each page on all sides
|
|
||||||
bleed_mm = self.project.page_bleed_mm
|
bleed_mm = self.project.page_bleed_mm
|
||||||
bleed_pt = bleed_mm * self.MM_TO_POINTS
|
bleed_pt = bleed_mm * self.MM_TO_POINTS
|
||||||
|
|
||||||
# Convert to PDF points (base page = cut/trim size)
|
|
||||||
page_width_pt = page_width_mm * self.MM_TO_POINTS
|
page_width_pt = page_width_mm * self.MM_TO_POINTS
|
||||||
page_height_pt = page_height_mm * self.MM_TO_POINTS
|
page_height_pt = page_height_mm * self.MM_TO_POINTS
|
||||||
|
|
||||||
# Expanded page size includes bleed on all sides
|
# Phase 1: parallel image pre-processing (unchanged)
|
||||||
expanded_width_pt = page_width_pt + 2 * bleed_pt
|
|
||||||
expanded_height_pt = page_height_pt + 2 * bleed_pt
|
|
||||||
|
|
||||||
# Phase 1: Collect all image tasks and process in parallel
|
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(0, total_pages, "Collecting images for processing...")
|
progress_callback(0, total_pages, "Collecting images for processing...")
|
||||||
|
|
||||||
image_tasks = self._collect_image_tasks(page_width_pt, page_height_pt)
|
image_tasks = self._collect_image_tasks(page_width_pt, page_height_pt)
|
||||||
|
|
||||||
if image_tasks:
|
if image_tasks:
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(0, total_pages, f"Processing {len(image_tasks)} images in parallel...")
|
progress_callback(0, total_pages, f"Processing {len(image_tasks)} images in parallel...")
|
||||||
self._preprocess_images_parallel(image_tasks, progress_callback, total_pages)
|
self._preprocess_images_parallel(image_tasks, progress_callback, total_pages)
|
||||||
|
|
||||||
# Phase 2: Build PDF using pre-processed images
|
# Phase 2: determine ordered page sequence (inserts blank pages for spread alignment)
|
||||||
c = canvas.Canvas(output_path, pagesize=(expanded_width_pt, expanded_height_pt))
|
page_sequence = self._compute_page_sequence()
|
||||||
|
|
||||||
pages_processed = 0
|
# Phase 3: render each page to its own PDF bytes in parallel
|
||||||
for page in self.project.pages:
|
n = len(page_sequence)
|
||||||
page_name = self.project.get_page_display_name(page)
|
if progress_callback:
|
||||||
|
progress_callback(0, total_pages, f"Rendering {n} pages in parallel...")
|
||||||
|
pdf_bytes_list: List[Optional[bytes]] = [None] * n
|
||||||
|
|
||||||
if progress_callback:
|
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||||
progress_callback(pages_processed, total_pages, f"Assembling {page_name}...")
|
future_to_idx = {
|
||||||
|
executor.submit(
|
||||||
|
self._render_item_to_bytes, item, page_width_pt, page_height_pt, bleed_pt
|
||||||
|
): i
|
||||||
|
for i, item in enumerate(page_sequence)
|
||||||
|
}
|
||||||
|
completed = 0
|
||||||
|
for future in as_completed(future_to_idx):
|
||||||
|
i = future_to_idx[future]
|
||||||
|
try:
|
||||||
|
pdf_bytes_list[i] = future.result()
|
||||||
|
except Exception as e:
|
||||||
|
self.warnings.append(f"Error rendering page: {str(e)}")
|
||||||
|
pdf_bytes_list[i] = self._make_blank_page_bytes(page_width_pt, page_height_pt, bleed_pt)
|
||||||
|
completed += 1
|
||||||
|
if progress_callback:
|
||||||
|
progress_callback(completed, n, f"Rendering pages: {completed}/{n}...")
|
||||||
|
|
||||||
if page.is_cover:
|
# Phase 4: merge all per-page PDFs into the output file
|
||||||
self._export_cover(c, page, page_width_pt, page_height_pt)
|
if progress_callback:
|
||||||
pages_processed += 1
|
progress_callback(n, total_pages, "Merging pages...")
|
||||||
elif page.is_double_spread:
|
self._merge_page_pdfs(pdf_bytes_list, output_path)
|
||||||
if self.current_pdf_page % 2 == 1:
|
|
||||||
c.showPage()
|
|
||||||
self.current_pdf_page += 1
|
|
||||||
if progress_callback:
|
|
||||||
progress_callback(pages_processed, total_pages, "Inserting blank page for alignment...")
|
|
||||||
|
|
||||||
self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt)
|
|
||||||
pages_processed += 2
|
|
||||||
else:
|
|
||||||
self._export_single_page(c, page, page_width_pt, page_height_pt, bleed_pt)
|
|
||||||
pages_processed += 1
|
|
||||||
|
|
||||||
c.save()
|
|
||||||
|
|
||||||
# Clean up processed images cache
|
|
||||||
self._processed_images = {}
|
self._processed_images = {}
|
||||||
|
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(total_pages, total_pages, "Export complete!")
|
progress_callback(total_pages, total_pages, "Export complete!")
|
||||||
|
|
||||||
return True, self.warnings
|
return True, self.warnings
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.warnings.append(f"Export failed: {str(e)}")
|
self.warnings.append(f"Export failed: {str(e)}")
|
||||||
return False, self.warnings
|
return False, self.warnings
|
||||||
|
|
||||||
|
def _compute_page_sequence(self) -> List[Tuple[str, Any]]:
|
||||||
|
"""
|
||||||
|
Build an ordered list of (page_type, page) items to render.
|
||||||
|
|
||||||
|
Inserts ('blank', None) entries before double-page spreads that would
|
||||||
|
otherwise start on an odd-numbered PDF page (spreads must start on even pages).
|
||||||
|
"""
|
||||||
|
sequence: List[Tuple[str, Any]] = []
|
||||||
|
pdf_page_num = 1
|
||||||
|
for page in self.project.pages:
|
||||||
|
if page.is_cover:
|
||||||
|
sequence.append(("cover", page))
|
||||||
|
pdf_page_num += 1
|
||||||
|
elif page.is_double_spread:
|
||||||
|
if pdf_page_num % 2 == 1:
|
||||||
|
sequence.append(("blank", None))
|
||||||
|
pdf_page_num += 1
|
||||||
|
sequence.append(("spread", page))
|
||||||
|
pdf_page_num += 2
|
||||||
|
else:
|
||||||
|
sequence.append(("single", page))
|
||||||
|
pdf_page_num += 1
|
||||||
|
return sequence
|
||||||
|
|
||||||
|
def _render_item_to_bytes(
|
||||||
|
self,
|
||||||
|
item: Tuple[str, Any],
|
||||||
|
page_width_pt: float,
|
||||||
|
page_height_pt: float,
|
||||||
|
bleed_pt: float,
|
||||||
|
) -> bytes:
|
||||||
|
"""
|
||||||
|
Render a single page item to a self-contained PDF (as bytes).
|
||||||
|
|
||||||
|
Each call creates its own Canvas / BytesIO so pages can be rendered
|
||||||
|
concurrently without sharing state.
|
||||||
|
"""
|
||||||
|
page_type, page = item
|
||||||
|
expanded_width_pt = page_width_pt + 2 * bleed_pt
|
||||||
|
expanded_height_pt = page_height_pt + 2 * bleed_pt
|
||||||
|
buf = io.BytesIO()
|
||||||
|
|
||||||
|
if page_type == "blank":
|
||||||
|
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
|
||||||
|
c.showPage()
|
||||||
|
c.save()
|
||||||
|
|
||||||
|
elif page_type == "cover":
|
||||||
|
cover_width_mm, cover_height_mm = page.layout.size
|
||||||
|
cover_width_pt = cover_width_mm * self.MM_TO_POINTS
|
||||||
|
cover_height_pt = cover_height_mm * self.MM_TO_POINTS
|
||||||
|
c = canvas.Canvas(buf, pagesize=(cover_width_pt, cover_height_pt))
|
||||||
|
for element in sorted(page.layout.elements, key=lambda x: x.z_index):
|
||||||
|
self._render_element(c, element, 0, cover_width_pt, cover_height_pt, "Cover")
|
||||||
|
c.showPage()
|
||||||
|
c.save()
|
||||||
|
|
||||||
|
elif page_type == "single":
|
||||||
|
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
|
||||||
|
for element in sorted(page.layout.elements, key=lambda x: x.z_index):
|
||||||
|
self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt)
|
||||||
|
c.showPage()
|
||||||
|
c.save()
|
||||||
|
|
||||||
|
elif page_type == "spread":
|
||||||
|
c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt))
|
||||||
|
self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt)
|
||||||
|
c.save()
|
||||||
|
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
def _make_blank_page_bytes(
    self, page_width_pt: float, page_height_pt: float, bleed_pt: float
) -> bytes:
    """
    Build a PDF holding one empty page and return it as bytes.

    The page measures the given width and height plus twice the bleed in each
    dimension. Used as a placeholder when rendering an item fails.
    """
    full_width_pt = page_width_pt + 2 * bleed_pt
    full_height_pt = page_height_pt + 2 * bleed_pt

    stream = io.BytesIO()
    blank = canvas.Canvas(stream, pagesize=(full_width_pt, full_height_pt))
    blank.showPage()  # emit the single empty page
    blank.save()
    return stream.getvalue()
|
||||||
|
|
||||||
|
def _merge_page_pdfs(self, pdf_bytes_list: List[Optional[bytes]], output_path: str):
    """
    Concatenate per-item PDF byte strings, in list order, into one file.

    Args:
        pdf_bytes_list: PDF documents as bytes; ``None`` entries are skipped.
            Every page of each document is appended.
        output_path: Path the merged PDF is written to.
    """
    from pypdf import PdfWriter, PdfReader

    merged = PdfWriter()
    for blob in pdf_bytes_list:
        if blob is not None:
            for pdf_page in PdfReader(io.BytesIO(blob)).pages:
                merged.add_page(pdf_page)

    with open(output_path, "wb") as out_file:
        merged.write(out_file)
|
||||||
|
|
||||||
def _make_task_id(
|
def _make_task_id(
|
||||||
self,
|
self,
|
||||||
element: ImageData,
|
element: ImageData,
|
||||||
@ -530,10 +617,6 @@ class PDFExporter:
|
|||||||
|
|
||||||
|
|
||||||
c.showPage() # Finish cover page
|
c.showPage() # Finish cover page
|
||||||
self.current_pdf_page += 1
|
|
||||||
|
|
||||||
# Reset page size for content pages
|
|
||||||
c.setPageSize((page_width_pt, page_height_pt))
|
|
||||||
|
|
||||||
def _export_single_page(
|
def _export_single_page(
|
||||||
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
|
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
|
||||||
@ -548,7 +631,6 @@ class PDFExporter:
|
|||||||
self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt)
|
self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt)
|
||||||
|
|
||||||
c.showPage() # Finish this page
|
c.showPage() # Finish this page
|
||||||
self.current_pdf_page += 1
|
|
||||||
|
|
||||||
def _export_spread(
|
def _export_spread(
|
||||||
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
|
self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0
|
||||||
@ -596,7 +678,6 @@ class PDFExporter:
|
|||||||
self._render_split_element(params, bleed_pt)
|
self._render_split_element(params, bleed_pt)
|
||||||
|
|
||||||
c.showPage() # Finish left page
|
c.showPage() # Finish left page
|
||||||
self.current_pdf_page += 1
|
|
||||||
|
|
||||||
# Process elements for right page
|
# Process elements for right page
|
||||||
c.setPageSize((expanded_width_pt, expanded_height_pt))
|
c.setPageSize((expanded_width_pt, expanded_height_pt))
|
||||||
@ -625,7 +706,6 @@ class PDFExporter:
|
|||||||
self._render_split_element(params, bleed_pt)
|
self._render_split_element(params, bleed_pt)
|
||||||
|
|
||||||
c.showPage() # Finish right page
|
c.showPage() # Finish right page
|
||||||
self.current_pdf_page += 1
|
|
||||||
|
|
||||||
def _render_element(
|
def _render_element(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@ -31,6 +31,7 @@ dependencies = [
|
|||||||
"Pillow>=8.0.0",
|
"Pillow>=8.0.0",
|
||||||
"reportlab>=3.5.0",
|
"reportlab>=3.5.0",
|
||||||
"lxml>=4.6.0",
|
"lxml>=4.6.0",
|
||||||
|
"pypdf>=4.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user