diff --git a/.gitea/workflows/tests.yml b/.gitea/workflows/tests.yml index 90231e2..deea9a1 100644 --- a/.gitea/workflows/tests.yml +++ b/.gitea/workflows/tests.yml @@ -41,7 +41,7 @@ jobs: id: pytest continue-on-error: true run: | - xvfb-run -a pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing + pytest --cov=pyPhotoAlbum --cov-report=xml --cov-report=json --cov-report=html --cov-report=term-missing env: QT_QPA_PLATFORM: offscreen diff --git a/pyPhotoAlbum/pdf_exporter.py b/pyPhotoAlbum/pdf_exporter.py index cf247d7..d033223 100644 --- a/pyPhotoAlbum/pdf_exporter.py +++ b/pyPhotoAlbum/pdf_exporter.py @@ -5,9 +5,10 @@ Uses multiprocessing to pre-process images in parallel for faster exports. """ import os +import threading from typing import Any, List, Tuple, Optional, Union, Dict from dataclasses import dataclass, field -from concurrent.futures import ProcessPoolExecutor, as_completed +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed import multiprocessing import io from reportlab.lib.pagesizes import A4 @@ -192,15 +193,15 @@ class PDFExporter: self.warnings: List[str] = [] self.current_pdf_page = 1 self.max_workers = max_workers or multiprocessing.cpu_count() - # Cache for pre-processed images: task_id -> PIL Image self._processed_images: Dict[str, Image.Image] = {} def export(self, output_path: str, progress_callback=None) -> Tuple[bool, List[str]]: """ Export the project to PDF. - Uses multiprocessing to pre-process all images in parallel before - assembling the PDF sequentially. + Uses multiprocessing to pre-process all images in parallel, then renders + each page to its own PDF buffer in parallel (via threads), and finally + merges the per-page PDFs into the output file. Args: output_path: Path where PDF should be saved @@ -210,7 +211,6 @@ class PDFExporter: Tuple of (success: bool, warnings: List[str]) """ self.warnings = [] - self.current_pdf_page = 1 self._processed_images = {} try: @@ -221,70 +221,157 @@ class PDFExporter: # Get page dimensions from project (in mm) page_width_mm, page_height_mm = self.project.page_size_mm - - # Bleed expands each page on all sides bleed_mm = self.project.page_bleed_mm bleed_pt = bleed_mm * self.MM_TO_POINTS - - # Convert to PDF points (base page = cut/trim size) page_width_pt = page_width_mm * self.MM_TO_POINTS page_height_pt = page_height_mm * self.MM_TO_POINTS - # Expanded page size includes bleed on all sides - expanded_width_pt = page_width_pt + 2 * bleed_pt - expanded_height_pt = page_height_pt + 2 * bleed_pt - - # Phase 1: Collect all image tasks and process in parallel + # Phase 1: parallel image pre-processing (unchanged) if progress_callback: progress_callback(0, total_pages, "Collecting images for processing...") - image_tasks = self._collect_image_tasks(page_width_pt, page_height_pt) - if image_tasks: if progress_callback: progress_callback(0, total_pages, f"Processing {len(image_tasks)} images in parallel...") self._preprocess_images_parallel(image_tasks, progress_callback, total_pages) - # Phase 2: Build PDF using pre-processed images - c = canvas.Canvas(output_path, pagesize=(expanded_width_pt, expanded_height_pt)) + # Phase 2: determine ordered page sequence (inserts blank pages for spread alignment) + page_sequence = self._compute_page_sequence() - pages_processed = 0 - for page in self.project.pages: - page_name = self.project.get_page_display_name(page) + # Phase 3: render each page to its own PDF bytes in parallel + n = len(page_sequence) + if progress_callback: + progress_callback(0, total_pages, f"Rendering {n} pages in parallel...") + pdf_bytes_list: List[Optional[bytes]] = [None] * n - if progress_callback: - progress_callback(pages_processed, total_pages, f"Assembling {page_name}...") + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_idx = { + executor.submit( + self._render_item_to_bytes, item, page_width_pt, page_height_pt, bleed_pt + ): i + for i, item in enumerate(page_sequence) + } + completed = 0 + for future in as_completed(future_to_idx): + i = future_to_idx[future] + try: + pdf_bytes_list[i] = future.result() + except Exception as e: + self.warnings.append(f"Error rendering page: {str(e)}") + pdf_bytes_list[i] = self._make_blank_page_bytes(page_width_pt, page_height_pt, bleed_pt) + completed += 1 + if progress_callback: + progress_callback(completed, n, f"Rendering pages: {completed}/{n}...") - if page.is_cover: - self._export_cover(c, page, page_width_pt, page_height_pt) - pages_processed += 1 - elif page.is_double_spread: - if self.current_pdf_page % 2 == 1: - c.showPage() - self.current_pdf_page += 1 - if progress_callback: - progress_callback(pages_processed, total_pages, "Inserting blank page for alignment...") + # Phase 4: merge all per-page PDFs into the output file + if progress_callback: + progress_callback(n, total_pages, "Merging pages...") + self._merge_page_pdfs(pdf_bytes_list, output_path) - self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt) - pages_processed += 2 - else: - self._export_single_page(c, page, page_width_pt, page_height_pt, bleed_pt) - pages_processed += 1 - - c.save() - - # Clean up processed images cache self._processed_images = {} - if progress_callback: progress_callback(total_pages, total_pages, "Export complete!") - return True, self.warnings except Exception as e: self.warnings.append(f"Export failed: {str(e)}") return False, self.warnings + def _compute_page_sequence(self) -> List[Tuple[str, Any]]: + """ + Build an ordered list of (page_type, page) items to render. + + Inserts ('blank', None) entries before double-page spreads that would + otherwise start on an odd-numbered PDF page (spreads must start on even pages). + """ + sequence: List[Tuple[str, Any]] = [] + pdf_page_num = 1 + for page in self.project.pages: + if page.is_cover: + sequence.append(("cover", page)) + pdf_page_num += 1 + elif page.is_double_spread: + if pdf_page_num % 2 == 1: + sequence.append(("blank", None)) + pdf_page_num += 1 + sequence.append(("spread", page)) + pdf_page_num += 2 + else: + sequence.append(("single", page)) + pdf_page_num += 1 + return sequence + + def _render_item_to_bytes( + self, + item: Tuple[str, Any], + page_width_pt: float, + page_height_pt: float, + bleed_pt: float, + ) -> bytes: + """ + Render a single page item to a self-contained PDF (as bytes). + + Each call creates its own Canvas / BytesIO so pages can be rendered + concurrently without sharing state. + """ + page_type, page = item + expanded_width_pt = page_width_pt + 2 * bleed_pt + expanded_height_pt = page_height_pt + 2 * bleed_pt + buf = io.BytesIO() + + if page_type == "blank": + c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt)) + c.showPage() + c.save() + + elif page_type == "cover": + cover_width_mm, cover_height_mm = page.layout.size + cover_width_pt = cover_width_mm * self.MM_TO_POINTS + cover_height_pt = cover_height_mm * self.MM_TO_POINTS + c = canvas.Canvas(buf, pagesize=(cover_width_pt, cover_height_pt)) + for element in sorted(page.layout.elements, key=lambda x: x.z_index): + self._render_element(c, element, 0, cover_width_pt, cover_height_pt, "Cover") + c.showPage() + c.save() + + elif page_type == "single": + c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt)) + for element in sorted(page.layout.elements, key=lambda x: x.z_index): + self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt) + c.showPage() + c.save() + + elif page_type == "spread": + c = canvas.Canvas(buf, pagesize=(expanded_width_pt, expanded_height_pt)) + self._export_spread(c, page, page_width_pt, page_height_pt, bleed_pt) + c.save() + + return buf.getvalue() + + def _make_blank_page_bytes( + self, page_width_pt: float, page_height_pt: float, bleed_pt: float + ) -> bytes: + """Return a minimal single-blank-page PDF for use as an error placeholder.""" + buf = io.BytesIO() + c = canvas.Canvas(buf, pagesize=(page_width_pt + 2 * bleed_pt, page_height_pt + 2 * bleed_pt)) + c.showPage() + c.save() + return buf.getvalue() + + def _merge_page_pdfs(self, pdf_bytes_list: List[Optional[bytes]], output_path: str): + """Merge a list of single-page PDF byte strings into one output file.""" + from pypdf import PdfWriter, PdfReader + + writer = PdfWriter() + for pdf_bytes in pdf_bytes_list: + if pdf_bytes is None: + continue + reader = PdfReader(io.BytesIO(pdf_bytes)) + for page in reader.pages: + writer.add_page(page) + with open(output_path, "wb") as f: + writer.write(f) + def _make_task_id( self, element: ImageData, @@ -530,10 +617,6 @@ class PDFExporter: c.showPage() # Finish cover page - self.current_pdf_page += 1 - - # Reset page size for content pages - c.setPageSize((page_width_pt, page_height_pt)) def _export_single_page( self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0 @@ -548,7 +631,6 @@ class PDFExporter: self._render_element(c, element, 0, page_width_pt, page_height_pt, page.page_number, bleed_pt) c.showPage() # Finish this page - self.current_pdf_page += 1 def _export_spread( self, c: canvas.Canvas, page, page_width_pt: float, page_height_pt: float, bleed_pt: float = 0.0 @@ -596,7 +678,6 @@ class PDFExporter: self._render_split_element(params, bleed_pt) c.showPage() # Finish left page - self.current_pdf_page += 1 # Process elements for right page c.setPageSize((expanded_width_pt, expanded_height_pt)) @@ -625,7 +706,6 @@ class PDFExporter: self._render_split_element(params, bleed_pt) c.showPage() # Finish right page - self.current_pdf_page += 1 def _render_element( self, diff --git a/pyproject.toml b/pyproject.toml index 6750b2e..9d63d31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "Pillow>=8.0.0", "reportlab>=3.5.0", "lxml>=4.6.0", + "pypdf>=4.0.0", ] [project.optional-dependencies]