add BW image conversion

2025-11-04 22:09:26 +01:00 · 2025-11-04 22:09:26 +01:00 · 9ba35d2fa8
commit 9ba35d2fa8
parent 25d36566d0
1 changed files with 70 additions and 2 deletions
--- a/pyWebLayout/io/readers/epub_reader.py
+++ b/pyWebLayout/io/readers/epub_reader.py
@ -8,10 +8,11 @@ to pyWebLayout's abstract document model.
 import os
 import zipfile
 import tempfile
-from typing import Dict, List, Optional, Any, Tuple
+from typing import Dict, List, Optional, Any, Tuple, Callable
 import xml.etree.ElementTree as ET
 import re
 import urllib.parse
 from PIL import Image as PILImage, ImageOps
 from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
 from pyWebLayout.io.readers.html_extraction import parse_html_string
@ -27,6 +28,30 @@ NAMESPACES = {
 }
 def default_eink_processor(img: PILImage.Image) -> PILImage.Image:
    """
    Process image for 4-bit e-ink display using PIL only.

    Transparent images are first flattened onto a white background, then the
    image is converted to grayscale, histogram-equalized and quantized to
    16 gray levels.

    Args:
        img: PIL Image to process

    Returns:
        Processed PIL Image in L mode (grayscale) with 4-bit quantization
    """
    # Flatten transparency onto white before going to grayscale. A plain
    # convert('L') discards the alpha channel and exposes whatever colour is
    # stored underneath the transparent pixels (commonly black), which would
    # then be amplified by the equalization step below.
    has_alpha = (
        img.mode in ('RGBA', 'LA', 'PA')
        or 'transparency' in getattr(img, 'info', {})
    )
    if has_alpha:
        rgba = img.convert('RGBA')
        background = PILImage.new('RGBA', rgba.size, (255, 255, 255, 255))
        img = PILImage.alpha_composite(background, rgba)

    # Convert to grayscale if needed
    if img.mode != 'L':
        img = img.convert('L')

    # Apply histogram equalization for contrast enhancement
    img = ImageOps.equalize(img)

    # Quantize to 4-bit (16 grayscale levels: 0, 17, 34, ..., 255).
    # x // 16 maps 0..255 onto 0..15; multiplying by 17 spreads those 16
    # levels back over the full 8-bit range so level 15 is pure white.
    img = img.point(lambda x: (x // 16) * 17)
    return img
 class EPUBReader:
    """
    Reader for EPUB documents.
@ -35,14 +60,19 @@ class EPUBReader:
    pyWebLayout's abstract document model.
    """
-    def __init__(self, epub_path: str):
+    def __init__(self, epub_path: str, image_processor: Optional[Callable[[PILImage.Image], PILImage.Image]] = default_eink_processor):
        """
        Initialize an EPUB reader.
        Args:
            epub_path: Path to the EPUB file
            image_processor: Optional function to process images for display optimization.
                           Defaults to default_eink_processor for 4-bit e-ink displays.
                           Set to None to disable image processing.
                           Custom processor should accept and return a PIL Image.
        """
        self.epub_path = epub_path
        self.image_processor = image_processor
        self.book = Book()
        self.temp_dir = None
        self.content_dir = None
@ -70,6 +100,9 @@ class EPUBReader:
            # Add chapters to the book
            self._add_chapters()
            # Process images for e-ink display optimization
            self._process_content_images()
            return self.book
        finally:
@ -365,6 +398,14 @@ class EPUBReader:
            # Create a copy to ensure all data is in memory
            pil_image = pil_image.copy()
            # Apply image processing if enabled
            if self.image_processor:
                try:
                    pil_image = self.image_processor(pil_image)
                except Exception as e:
                    print(f"Warning: Image processing failed for cover: {str(e)}")
                    # Continue with unprocessed image
            # Create an AbstractImage block with the cover image path
            cover_image = AbstractImage(source=cover_path, alt_text="Cover Image")
@ -386,6 +427,33 @@ class EPUBReader:
            if hasattr(self.book, 'chapters') and cover_chapter in self.book.chapters:
                self.book.chapters.remove(cover_chapter)
    def _process_chapter_images(self, chapter: Chapter):
        """
        Process images in a single chapter.

        Runs ``self.image_processor`` over every image block found directly
        in ``chapter.blocks`` whose ``_loaded_image`` attribute is set, and
        stores the result back on the block. If the processor raises, a
        warning is printed and that image is left unprocessed.

        Does nothing when no image processor is configured.

        Args:
            chapter: The chapter containing images to process
        """
        # Guard here as well as in the caller: without it a disabled
        # processor (None) would be "called" for every image, and the
        # resulting TypeError would be reported as a processing failure.
        if not self.image_processor:
            return

        from pyWebLayout.abstract.block import Image as AbstractImage

        for block in chapter.blocks:
            if not isinstance(block, AbstractImage):
                continue

            # Only process images whose pixel data has already been loaded
            loaded_image = getattr(block, '_loaded_image', None)
            if not loaded_image:
                continue

            try:
                block._loaded_image = self.image_processor(loaded_image)
            except Exception as e:
                print(f"Warning: Image processing failed for image '{block.alt_text}': {str(e)}")
                # Continue with unprocessed image
    def _process_content_images(self):
        """
        Run the configured image processor over every chapter of the book.

        When no image processor is set, the book is left untouched.
        """
        if self.image_processor:
            for current_chapter in self.book.chapters:
                self._process_chapter_images(current_chapter)
    def _add_chapters(self):
        """Add chapters to the book based on the spine and TOC."""
        # Add cover chapter first if available