diff --git a/pyWebLayout/io/readers/epub_reader.py b/pyWebLayout/io/readers/epub_reader.py
index 36d06cb..0f85977 100644
--- a/pyWebLayout/io/readers/epub_reader.py
+++ b/pyWebLayout/io/readers/epub_reader.py
@@ -8,10 +8,11 @@ to pyWebLayout's abstract document model.
 import os
 import zipfile
 import tempfile
-from typing import Dict, List, Optional, Any, Tuple
+from typing import Dict, List, Optional, Any, Tuple, Callable
 import xml.etree.ElementTree as ET
 import re
 import urllib.parse
+from PIL import Image as PILImage, ImageOps
 
 from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
 from pyWebLayout.io.readers.html_extraction import parse_html_string
@@ -27,6 +28,40 @@ NAMESPACES = {
 }
 
 
+def default_eink_processor(img: PILImage.Image) -> PILImage.Image:
+    """
+    Process image for 4-bit e-ink display using PIL only.
+    Applies histogram equalization and 4-bit quantization.
+
+    Images with transparency are flattened onto a white background first,
+    so transparent regions do not turn dark in the grayscale result.
+
+    Args:
+        img: PIL Image to process
+
+    Returns:
+        Processed PIL Image in L mode (grayscale) with 4-bit quantization
+    """
+    # Flatten transparency onto white: a plain convert('L') discards the
+    # alpha channel and exposes whatever colour lies underneath (often black)
+    if img.mode in ('RGBA', 'LA') or 'transparency' in img.info:
+        rgba = img.convert('RGBA')
+        background = PILImage.new('RGBA', rgba.size, (255, 255, 255, 255))
+        img = PILImage.alpha_composite(background, rgba)
+
+    # Convert to grayscale if needed
+    if img.mode != 'L':
+        img = img.convert('L')
+
+    # Apply histogram equalization for contrast enhancement
+    img = ImageOps.equalize(img)
+
+    # Quantize to 4-bit (16 grayscale levels: 0, 17, 34, ..., 255)
+    img = img.point(lambda x: (x // 16) * 17)
+
+    return img
+
+
 class EPUBReader:
     """
     Reader for EPUB documents.
@@ -35,14 +70,19 @@ class EPUBReader:
     pyWebLayout's abstract document model.
     """
 
-    def __init__(self, epub_path: str):
+    def __init__(self, epub_path: str, image_processor: Optional[Callable[[PILImage.Image], PILImage.Image]] = default_eink_processor):
         """
         Initialize an EPUB reader.
 
         Args:
             epub_path: Path to the EPUB file
+            image_processor: Optional function to process images for display optimization.
+                           Defaults to default_eink_processor for 4-bit e-ink displays.
+                           Set to None to disable image processing.
+                           Custom processor should accept and return a PIL Image.
         """
         self.epub_path = epub_path
+        self.image_processor = image_processor
         self.book = Book()
         self.temp_dir = None
         self.content_dir = None
@@ -70,6 +110,9 @@ class EPUBReader:
             # Add chapters to the book
             self._add_chapters()
 
+            # Process images for e-ink display optimization
+            self._process_content_images()
+
             return self.book
 
         finally:
@@ -365,6 +408,14 @@ class EPUBReader:
             # Create a copy to ensure all data is in memory
             pil_image = pil_image.copy()
 
+            # Apply image processing if enabled
+            if self.image_processor:
+                try:
+                    pil_image = self.image_processor(pil_image)
+                except Exception as e:
+                    print(f"Warning: Image processing failed for cover: {str(e)}")
+                    # Continue with unprocessed image
+
             # Create an AbstractImage block with the cover image path
             cover_image = AbstractImage(source=cover_path, alt_text="Cover Image")
 
@@ -386,6 +437,40 @@ class EPUBReader:
             if hasattr(self.book, 'chapters') and cover_chapter in self.book.chapters:
                 self.book.chapters.remove(cover_chapter)
 
+    def _process_chapter_images(self, chapter: Chapter):
+        """
+        Process images in a single chapter.
+
+        Does nothing when image processing is disabled.
+
+        Args:
+            chapter: The chapter containing images to process
+        """
+        from pyWebLayout.abstract.block import Image as AbstractImage
+
+        # Guard here too: calling a None processor would raise a TypeError
+        # that the handler below reports as a misleading per-image warning
+        if not self.image_processor:
+            return
+
+        for block in chapter.blocks:
+            if isinstance(block, AbstractImage):
+                # Only process images whose data has already been loaded
+                if hasattr(block, '_loaded_image') and block._loaded_image:
+                    try:
+                        block._loaded_image = self.image_processor(block._loaded_image)
+                    except Exception as e:
+                        print(f"Warning: Image processing failed for image '{block.alt_text}': {str(e)}")
+                        # Continue with unprocessed image
+
+    def _process_content_images(self):
+        """Apply image processing to all images in chapters."""
+        if not self.image_processor:
+            return
+
+        for chapter in self.book.chapters:
+            self._process_chapter_images(chapter)
+
     def _add_chapters(self):
         """Add chapters to the book based on the spine and TOC."""
         # Add cover chapter first if available