add BW image conversion

This commit is contained in:
Duncan Tourolle 2025-11-04 22:09:26 +01:00
parent 25d36566d0
commit 9ba35d2fa8

View File

@ -8,10 +8,11 @@ to pyWebLayout's abstract document model.
import os import os
import zipfile import zipfile
import tempfile import tempfile
from typing import Dict, List, Optional, Any, Tuple from typing import Dict, List, Optional, Any, Tuple, Callable
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import re import re
import urllib.parse import urllib.parse
from PIL import Image as PILImage, ImageOps
from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType from pyWebLayout.abstract.document import Document, Book, Chapter, MetadataType
from pyWebLayout.io.readers.html_extraction import parse_html_string from pyWebLayout.io.readers.html_extraction import parse_html_string
@ -27,6 +28,30 @@ NAMESPACES = {
} }
def default_eink_processor(img: PILImage.Image) -> PILImage.Image:
    """
    Process image for 4-bit e-ink display using PIL only.

    Flattens any transparency onto a white background, converts to
    grayscale, applies histogram equalization and quantizes the result
    to 16 gray levels.

    Args:
        img: PIL Image to process

    Returns:
        Processed PIL Image in L mode (grayscale) with 4-bit quantization
    """
    # convert('L') discards the alpha channel, so transparent pixels would
    # take on whatever colour value is stored beneath them (commonly black).
    # Composite onto white first so transparent regions render as blank page.
    if 'A' in img.getbands() or 'transparency' in img.info:
        rgba = img.convert('RGBA')
        backdrop = PILImage.new('RGBA', rgba.size, (255, 255, 255, 255))
        img = PILImage.alpha_composite(backdrop, rgba)

    # Convert to grayscale if needed
    if img.mode != 'L':
        img = img.convert('L')

    # Apply histogram equalization for contrast enhancement
    img = ImageOps.equalize(img)

    # Quantize to 4-bit (16 grayscale levels: 0, 17, 34, ..., 255).
    # x // 16 yields the level index 0..15; * 17 spreads it over 0..255.
    img = img.point(lambda x: (x // 16) * 17)

    return img
class EPUBReader: class EPUBReader:
""" """
Reader for EPUB documents. Reader for EPUB documents.
@ -35,14 +60,19 @@ class EPUBReader:
pyWebLayout's abstract document model. pyWebLayout's abstract document model.
""" """
def __init__(self, epub_path: str): def __init__(self, epub_path: str, image_processor: Optional[Callable[[PILImage.Image], PILImage.Image]] = default_eink_processor):
""" """
Initialize an EPUB reader. Initialize an EPUB reader.
Args: Args:
epub_path: Path to the EPUB file epub_path: Path to the EPUB file
image_processor: Optional function to process images for display optimization.
Defaults to default_eink_processor for 4-bit e-ink displays.
Set to None to disable image processing.
Custom processor should accept and return a PIL Image.
""" """
self.epub_path = epub_path self.epub_path = epub_path
self.image_processor = image_processor
self.book = Book() self.book = Book()
self.temp_dir = None self.temp_dir = None
self.content_dir = None self.content_dir = None
@ -70,6 +100,9 @@ class EPUBReader:
# Add chapters to the book # Add chapters to the book
self._add_chapters() self._add_chapters()
# Process images for e-ink display optimization
self._process_content_images()
return self.book return self.book
finally: finally:
@ -365,6 +398,14 @@ class EPUBReader:
# Create a copy to ensure all data is in memory # Create a copy to ensure all data is in memory
pil_image = pil_image.copy() pil_image = pil_image.copy()
# Apply image processing if enabled
if self.image_processor:
try:
pil_image = self.image_processor(pil_image)
except Exception as e:
print(f"Warning: Image processing failed for cover: {str(e)}")
# Continue with unprocessed image
# Create an AbstractImage block with the cover image path # Create an AbstractImage block with the cover image path
cover_image = AbstractImage(source=cover_path, alt_text="Cover Image") cover_image = AbstractImage(source=cover_path, alt_text="Cover Image")
@ -386,6 +427,33 @@ class EPUBReader:
if hasattr(self.book, 'chapters') and cover_chapter in self.book.chapters: if hasattr(self.book, 'chapters') and cover_chapter in self.book.chapters:
self.book.chapters.remove(cover_chapter) self.book.chapters.remove(cover_chapter)
def _process_chapter_images(self, chapter: Chapter):
"""
Process images in a single chapter.
Args:
chapter: The chapter containing images to process
"""
from pyWebLayout.abstract.block import Image as AbstractImage
for block in chapter.blocks:
if isinstance(block, AbstractImage):
# Only process if image has been loaded and processor is enabled
if hasattr(block, '_loaded_image') and block._loaded_image:
try:
block._loaded_image = self.image_processor(block._loaded_image)
except Exception as e:
print(f"Warning: Image processing failed for image '{block.alt_text}': {str(e)}")
# Continue with unprocessed image
def _process_content_images(self):
"""Apply image processing to all images in chapters."""
if not self.image_processor:
return
for chapter in self.book.chapters:
self._process_chapter_images(chapter)
def _add_chapters(self): def _add_chapters(self):
"""Add chapters to the book based on the spine and TOC.""" """Add chapters to the book based on the spine and TOC."""
# Add cover chapter first if available # Add cover chapter first if available