Duncan Tourolle 284a6e3393
All checks were successful
Python CI / test (push) Successful in 4m30s
library and toc navigation
2025-11-08 12:20:23 +01:00

254 lines
7.4 KiB
Python

"""
Utilities for managing book library, scanning EPUBs, and extracting metadata.
"""
from pathlib import Path
from typing import List, Dict, Optional
from dreader import create_ebook_reader
import base64
from io import BytesIO
from PIL import Image
import ebooklib
from ebooklib import epub
def scan_book_directory(directory: Path) -> List[Dict[str, str]]:
"""
Scan a directory for EPUB files and extract metadata.
Args:
directory: Path to directory containing EPUB files
Returns:
List of book dictionaries with metadata
"""
books = []
epub_files = list(directory.glob('*.epub'))
for epub_path in epub_files:
metadata = extract_book_metadata(epub_path)
if metadata:
books.append(metadata)
return sorted(books, key=lambda b: b['title'].lower())
def extract_book_metadata(epub_path: Path, include_cover: bool = True) -> Optional[Dict]:
    """
    Build a metadata dictionary for a single EPUB file.

    The book is loaded through a temporary reader to obtain title and
    author. If anything raises along the way, the error is printed and a
    minimal fallback dictionary derived from the file name is returned
    instead (this fallback always carries ``'cover_data': None``).

    Args:
        epub_path: Path to EPUB file
        include_cover: Whether to extract and include cover image as base64

    Returns:
        Dictionary with 'filename', 'path', 'title', 'author' and, when
        requested (or in the fallback case), 'cover_data'
    """
    try:
        # Throwaway reader, used only to read title/author
        reader = create_ebook_reader(page_size=(400, 600))
        reader.load_epub(str(epub_path))

        info = {'filename': epub_path.name, 'path': str(epub_path)}
        info['title'] = reader.book_title or epub_path.stem
        info['author'] = reader.book_author or 'Unknown Author'

        # The cover comes straight from the EPUB, not from the reader
        if include_cover:
            info['cover_data'] = extract_cover_from_epub(epub_path)
        return info
    except Exception as exc:
        print(f"Error extracting metadata from {epub_path}: {exc}")
        return dict(
            filename=epub_path.name,
            path=str(epub_path),
            title=epub_path.stem,
            author='Unknown',
            cover_data=None,
        )
def extract_cover_as_base64(reader, max_width: int = 300, max_height: int = 450) -> Optional[str]:
    """
    Return a cover image for the reader's book as a base64 encoded PNG.

    Kept for backward compatibility. If the reader exposes a non-empty
    ``_epub_path`` attribute, the cover is extracted from that EPUB via
    extract_cover_from_epub. Otherwise the page returned by
    ``reader.get_current_page()`` is used as a stand-in cover.

    Args:
        reader: EbookReader instance with loaded book
        max_width: Maximum width for cover image
        max_height: Maximum height for cover image

    Returns:
        Base64 encoded PNG image string, or None if any step raises
    """
    try:
        # Prefer the real cover when the reader knows which EPUB it loaded
        epub_path = getattr(reader, '_epub_path', None)
        if epub_path:
            return extract_cover_from_epub(epub_path, max_width, max_height)

        # Fallback: use the reader's current page as the cover
        cover_image = reader.get_current_page()

        if cover_image.width > max_width or cover_image.height > max_height:
            # thumbnail() resizes in place; shrink a copy so the page image
            # handed out by the reader is left untouched.
            cover_image = cover_image.copy()
            cover_image.thumbnail((max_width, max_height))

        # Convert to base64
        buffer = BytesIO()
        cover_image.save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        print(f"Error extracting cover image: {e}")
        return None
def extract_cover_from_epub(epub_path: Path, max_width: int = 300, max_height: int = 450) -> Optional[str]:
    """
    Extract the actual cover image from an EPUB file.

    Candidates are tried in order of preference: items typed as cover,
    then images whose name contains 'cover', then every other image. A
    candidate that cannot be decoded is skipped, so a broken or
    unsupported cover file does not prevent falling back to the next one.

    Args:
        epub_path: Path to EPUB file
        max_width: Maximum width for cover image
        max_height: Maximum height for cover image

    Returns:
        Base64 encoded PNG image string, or None if the EPUB cannot be
        read or contains no decodable image
    """
    try:
        # Read the EPUB and materialise its items once
        book = epub.read_epub(str(epub_path))
        items = list(book.get_items())

        flagged = [item for item in items if item.get_type() == ebooklib.ITEM_COVER]
        images = [item for item in items if item.get_type() == ebooklib.ITEM_IMAGE]
        # Stable sort with False < True: 'cover'-named images move to the
        # front, everything else keeps its original order behind them.
        images.sort(key=lambda item: 'cover' not in item.get_name().lower())

        cover_image = None
        for item in flagged + images:
            cover_image = _load_image_or_none(item.get_content())
            if cover_image is not None:
                break

        if cover_image is None:
            return None

        # PNG cannot store these colour modes (CMYK JPEG covers do occur);
        # without the conversion save() would fail and the cover be lost.
        if cover_image.mode in ('CMYK', 'YCbCr', 'LAB', 'HSV'):
            cover_image = cover_image.convert('RGB')

        # Resize if needed (thumbnail maintains aspect ratio)
        if cover_image.width > max_width or cover_image.height > max_height:
            cover_image.thumbnail((max_width, max_height), Image.Resampling.LANCZOS)

        # Convert to base64
        buffer = BytesIO()
        cover_image.save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        # Best-effort: a missing cover must never break the caller
        print(f"Error extracting cover from EPUB {epub_path}: {e}")
        return None


def _load_image_or_none(data):
    """Fully decode raw image bytes, returning None if Pillow cannot read them."""
    try:
        image = Image.open(BytesIO(data))
        # Image.open() is lazy; load() forces decoding so truncated or
        # corrupt data is detected here rather than during resize/save.
        image.load()
        return image
    except Exception:
        # Pillow's format plugins raise a variety of exception types for
        # bad input; any of them just means "try the next candidate".
        return None
def get_chapter_list(reader) -> List[Dict]:
    """
    Get formatted chapter list from reader.

    Chapters returned by ``reader.get_chapters()`` may be plain strings,
    dicts with a 'title' key, or tuples of at least two elements whose
    first element is the title. Any other shape, and any missing or
    empty title, yields the placeholder 'Chapter N' (1-based).

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        List of chapter dictionaries with 'index' (0-based) and 'title';
        an empty list if the reader raises
    """
    try:
        result = []
        for i, chapter in enumerate(reader.get_chapters()):
            # Pull the raw title out of whichever chapter format we got
            if isinstance(chapter, str):
                title = chapter
            elif isinstance(chapter, dict):
                title = chapter.get('title')
            elif isinstance(chapter, tuple) and len(chapter) >= 2:
                # Tuple format: (title, ...)
                title = chapter[0]
            else:
                title = None
            # One shared fallback so empty/None titles behave the same
            # regardless of the chapter format.
            result.append({
                'index': i,
                'title': title or f'Chapter {i+1}',
            })
        return result
    except Exception as e:
        print(f"Error getting chapters: {e}")
        return []
def get_bookmark_list(reader) -> List[Dict]:
    """
    Turn the reader's saved positions into a list of bookmark entries.

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        List of bookmark dictionaries with 'name' and an (currently
        always empty) 'position'; an empty list if the reader raises
    """
    try:
        entries = []
        for saved_name in reader.list_saved_positions():
            # 'position' is a placeholder; it could later carry chapter/page info
            entries.append({'name': saved_name, 'position': ''})
        return entries
    except Exception as err:
        print(f"Error getting bookmarks: {err}")
        return []
def page_image_to_base64(page_image) -> str:
    """
    Serialise an image as PNG and return it base64 encoded.

    Args:
        page_image: PIL Image object

    Returns:
        Base64 encoded PNG string
    """
    with BytesIO() as png_stream:
        page_image.save(png_stream, format='PNG')
        encoded = base64.b64encode(png_stream.getvalue())
    return encoded.decode('utf-8')