Duncan Tourolle 131d39080e
All checks were successful
Python CI / test (push) Successful in 3m24s
first application elements
2025-11-07 20:00:05 +01:00

180 lines
4.9 KiB
Python

"""
Utilities for managing a book library: scanning EPUB files, extracting
metadata, and encoding page images.
"""
from pathlib import Path
from typing import List, Dict, Optional
from dreader import create_ebook_reader
import base64
from io import BytesIO
def scan_book_directory(directory: Path) -> List[Dict[str, str]]:
    """
    Scan a directory for EPUB files and extract metadata.

    Only the directory's immediate children are examined (no recursion).
    The ``.epub`` extension is matched case-insensitively, so files such as
    ``Book.EPUB`` are also found on case-sensitive filesystems.

    Args:
        directory: Path to directory containing EPUB files

    Returns:
        List of book dictionaries as produced by ``extract_book_metadata``,
        sorted by title (case-insensitive)
    """
    books = []
    for epub_path in directory.glob('*'):
        # Compare on the lowered name: a '*.epub' glob pattern would be
        # case-sensitive on case-sensitive filesystems.
        if not epub_path.name.lower().endswith('.epub'):
            continue
        metadata = extract_book_metadata(epub_path)
        if metadata:
            books.append(metadata)
    return sorted(books, key=lambda b: b['title'].lower())
def extract_book_metadata(epub_path: Path, include_cover: bool = True) -> Optional[Dict]:
    """
    Extract metadata from an EPUB file.

    Extraction is best-effort: if anything goes wrong while loading the book,
    the error is printed and a fallback dictionary derived from the file name
    is returned, so the caller still gets an entry for the file.

    Args:
        epub_path: Path to EPUB file
        include_cover: Whether to extract and include cover image as base64

    Returns:
        Dictionary with 'filename', 'path', 'title' and 'author' keys, plus
        'cover_data' (base64 string or None) when include_cover is True.
        On failure the title is the file stem and the author is 'Unknown'.
    """
    try:
        # Create temporary reader to extract metadata
        reader = create_ebook_reader(page_size=(400, 600))
        reader.load_epub(str(epub_path))
        metadata = {
            'filename': epub_path.name,
            'path': str(epub_path),
            'title': reader.book_title or epub_path.stem,
            'author': reader.book_author or 'Unknown Author',
        }
        # Extract cover image if requested
        if include_cover:
            metadata['cover_data'] = extract_cover_as_base64(reader)
        return metadata
    except Exception as e:
        print(f"Error extracting metadata from {epub_path}: {e}")
        # NOTE: the fallback author is 'Unknown' (not 'Unknown Author' as on
        # the success path); kept as-is for compatibility with callers.
        fallback = {
            'filename': epub_path.name,
            'path': str(epub_path),
            'title': epub_path.stem,
            'author': 'Unknown',
        }
        # Mirror the success path: only expose the key when a cover was asked for.
        if include_cover:
            fallback['cover_data'] = None
        return fallback
def extract_cover_as_base64(reader, max_width: int = 300, max_height: int = 450) -> Optional[str]:
    """
    Extract cover image from reader and return as base64 encoded string.

    The reader's current page is used as the cover. If it exceeds the given
    bounds, a downscaled copy is encoded; the image object returned by the
    reader is never modified.

    Args:
        reader: EbookReader instance with loaded book
        max_width: Maximum width for cover image
        max_height: Maximum height for cover image

    Returns:
        Base64 encoded PNG image string, or None if no page is available or
        encoding fails
    """
    try:
        # Get first page as cover
        cover_image = reader.get_current_page()
        if cover_image is None:
            return None
        # Resize if needed. thumbnail() works in place, so operate on a copy
        # to leave the reader's own page image at full resolution.
        if cover_image.width > max_width or cover_image.height > max_height:
            cover_image = cover_image.copy()
            cover_image.thumbnail((max_width, max_height))
        # Convert to base64
        buffer = BytesIO()
        cover_image.save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        print(f"Error extracting cover image: {e}")
        return None
def get_chapter_list(reader) -> List[Dict]:
    """
    Get formatted chapter list from reader.

    Each entry returned by ``reader.get_chapters()`` may be a plain string, a
    dict with a 'title' key, or a tuple of at least two items whose first
    item is the title. Any entry without a usable (non-empty) title is
    labelled 'Chapter N', where N is its 1-based position.

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        List of chapter dictionaries with index and title; an empty list if
        the chapters cannot be retrieved
    """
    try:
        chapters = reader.get_chapters()
        result = []
        for i, chapter in enumerate(chapters):
            # Handle different chapter formats
            if isinstance(chapter, str):
                title = chapter
            elif isinstance(chapter, dict):
                title = chapter.get('title')
            elif isinstance(chapter, tuple) and len(chapter) >= 2:
                # Tuple format: (title, ...)
                title = chapter[0]
            else:
                title = None
            # One shared fallback so a missing, None or empty title is
            # handled the same way for every format.
            result.append({
                'index': i,
                'title': title or f'Chapter {i+1}'
            })
        return result
    except Exception as e:
        print(f"Error getting chapters: {e}")
        return []
def get_bookmark_list(reader) -> List[Dict]:
    """
    Build the bookmark entries for the reader's saved positions.

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        One dictionary per saved position, with the bookmark under 'name'
        and an empty 'position' string; an empty list if the saved
        positions cannot be listed
    """
    entries = []
    try:
        for saved_name in reader.list_saved_positions():
            entry = {
                'name': saved_name,
                'position': '',  # Could be enhanced to show chapter/page info
            }
            entries.append(entry)
    except Exception as e:
        print(f"Error getting bookmarks: {e}")
        return []
    return entries
def page_image_to_base64(page_image) -> str:
    """
    Encode a PIL Image as a base64 PNG string.

    Args:
        page_image: PIL Image object

    Returns:
        The image's PNG bytes, base64 encoded and decoded to text
    """
    with BytesIO() as png_stream:
        page_image.save(png_stream, format='PNG')
        encoded = base64.b64encode(png_stream.getvalue())
    return encoded.decode('utf-8')