# dreader-application/dreader/book_utils.py

"""
Utilities for managing book library, scanning EPUBs, and extracting metadata.
"""

from pathlib import Path
from typing import List, Dict, Optional
from dreader import create_ebook_reader
import base64
from io import BytesIO


def scan_book_directory(directory: Path) -> List[Dict[str, str]]:
    """
    Build a title-sorted list of metadata for the EPUB files in a directory.

    Only entries directly inside ``directory`` that match ``*.epub`` are
    considered; subdirectories are not searched.

    Args:
        directory: Path to directory containing EPUB files

    Returns:
        List of book metadata dictionaries, ordered by title
        (case-insensitive)
    """
    # Collect the matching paths up front, before any book is opened.
    epub_paths = tuple(directory.glob('*.epub'))

    extracted = (extract_book_metadata(path) for path in epub_paths)
    # Drop any falsy result (e.g. None or an empty dict).
    usable = [info for info in extracted if info]

    usable.sort(key=lambda info: info['title'].lower())
    return usable


def extract_book_metadata(epub_path: Path, include_cover: bool = True) -> Optional[Dict]:
    """
    Load an EPUB in a throwaway reader and collect its basic metadata.

    Args:
        epub_path: Path to EPUB file
        include_cover: Whether to add a base64 encoded cover image under
            the 'cover_data' key

    Returns:
        Dictionary with 'filename', 'path', 'title' and 'author' (plus
        'cover_data' when include_cover is true). If anything goes wrong
        while loading or reading the book, the error is printed and a
        fallback dictionary derived from the file name is returned instead;
        that fallback always carries 'cover_data': None.
    """
    try:
        # Temporary reader used only to read the metadata
        book_reader = create_ebook_reader(page_size=(400, 600))
        book_reader.load_epub(str(epub_path))

        info = dict(
            filename=epub_path.name,
            path=str(epub_path),
            # Fall back to the file stem / a placeholder when the book
            # does not provide the value.
            title=book_reader.book_title or epub_path.stem,
            author=book_reader.book_author or 'Unknown Author',
        )

        if not include_cover:
            return info

        info['cover_data'] = extract_cover_as_base64(book_reader)
        return info

    except Exception as exc:
        print(f"Error extracting metadata from {epub_path}: {exc}")
        # Best-effort entry so the file still has a usable record.
        return dict(
            filename=epub_path.name,
            path=str(epub_path),
            title=epub_path.stem,
            author='Unknown',
            cover_data=None,
        )


def extract_cover_as_base64(reader, max_width: int = 300, max_height: int = 450) -> Optional[str]:
    """
    Render the reader's current page and return it as a base64 encoded PNG.

    The current page is used as the cover, so the caller is expected to pass
    a reader that is still positioned on the first page of the book.

    Args:
        reader: EbookReader instance with loaded book
        max_width: Maximum width for cover image
        max_height: Maximum height for cover image

    Returns:
        Base64 encoded PNG image string, or None if the page could not be
        obtained or encoded (the error is printed)
    """
    try:
        # Get the current page as cover
        cover_image = reader.get_current_page()

        # Resize if needed
        if cover_image.width > max_width or cover_image.height > max_height:
            # thumbnail() resizes the image in place. Work on a copy so the
            # image object handed out by the reader is not shrunk as a side
            # effect of building the cover.
            cover_image = cover_image.copy()
            cover_image.thumbnail((max_width, max_height))

        # Convert to base64
        buffer = BytesIO()
        cover_image.save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')

    except Exception as e:
        # Best effort: a missing cover must not prevent the book from
        # being listed.
        print(f"Error extracting cover image: {e}")
        return None


def get_chapter_list(reader) -> List[Dict]:
    """
    Get formatted chapter list from reader.

    Each entry returned by ``reader.get_chapters()`` may be a plain string,
    a dict with a 'title' key, or a tuple of at least two items whose first
    item is the title. Any other shape, or a missing/empty title, is
    replaced by a generated 'Chapter N' label (N is 1-based).

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        List of chapter dictionaries with 'index' (0-based) and 'title';
        an empty list if the chapters could not be read (the error is
        printed)
    """
    try:
        chapters = reader.get_chapters()
        result = []
        for i, chapter in enumerate(chapters):
            # Handle different chapter formats
            if isinstance(chapter, str):
                title = chapter
            elif isinstance(chapter, dict):
                title = chapter.get('title')
            elif isinstance(chapter, tuple) and len(chapter) >= 2:
                # Tuple format: (title, ...)
                title = chapter[0]
            else:
                title = None

            result.append({
                'index': i,
                # Apply the same fallback to every format, so a blank or
                # None title never reaches the caller.
                'title': title or f'Chapter {i+1}'
            })
        return result
    except Exception as e:
        print(f"Error getting chapters: {e}")
        return []


def get_bookmark_list(reader) -> List[Dict]:
    """
    Turn the reader's saved positions into a list of bookmark entries.

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        List of bookmark dictionaries with 'name' and 'position' keys;
        an empty list if the saved positions could not be read (the error
        is printed)
    """
    try:
        entries = []
        for saved_name in reader.list_saved_positions():
            entries.append({
                'name': saved_name,
                # Placeholder: could be enhanced to show chapter/page info
                'position': '',
            })
        return entries
    except Exception as err:
        print(f"Error getting bookmarks: {err}")
        return []


def page_image_to_base64(page_image) -> str:
    """
    Encode a page image as PNG and return the bytes as base64 text.

    Args:
        page_image: PIL Image object

    Returns:
        Base64 encoded PNG string
    """
    with BytesIO() as png_stream:
        page_image.save(png_stream, format='PNG')
        encoded = base64.b64encode(png_stream.getvalue())
    return encoded.decode('utf-8')