# dreader-application/dreader/book_utils.py

"""
Utilities for managing book library, scanning EPUBs, and extracting metadata.
"""

from pathlib import Path
from typing import List, Dict, Optional
from dreader import create_ebook_reader
import base64
from io import BytesIO
from PIL import Image
import ebooklib
from ebooklib import epub


def scan_book_directory(directory: Path) -> List[Dict[str, Optional[str]]]:
    """
    Scan a directory for EPUB files and extract metadata.

    Only the directory itself is searched (no recursion). The extension is
    matched case-insensitively, so ``book.epub`` and ``BOOK.EPUB`` are both
    picked up regardless of the filesystem's case sensitivity.

    Args:
        directory: Path to directory containing EPUB files

    Returns:
        List of book dictionaries as produced by extract_book_metadata,
        sorted by lower-cased title; books with equal titles are ordered by
        filename so the result does not depend on directory listing order.
        A missing directory yields an empty list.
    """
    directory = Path(directory)

    # A plain '*.epub' pattern is case-sensitive on POSIX filesystems; the
    # character classes make every letter of the extension match either case.
    epub_files = directory.glob('*.[eE][pP][uU][bB]')

    books = []
    for epub_path in epub_files:
        metadata = extract_book_metadata(epub_path)
        if metadata:
            books.append(metadata)

    return sorted(
        books,
        key=lambda b: (b['title'].lower(), b.get('filename') or ''),
    )


def extract_book_metadata(epub_path: Path, include_cover: bool = True) -> Optional[Dict]:
    """
    Build a metadata record for a single EPUB file.

    Args:
        epub_path: Path to EPUB file
        include_cover: Whether to extract and include cover image as base64

    Returns:
        Dictionary with 'filename', 'path', 'title' and 'author' keys, plus
        'cover_data' when include_cover is true. If anything goes wrong the
        error is printed and a fallback record is returned instead (title
        taken from the file stem, author 'Unknown', 'cover_data' None).
    """
    try:
        # A throwaway reader is used purely to read title/author
        probe = create_ebook_reader(page_size=(400, 600))
        probe.load_epub(str(epub_path))

        # Missing title/author fall back to the file stem / a placeholder
        book_title = probe.book_title or epub_path.stem
        book_author = probe.book_author or 'Unknown Author'

        record = dict(
            filename=epub_path.name,
            path=str(epub_path),
            title=book_title,
            author=book_author,
        )

        # The cover comes straight from the EPUB file, not from the reader
        if include_cover:
            record['cover_data'] = extract_cover_from_epub(epub_path)

        return record

    except Exception as e:
        print(f"Error extracting metadata from {epub_path}: {e}")
        fallback = dict(
            filename=epub_path.name,
            path=str(epub_path),
            title=epub_path.stem,
            author='Unknown',
            cover_data=None,
        )
        return fallback


def extract_cover_as_base64(reader, max_width: int = 300, max_height: int = 450) -> Optional[str]:
    """
    Extract cover image from reader and return as base64 encoded string.

    This function is kept for backward compatibility. When the reader exposes
    a non-empty ``_epub_path`` attribute the work is delegated to
    extract_cover_from_epub; otherwise the reader's current page is used as a
    stand-in cover.

    Args:
        reader: EbookReader instance with loaded book
        max_width: Maximum width for cover image
        max_height: Maximum height for cover image

    Returns:
        Base64 encoded PNG image string, or None if no image is available or
        an error occurs (errors are printed, not raised)
    """
    try:
        # If the reader has an epub path, try to extract the actual cover
        epub_path = getattr(reader, '_epub_path', None)
        if epub_path:
            return extract_cover_from_epub(epub_path, max_width, max_height)

        # Fallback to the current page as cover
        cover_image = reader.get_current_page()
        if cover_image is None:
            return None

        # thumbnail() resizes in place, so work on a copy: the page object
        # came from the reader and may still be in use there.
        if cover_image.width > max_width or cover_image.height > max_height:
            cover_image = cover_image.copy()
            cover_image.thumbnail((max_width, max_height))

        # Convert to base64
        buffer = BytesIO()
        cover_image.save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')

    except Exception as e:
        print(f"Error extracting cover image: {e}")
        return None


def _first_decodable_image(items):
    """Return the first item in *items* that Pillow can fully decode, else None."""
    for item in items:
        try:
            image = Image.open(BytesIO(item.get_content()))
            # Image.open() is lazy; load() forces decoding so a truncated or
            # unsupported file is rejected here rather than at save time.
            image.load()
        except Exception:
            # Best effort: an unreadable candidate just means "try the next".
            continue
        return image
    return None


def extract_cover_from_epub(epub_path: Path, max_width: int = 300, max_height: int = 450) -> Optional[str]:
    """
    Extract the actual cover image from an EPUB file.

    Candidates are tried in this order, and the first one that decodes wins:
      1. items whose type is ebooklib.ITEM_COVER,
      2. image items with 'cover' in their name (case-insensitive),
      3. any remaining image items, in document order.

    Args:
        epub_path: Path to EPUB file
        max_width: Maximum width for cover image
        max_height: Maximum height for cover image

    Returns:
        Base64 encoded PNG image string, or None when the EPUB has no usable
        image or cannot be read (errors are printed, not raised)
    """
    try:
        # Read the EPUB once and bucket its items by priority
        book = epub.read_epub(str(epub_path))

        declared_covers = []
        named_covers = []
        other_images = []
        for item in book.get_items():
            item_type = item.get_type()
            if item_type == ebooklib.ITEM_COVER:
                declared_covers.append(item)
            elif item_type == ebooklib.ITEM_IMAGE:
                if 'cover' in (item.get_name() or '').lower():
                    named_covers.append(item)
                else:
                    other_images.append(item)

        cover_image = _first_decodable_image(
            declared_covers + named_covers + other_images
        )
        if cover_image is None:
            return None

        # Resize if needed (thumbnail maintains aspect ratio)
        if cover_image.width > max_width or cover_image.height > max_height:
            cover_image.thumbnail((max_width, max_height), Image.Resampling.LANCZOS)

        # Pillow's PNG writer only accepts a handful of modes; others
        # (e.g. CMYK, common in JPEG covers) must be converted first.
        if cover_image.mode not in ('1', 'L', 'LA', 'I', 'I;16', 'P', 'RGB', 'RGBA'):
            cover_image = cover_image.convert('RGB')

        # Convert to base64
        buffer = BytesIO()
        cover_image.save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')

    except Exception as e:
        print(f"Error extracting cover from EPUB {epub_path}: {e}")
        return None


def get_chapter_list(reader) -> List[Dict]:
    """
    Get formatted chapter list from reader.

    Each entry returned by ``reader.get_chapters()`` may be a plain string,
    a dict with a 'title' key, or a tuple of at least two elements whose
    first element is the title. Any other shape, or an empty/None title, is
    replaced by the placeholder 'Chapter N' (N is 1-based).

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        List of chapter dictionaries with index and title; an empty list if
        the reader reports no chapters or raises (errors are printed)
    """
    try:
        chapters = reader.get_chapters() or []
        result = []
        for i, chapter in enumerate(chapters):
            # Handle different chapter formats
            if isinstance(chapter, str):
                title = chapter
            elif isinstance(chapter, dict):
                title = chapter.get('title')
            elif isinstance(chapter, tuple) and len(chapter) >= 2:
                # Tuple format: (title, ...)
                title = chapter[0]
            else:
                title = None

            # One fallback for every format, so a blank title never
            # reaches the caller.
            result.append({
                'index': i,
                'title': title or f'Chapter {i+1}'
            })
        return result
    except Exception as e:
        print(f"Error getting chapters: {e}")
        return []


def get_bookmark_list(reader) -> List[Dict]:
    """
    Turn the reader's saved positions into a list of bookmark records.

    Args:
        reader: EbookReader instance with loaded book

    Returns:
        One dictionary per saved position, with the saved name under 'name'
        and an empty string under 'position'. An empty list is returned
        (and the error printed) if the reader call or iteration fails.
    """
    try:
        saved_names = reader.list_saved_positions()
        entries = []
        for saved_name in saved_names:
            # 'position' is a placeholder; no chapter/page info is looked up
            entries.append({'name': saved_name, 'position': ''})
    except Exception as e:
        print(f"Error getting bookmarks: {e}")
        return []
    else:
        return entries


def page_image_to_base64(page_image) -> str:
    """
    Serialize an image to PNG and return the bytes base64-encoded.

    Args:
        page_image: PIL Image object

    Returns:
        Base64 encoded PNG string
    """
    with BytesIO() as png_stream:
        page_image.save(png_stream, format='PNG')
        encoded = base64.b64encode(png_stream.getvalue())
    return encoded.decode('utf-8')