pyPhotoAlbum/tests/test_asset_manager.py
Duncan Tourolle 6a791b1397
All checks were successful
Python CI / test (push) Successful in 3m12s
Lint / lint (push) Successful in 1m38s
Tests / test (3.11) (push) Successful in 2m26s
Tests / test (3.12) (push) Successful in 3m13s
Tests / test (3.13) (push) Successful in 3m9s
Tests / test (3.14) (push) Successful in 1m20s
Use md5 to only store unique content
2025-12-31 12:46:01 +01:00

470 lines
18 KiB
Python

"""
Tests for AssetManager functionality including deduplication and unused asset detection
"""
import os
import pytest
import tempfile
import shutil
from PIL import Image
from pyPhotoAlbum.asset_manager import AssetManager, compute_file_md5
class TestComputeFileMd5:
    """Exercise the ``compute_file_md5`` helper imported from asset_manager."""

    def test_compute_md5_existing_file(self, tmp_path):
        """A file that exists yields a non-None digest equal to the pinned value."""
        target = tmp_path / "test.txt"
        target.write_text("Hello, World!")

        digest = compute_file_md5(str(target))

        assert digest is not None
        # Pinned MD5 hex digest for the text "Hello, World!" written above.
        assert digest == "65a8e27d8879283831b664bd8b7f0ad4"

    def test_compute_md5_nonexistent_file(self):
        """A path that does not exist is expected to give ``None``."""
        digest = compute_file_md5("/nonexistent/path/file.txt")

        assert digest is None

    def test_compute_md5_same_content_same_hash(self, tmp_path):
        """Two files holding the same bytes must hash to the same value."""
        payload = b"Test content for hashing"
        twins = [tmp_path / name for name in ("file1.bin", "file2.bin")]
        for twin in twins:
            twin.write_bytes(payload)

        first_digest, second_digest = (compute_file_md5(str(twin)) for twin in twins)

        assert first_digest == second_digest

    def test_compute_md5_different_content_different_hash(self, tmp_path):
        """Two files holding different text must hash to different values."""
        contents = {"file1.txt": "Content A", "file2.txt": "Content B"}
        for name, text in contents.items():
            (tmp_path / name).write_text(text)

        digest_a, digest_b = (
            compute_file_md5(str(tmp_path / name)) for name in contents
        )

        assert digest_a != digest_b
class TestAssetManagerDeduplication:
    """Cover hashing, duplicate discovery and duplicate removal on AssetManager."""

    @pytest.fixture
    def asset_manager(self, tmp_path):
        """Provide an AssetManager built on a fresh ``test_project`` directory."""
        project_root = str(tmp_path / "test_project")
        os.makedirs(project_root)
        return AssetManager(project_root)

    @pytest.fixture
    def create_test_image(self):
        """Provide a factory that saves a solid-colour RGB image to a path."""

        def _create(path, color="red", size=(100, 100)):
            Image.new("RGB", size, color=color).save(path)
            return path

        return _create

    @staticmethod
    def _asset_path(manager, filename):
        """Join ``filename`` onto the manager's ``assets_folder``."""
        return os.path.join(manager.assets_folder, filename)

    @staticmethod
    def _write_source(tmp_path, color, size):
        """Save a solid-colour ``source.png`` under ``tmp_path``; return its path."""
        source = tmp_path / "source.png"
        Image.new("RGB", size, color=color).save(str(source))
        return str(source)

    def test_compute_all_hashes_empty_folder(self, asset_manager):
        """With nothing written to the assets folder, no hashes are reported."""
        computed = asset_manager.compute_all_hashes()

        assert len(computed) == 0

    def test_compute_all_hashes_with_files(self, asset_manager, create_test_image):
        """Each image placed in the assets folder gets an entry in the result."""
        for filename, color in (("image1.png", "red"), ("image2.png", "blue")):
            create_test_image(self._asset_path(asset_manager, filename), color=color)

        computed = asset_manager.compute_all_hashes()

        assert len(computed) == 2
        assert "assets/image1.png" in computed
        assert "assets/image2.png" in computed

    def test_find_duplicates_no_duplicates(self, asset_manager, create_test_image):
        """Images of different colours are not reported as duplicates."""
        for filename, color in (("image1.png", "red"), ("image2.png", "blue")):
            create_test_image(self._asset_path(asset_manager, filename), color=color)

        found = asset_manager.find_duplicates()

        assert len(found) == 0

    def test_find_duplicates_with_duplicates(self, asset_manager, tmp_path):
        """Two byte-identical copies are reported as one duplicate group."""
        source = self._write_source(tmp_path, "green", (50, 50))
        # Same source file copied under two names inside the assets folder.
        for filename in ("dup1.png", "dup2.png"):
            shutil.copy(source, self._asset_path(asset_manager, filename))

        found = asset_manager.find_duplicates()

        assert len(found) == 1  # One group of duplicates
        # That single group has to list both copies.
        for group in found.values():
            assert len(group) == 2
            assert "assets/dup1.png" in group
            assert "assets/dup2.png" in group

    def test_get_duplicate_stats_no_duplicates(self, asset_manager, create_test_image):
        """A lone image produces all-zero duplicate statistics."""
        create_test_image(self._asset_path(asset_manager, "image1.png"), color="red")

        group_count, extra_files, reclaimable = asset_manager.get_duplicate_stats()

        assert group_count == 0
        assert extra_files == 0
        assert reclaimable == 0

    def test_get_duplicate_stats_with_duplicates(self, asset_manager, tmp_path):
        """Three identical copies count as one group with two surplus files."""
        source = self._write_source(tmp_path, "purple", (100, 100))
        # Three copies of one file means two of them are redundant.
        for i in range(3):
            shutil.copy(source, self._asset_path(asset_manager, f"image{i}.png"))

        group_count, extra_files, reclaimable = asset_manager.get_duplicate_stats()

        assert group_count == 1  # One group
        assert extra_files == 2  # Two extra copies to remove
        assert reclaimable > 0

    def test_deduplicate_assets_removes_files(self, asset_manager, tmp_path):
        """Deduplicating three identical copies leaves one file on disk."""
        source = self._write_source(tmp_path, "yellow", (50, 50))
        for i in range(3):
            shutil.copy(source, self._asset_path(asset_manager, f"image{i}.png"))
            asset_manager.reference_counts[f"assets/image{i}.png"] = 1

        # Sanity check on the starting state.
        assert len(os.listdir(asset_manager.assets_folder)) == 3

        removed, saved = asset_manager.deduplicate_assets()

        assert removed == 2
        assert saved > 0
        assert len(os.listdir(asset_manager.assets_folder)) == 1

    def test_deduplicate_assets_updates_callback(self, asset_manager, tmp_path):
        """The update callback receives the (old, new) path pair for the duplicate."""
        source = self._write_source(tmp_path, "cyan", (50, 50))
        for filename in ("a_first.png", "b_second.png"):
            shutil.copy(source, self._asset_path(asset_manager, filename))

        # Record every (old_path, new_path) pair handed to the callback.
        recorded = []

        asset_manager.deduplicate_assets(
            update_references_callback=lambda old_path, new_path: recorded.append(
                (old_path, new_path)
            )
        )

        # Exactly one duplicate, so exactly one invocation.
        assert len(recorded) == 1
        # b_second.png should be remapped to a_first.png (alphabetical order)
        assert recorded[0] == ("assets/b_second.png", "assets/a_first.png")

    def test_deduplicate_assets_transfers_reference_counts(self, asset_manager, tmp_path):
        """The removed duplicate's reference count is added to the kept file's."""
        source = self._write_source(tmp_path, "magenta", (50, 50))
        for filename in ("a_first.png", "b_second.png"):
            shutil.copy(source, self._asset_path(asset_manager, filename))

        counts = asset_manager.reference_counts
        counts["assets/a_first.png"] = 2
        counts["assets/b_second.png"] = 3

        asset_manager.deduplicate_assets()

        # 2 + 3 references end up on the kept file; the other entry disappears.
        assert asset_manager.reference_counts.get("assets/a_first.png") == 5
        assert "assets/b_second.png" not in asset_manager.reference_counts

    def test_serialize_includes_hashes(self, asset_manager, create_test_image):
        """Serialized output carries an ``asset_hashes`` entry for hashed files."""
        create_test_image(self._asset_path(asset_manager, "image1.png"), color="red")
        asset_manager.compute_all_hashes()

        payload = asset_manager.serialize()

        assert "asset_hashes" in payload
        assert "assets/image1.png" in payload["asset_hashes"]

    def test_deserialize_restores_hashes(self, asset_manager):
        """Deserializing populates ``asset_hashes`` from the supplied data."""
        stored = {
            "reference_counts": {"assets/test.png": 1},
            "asset_hashes": {"assets/test.png": "abc123hash"},
        }

        asset_manager.deserialize(stored)

        assert asset_manager.asset_hashes.get("assets/test.png") == "abc123hash"

    def test_compute_asset_hash_single_file(self, asset_manager, create_test_image):
        """Hashing one asset returns the digest and records it in ``asset_hashes``."""
        create_test_image(self._asset_path(asset_manager, "single.png"), color="orange")

        digest = asset_manager.compute_asset_hash("assets/single.png")

        assert digest is not None
        assert "assets/single.png" in asset_manager.asset_hashes
        assert asset_manager.asset_hashes["assets/single.png"] == digest
class TestAssetManagerIntegration:
    """End-to-end check combining asset import with deduplication."""

    @pytest.fixture
    def asset_manager(self, tmp_path):
        """Provide an AssetManager built on a fresh ``test_project`` directory."""
        project_root = str(tmp_path / "test_project")
        os.makedirs(project_root)
        return AssetManager(project_root)

    def test_import_then_deduplicate(self, asset_manager, tmp_path):
        """Importing one image twice, then deduplicating, leaves a single file."""
        source = tmp_path / "source.png"
        Image.new("RGB", (80, 80), color="navy").save(str(source))

        # Two imports of the very same source file.
        first_import = asset_manager.import_asset(str(source))
        second_import = asset_manager.import_asset(str(source))

        # Should have different names due to collision handling
        assert first_import != second_import

        # Both imported copies must be present on disk.
        for imported in (first_import, second_import):
            assert os.path.exists(asset_manager.get_absolute_path(imported))

        found = asset_manager.find_duplicates()
        assert len(found) == 1

        removed, _ = asset_manager.deduplicate_assets()
        assert removed == 1

        # Exactly one file is left in the assets folder afterwards.
        remaining = os.listdir(asset_manager.assets_folder)
        assert len(remaining) == 1
class TestAssetManagerUnused:
    """Cover detection, statistics and removal of unreferenced assets."""

    @pytest.fixture
    def asset_manager(self, tmp_path):
        """Provide an AssetManager built on a fresh ``test_project`` directory."""
        project_root = str(tmp_path / "test_project")
        os.makedirs(project_root)
        return AssetManager(project_root)

    @pytest.fixture
    def create_test_image(self):
        """Provide a factory that saves a solid-colour RGB image to a path."""

        def _create(path, color="red", size=(100, 100)):
            Image.new("RGB", size, color=color).save(path)
            return path

        return _create

    @staticmethod
    def _asset_path(manager, filename):
        """Join ``filename`` onto the manager's ``assets_folder``."""
        return os.path.join(manager.assets_folder, filename)

    def test_find_unused_assets_empty_folder(self, asset_manager):
        """With nothing written to the assets folder, nothing is reported unused."""
        orphans = asset_manager.find_unused_assets()

        assert len(orphans) == 0

    def test_find_unused_assets_all_referenced(self, asset_manager, create_test_image):
        """Files that all carry a positive reference count are not reported."""
        create_test_image(self._asset_path(asset_manager, "image1.png"), color="red")
        create_test_image(self._asset_path(asset_manager, "image2.png"), color="blue")

        # Give both files a positive reference count.
        asset_manager.reference_counts["assets/image1.png"] = 1
        asset_manager.reference_counts["assets/image2.png"] = 2

        orphans = asset_manager.find_unused_assets()

        assert len(orphans) == 0

    def test_find_unused_assets_some_unreferenced(self, asset_manager, create_test_image):
        """A file with no reference-count entry is reported as unused."""
        create_test_image(self._asset_path(asset_manager, "used.png"), color="red")
        create_test_image(self._asset_path(asset_manager, "unused.png"), color="blue")

        # Only one of the two files gets a reference.
        asset_manager.reference_counts["assets/used.png"] = 1

        orphans = asset_manager.find_unused_assets()

        assert len(orphans) == 1
        assert "assets/unused.png" in orphans

    def test_find_unused_assets_zero_reference_count(self, asset_manager, create_test_image):
        """A file whose reference count is explicitly 0 is reported as unused."""
        create_test_image(self._asset_path(asset_manager, "orphan.png"), color="red")
        asset_manager.reference_counts["assets/orphan.png"] = 0

        orphans = asset_manager.find_unused_assets()

        assert len(orphans) == 1
        assert "assets/orphan.png" in orphans

    def test_get_unused_stats_no_unused(self, asset_manager, create_test_image):
        """Statistics are zero when the only file present is referenced."""
        create_test_image(self._asset_path(asset_manager, "image.png"), color="red")
        asset_manager.reference_counts["assets/image.png"] = 1

        unused_count, unused_bytes = asset_manager.get_unused_stats()

        assert unused_count == 0
        assert unused_bytes == 0

    def test_get_unused_stats_with_unused(self, asset_manager, create_test_image):
        """Two unreferenced files are counted and contribute a positive size."""
        create_test_image(self._asset_path(asset_manager, "unused1.png"), color="red")
        create_test_image(self._asset_path(asset_manager, "unused2.png"), color="blue")

        # Deliberately no reference counts are set for either file.
        unused_count, unused_bytes = asset_manager.get_unused_stats()

        assert unused_count == 2
        assert unused_bytes > 0

    def test_remove_unused_assets_removes_files(self, asset_manager, create_test_image):
        """Removal deletes the unreferenced file and keeps the referenced one."""
        kept_file = self._asset_path(asset_manager, "used.png")
        doomed_file = self._asset_path(asset_manager, "unused.png")
        create_test_image(kept_file, color="red")
        create_test_image(doomed_file, color="blue")

        # Only the file meant to survive gets a reference.
        asset_manager.reference_counts["assets/used.png"] = 1

        removed, freed = asset_manager.remove_unused_assets()

        assert removed == 1
        assert freed > 0
        # Verify the outcome on disk.
        assert os.path.exists(kept_file)
        assert not os.path.exists(doomed_file)

    def test_remove_unused_assets_no_unused(self, asset_manager, create_test_image):
        """Removal reports zeros and leaves a referenced file in place."""
        kept_file = self._asset_path(asset_manager, "used.png")
        create_test_image(kept_file, color="red")
        asset_manager.reference_counts["assets/used.png"] = 1

        removed, freed = asset_manager.remove_unused_assets()

        assert removed == 0
        assert freed == 0
        assert os.path.exists(kept_file)

    def test_remove_unused_assets_cleans_tracking(self, asset_manager, create_test_image):
        """Removal drops the removed file's reference-count and hash entries."""
        create_test_image(self._asset_path(asset_manager, "orphan.png"), color="red")

        # Seed both tracking maps: a zero reference count and a placeholder hash.
        asset_manager.reference_counts["assets/orphan.png"] = 0
        asset_manager.asset_hashes["assets/orphan.png"] = "somehash"

        asset_manager.remove_unused_assets()

        assert "assets/orphan.png" not in asset_manager.reference_counts
        assert "assets/orphan.png" not in asset_manager.asset_hashes

    def test_remove_unused_preserves_referenced(self, asset_manager, create_test_image):
        """Out of five files, only the two without references are removed."""
        for i in range(5):
            create_test_image(
                self._asset_path(asset_manager, f"image{i}.png"), color="red"
            )

        # Reference the even-numbered files only.
        asset_manager.reference_counts["assets/image0.png"] = 1
        asset_manager.reference_counts["assets/image2.png"] = 3
        asset_manager.reference_counts["assets/image4.png"] = 1

        removed, _ = asset_manager.remove_unused_assets()

        assert removed == 2  # image1 and image3

        # Referenced files are still on disk.
        for survivor in ("image0.png", "image2.png", "image4.png"):
            assert os.path.exists(self._asset_path(asset_manager, survivor))

        # Unreferenced files have been deleted.
        for casualty in ("image1.png", "image3.png"):
            assert not os.path.exists(self._asset_path(asset_manager, casualty))