""" Tests for AssetManager functionality including deduplication and unused asset detection """ import os import pytest import tempfile import shutil from PIL import Image from pyPhotoAlbum.asset_manager import AssetManager, compute_file_md5 class TestComputeFileMd5: """Tests for the compute_file_md5 function""" def test_compute_md5_existing_file(self, tmp_path): """Test MD5 computation for an existing file""" # Create a test file test_file = tmp_path / "test.txt" test_file.write_text("Hello, World!") md5_hash = compute_file_md5(str(test_file)) assert md5_hash is not None # Known MD5 for "Hello, World!" assert md5_hash == "65a8e27d8879283831b664bd8b7f0ad4" def test_compute_md5_nonexistent_file(self): """Test MD5 computation returns None for non-existent file""" md5_hash = compute_file_md5("/nonexistent/path/file.txt") assert md5_hash is None def test_compute_md5_same_content_same_hash(self, tmp_path): """Test that identical content produces identical hashes""" content = b"Test content for hashing" file1 = tmp_path / "file1.bin" file2 = tmp_path / "file2.bin" file1.write_bytes(content) file2.write_bytes(content) hash1 = compute_file_md5(str(file1)) hash2 = compute_file_md5(str(file2)) assert hash1 == hash2 def test_compute_md5_different_content_different_hash(self, tmp_path): """Test that different content produces different hashes""" file1 = tmp_path / "file1.txt" file2 = tmp_path / "file2.txt" file1.write_text("Content A") file2.write_text("Content B") hash1 = compute_file_md5(str(file1)) hash2 = compute_file_md5(str(file2)) assert hash1 != hash2 class TestAssetManagerDeduplication: """Tests for AssetManager deduplication methods""" @pytest.fixture def asset_manager(self, tmp_path): """Create an AssetManager with a temporary project folder""" project_folder = str(tmp_path / "test_project") os.makedirs(project_folder) return AssetManager(project_folder) @pytest.fixture def create_test_image(self): """Factory fixture for creating test images""" def _create(path, color="red", size=(100, 100)): img = Image.new("RGB", size, color=color) img.save(path) return path return _create def test_compute_all_hashes_empty_folder(self, asset_manager): """Test hash computation on empty assets folder""" hashes = asset_manager.compute_all_hashes() assert len(hashes) == 0 def test_compute_all_hashes_with_files(self, asset_manager, create_test_image): """Test hash computation with files in assets folder""" # Create some test images img1 = os.path.join(asset_manager.assets_folder, "image1.png") img2 = os.path.join(asset_manager.assets_folder, "image2.png") create_test_image(img1, color="red") create_test_image(img2, color="blue") hashes = asset_manager.compute_all_hashes() assert len(hashes) == 2 assert "assets/image1.png" in hashes assert "assets/image2.png" in hashes def test_find_duplicates_no_duplicates(self, asset_manager, create_test_image): """Test finding duplicates when there are none""" img1 = os.path.join(asset_manager.assets_folder, "image1.png") img2 = os.path.join(asset_manager.assets_folder, "image2.png") create_test_image(img1, color="red") create_test_image(img2, color="blue") duplicates = asset_manager.find_duplicates() assert len(duplicates) == 0 def test_find_duplicates_with_duplicates(self, asset_manager, tmp_path): """Test finding actual duplicate files""" # Create a source image source_img = tmp_path / "source.png" img = Image.new("RGB", (50, 50), color="green") img.save(str(source_img)) # Copy the same image twice to assets folder dup1 = os.path.join(asset_manager.assets_folder, "dup1.png") dup2 = os.path.join(asset_manager.assets_folder, "dup2.png") shutil.copy(str(source_img), dup1) shutil.copy(str(source_img), dup2) duplicates = asset_manager.find_duplicates() assert len(duplicates) == 1 # One group of duplicates # The group should contain both files for paths in duplicates.values(): assert len(paths) == 2 assert "assets/dup1.png" in paths assert "assets/dup2.png" in paths def test_get_duplicate_stats_no_duplicates(self, asset_manager, create_test_image): """Test duplicate stats when there are no duplicates""" img1 = os.path.join(asset_manager.assets_folder, "image1.png") create_test_image(img1, color="red") groups, files, bytes_to_save = asset_manager.get_duplicate_stats() assert groups == 0 assert files == 0 assert bytes_to_save == 0 def test_get_duplicate_stats_with_duplicates(self, asset_manager, tmp_path): """Test duplicate stats with actual duplicates""" # Create a source image source_img = tmp_path / "source.png" img = Image.new("RGB", (100, 100), color="purple") img.save(str(source_img)) # Copy to assets folder 3 times (creates 2 duplicates) for i in range(3): dest = os.path.join(asset_manager.assets_folder, f"image{i}.png") shutil.copy(str(source_img), dest) groups, files, bytes_to_save = asset_manager.get_duplicate_stats() assert groups == 1 # One group assert files == 2 # Two extra copies to remove assert bytes_to_save > 0 def test_deduplicate_assets_removes_files(self, asset_manager, tmp_path): """Test that deduplication actually removes duplicate files""" # Create a source image source_img = tmp_path / "source.png" img = Image.new("RGB", (50, 50), color="yellow") img.save(str(source_img)) # Copy to assets folder 3 times for i in range(3): dest = os.path.join(asset_manager.assets_folder, f"image{i}.png") shutil.copy(str(source_img), dest) asset_manager.reference_counts[f"assets/image{i}.png"] = 1 # Count files before files_before = len(os.listdir(asset_manager.assets_folder)) assert files_before == 3 # Run deduplication files_removed, bytes_saved = asset_manager.deduplicate_assets() # Check results assert files_removed == 2 assert bytes_saved > 0 # Count files after files_after = len(os.listdir(asset_manager.assets_folder)) assert files_after == 1 def test_deduplicate_assets_updates_callback(self, asset_manager, tmp_path): """Test that deduplication calls the update callback correctly""" # Create a source image source_img = tmp_path / "source.png" img = Image.new("RGB", (50, 50), color="cyan") img.save(str(source_img)) # Copy to assets folder dest1 = os.path.join(asset_manager.assets_folder, "a_first.png") dest2 = os.path.join(asset_manager.assets_folder, "b_second.png") shutil.copy(str(source_img), dest1) shutil.copy(str(source_img), dest2) # Track callback invocations callback_calls = [] def track_callback(old_path, new_path): callback_calls.append((old_path, new_path)) # Run deduplication asset_manager.deduplicate_assets(update_references_callback=track_callback) # Callback should have been called for the duplicate assert len(callback_calls) == 1 # b_second.png should be remapped to a_first.png (alphabetical order) assert callback_calls[0] == ("assets/b_second.png", "assets/a_first.png") def test_deduplicate_assets_transfers_reference_counts(self, asset_manager, tmp_path): """Test that reference counts are properly transferred during deduplication""" # Create a source image source_img = tmp_path / "source.png" img = Image.new("RGB", (50, 50), color="magenta") img.save(str(source_img)) # Copy to assets folder dest1 = os.path.join(asset_manager.assets_folder, "a_first.png") dest2 = os.path.join(asset_manager.assets_folder, "b_second.png") shutil.copy(str(source_img), dest1) shutil.copy(str(source_img), dest2) # Set reference counts asset_manager.reference_counts["assets/a_first.png"] = 2 asset_manager.reference_counts["assets/b_second.png"] = 3 # Run deduplication asset_manager.deduplicate_assets() # Check reference counts were merged assert asset_manager.reference_counts.get("assets/a_first.png") == 5 assert "assets/b_second.png" not in asset_manager.reference_counts def test_serialize_includes_hashes(self, asset_manager, create_test_image): """Test that serialization includes asset hashes""" img1 = os.path.join(asset_manager.assets_folder, "image1.png") create_test_image(img1, color="red") asset_manager.compute_all_hashes() data = asset_manager.serialize() assert "asset_hashes" in data assert "assets/image1.png" in data["asset_hashes"] def test_deserialize_restores_hashes(self, asset_manager): """Test that deserialization restores asset hashes""" test_data = { "reference_counts": {"assets/test.png": 1}, "asset_hashes": {"assets/test.png": "abc123hash"} } asset_manager.deserialize(test_data) assert asset_manager.asset_hashes.get("assets/test.png") == "abc123hash" def test_compute_asset_hash_single_file(self, asset_manager, create_test_image): """Test computing hash for a single asset""" img_path = os.path.join(asset_manager.assets_folder, "single.png") create_test_image(img_path, color="orange") hash_result = asset_manager.compute_asset_hash("assets/single.png") assert hash_result is not None assert "assets/single.png" in asset_manager.asset_hashes assert asset_manager.asset_hashes["assets/single.png"] == hash_result class TestAssetManagerIntegration: """Integration tests for AssetManager with import and deduplication""" @pytest.fixture def asset_manager(self, tmp_path): """Create an AssetManager with a temporary project folder""" project_folder = str(tmp_path / "test_project") os.makedirs(project_folder) return AssetManager(project_folder) def test_import_then_deduplicate(self, asset_manager, tmp_path): """Test importing duplicate images and then deduplicating""" # Create a source image source_img = tmp_path / "source.png" img = Image.new("RGB", (80, 80), color="navy") img.save(str(source_img)) # Import the same image twice path1 = asset_manager.import_asset(str(source_img)) path2 = asset_manager.import_asset(str(source_img)) assert path1 != path2 # Should have different names due to collision handling # Check both files exist assert os.path.exists(asset_manager.get_absolute_path(path1)) assert os.path.exists(asset_manager.get_absolute_path(path2)) # Find duplicates duplicates = asset_manager.find_duplicates() assert len(duplicates) == 1 # Deduplicate files_removed, _ = asset_manager.deduplicate_assets() assert files_removed == 1 # Only one file should remain files_in_assets = os.listdir(asset_manager.assets_folder) assert len(files_in_assets) == 1 class TestAssetManagerUnused: """Tests for AssetManager unused asset detection and removal""" @pytest.fixture def asset_manager(self, tmp_path): """Create an AssetManager with a temporary project folder""" project_folder = str(tmp_path / "test_project") os.makedirs(project_folder) return AssetManager(project_folder) @pytest.fixture def create_test_image(self): """Factory fixture for creating test images""" def _create(path, color="red", size=(100, 100)): img = Image.new("RGB", size, color=color) img.save(path) return path return _create def test_find_unused_assets_empty_folder(self, asset_manager): """Test finding unused assets in empty folder""" unused = asset_manager.find_unused_assets() assert len(unused) == 0 def test_find_unused_assets_all_referenced(self, asset_manager, create_test_image): """Test finding unused assets when all are referenced""" img1 = os.path.join(asset_manager.assets_folder, "image1.png") img2 = os.path.join(asset_manager.assets_folder, "image2.png") create_test_image(img1, color="red") create_test_image(img2, color="blue") # Add references for both asset_manager.reference_counts["assets/image1.png"] = 1 asset_manager.reference_counts["assets/image2.png"] = 2 unused = asset_manager.find_unused_assets() assert len(unused) == 0 def test_find_unused_assets_some_unreferenced(self, asset_manager, create_test_image): """Test finding unused assets when some have no references""" img1 = os.path.join(asset_manager.assets_folder, "used.png") img2 = os.path.join(asset_manager.assets_folder, "unused.png") create_test_image(img1, color="red") create_test_image(img2, color="blue") # Only reference one asset_manager.reference_counts["assets/used.png"] = 1 unused = asset_manager.find_unused_assets() assert len(unused) == 1 assert "assets/unused.png" in unused def test_find_unused_assets_zero_reference_count(self, asset_manager, create_test_image): """Test that zero reference count is considered unused""" img = os.path.join(asset_manager.assets_folder, "orphan.png") create_test_image(img, color="red") # Set reference count to 0 asset_manager.reference_counts["assets/orphan.png"] = 0 unused = asset_manager.find_unused_assets() assert len(unused) == 1 assert "assets/orphan.png" in unused def test_get_unused_stats_no_unused(self, asset_manager, create_test_image): """Test unused stats when all assets are referenced""" img = os.path.join(asset_manager.assets_folder, "image.png") create_test_image(img, color="red") asset_manager.reference_counts["assets/image.png"] = 1 count, total_bytes = asset_manager.get_unused_stats() assert count == 0 assert total_bytes == 0 def test_get_unused_stats_with_unused(self, asset_manager, create_test_image): """Test unused stats with unreferenced files""" img1 = os.path.join(asset_manager.assets_folder, "unused1.png") img2 = os.path.join(asset_manager.assets_folder, "unused2.png") create_test_image(img1, color="red") create_test_image(img2, color="blue") # No references for either file count, total_bytes = asset_manager.get_unused_stats() assert count == 2 assert total_bytes > 0 def test_remove_unused_assets_removes_files(self, asset_manager, create_test_image): """Test that unused assets are actually removed""" used_path = os.path.join(asset_manager.assets_folder, "used.png") unused_path = os.path.join(asset_manager.assets_folder, "unused.png") create_test_image(used_path, color="red") create_test_image(unused_path, color="blue") # Only reference the used file asset_manager.reference_counts["assets/used.png"] = 1 # Remove unused files_removed, bytes_freed = asset_manager.remove_unused_assets() assert files_removed == 1 assert bytes_freed > 0 # Check files on disk assert os.path.exists(used_path) assert not os.path.exists(unused_path) def test_remove_unused_assets_no_unused(self, asset_manager, create_test_image): """Test removing unused when all assets are referenced""" img = os.path.join(asset_manager.assets_folder, "used.png") create_test_image(img, color="red") asset_manager.reference_counts["assets/used.png"] = 1 files_removed, bytes_freed = asset_manager.remove_unused_assets() assert files_removed == 0 assert bytes_freed == 0 assert os.path.exists(img) def test_remove_unused_assets_cleans_tracking(self, asset_manager, create_test_image): """Test that removing unused assets cleans up internal tracking""" img = os.path.join(asset_manager.assets_folder, "orphan.png") create_test_image(img, color="red") # Set up tracking with zero refs and a hash asset_manager.reference_counts["assets/orphan.png"] = 0 asset_manager.asset_hashes["assets/orphan.png"] = "somehash" asset_manager.remove_unused_assets() # Tracking should be cleaned up assert "assets/orphan.png" not in asset_manager.reference_counts assert "assets/orphan.png" not in asset_manager.asset_hashes def test_remove_unused_preserves_referenced(self, asset_manager, create_test_image): """Test that removing unused preserves all referenced assets""" # Create several files for i in range(5): img = os.path.join(asset_manager.assets_folder, f"image{i}.png") create_test_image(img, color="red") # Reference only some of them asset_manager.reference_counts["assets/image0.png"] = 1 asset_manager.reference_counts["assets/image2.png"] = 3 asset_manager.reference_counts["assets/image4.png"] = 1 files_removed, _ = asset_manager.remove_unused_assets() assert files_removed == 2 # image1 and image3 # Check that referenced files still exist assert os.path.exists(os.path.join(asset_manager.assets_folder, "image0.png")) assert os.path.exists(os.path.join(asset_manager.assets_folder, "image2.png")) assert os.path.exists(os.path.join(asset_manager.assets_folder, "image4.png")) # Check that unreferenced files are gone assert not os.path.exists(os.path.join(asset_manager.assets_folder, "image1.png")) assert not os.path.exists(os.path.join(asset_manager.assets_folder, "image3.png"))