Add a test for EPUBs with images.
All checks were successful
Python CI / test (push) Successful in 48s

This commit is contained in:
Duncan Tourolle 2025-06-07 19:04:53 +02:00
parent 87a313b3e7
commit d40fcfe084

View File

@@ -752,5 +752,79 @@ class TestEPUBIntegrationWithHTMLExtraction(unittest.TestCase):
self.assertTrue(color_found, "Should find at least one colored text")
def test_epub_with_image(self):
    """Round-trip an EPUB that embeds one image and inspect the parsed blocks.

    A single-chapter book is assembled with ebooklib: the chapter's XHTML
    holds a heading, a paragraph, an ``<img>`` tag and a closing paragraph.
    The book is written into ``self.test_dir``, registered in
    ``self.epub_files``, read back through ``read_epub`` and the blocks of
    the one parsed chapter are checked by type and by image attributes.
    """
    # --- Assemble the book metadata -------------------------------------
    image_book = epub.EpubBook()
    image_book.set_identifier('image-test-id')
    image_book.set_title('Image Test Book')
    image_book.set_language('en')
    image_book.add_author('Test Author')

    # Small hard-coded JPEG byte string used as the image payload.
    jpeg_bytes = b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x08\x06\x06\x07\x06\x05\x08\x07\x07\x07\t\t\x08\n\x0c\x14\r\x0c\x0b\x0b\x0c\x19\x12\x13\x0f\x14\x1d\x1a\x1f\x1e\x1d\x1a\x1c\x1c $.\' ",#\x1c\x1c(7),01444\x1f\'9=82<.342\xff\xc0\x00\x11\x08\x00d\x00d\x01\x01\x11\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x14\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\xff\xc4\x00\x14\x10\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xda\x00\x0c\x03\x01\x00\x02\x11\x03\x11\x00\x3f\x00\xaa\xff\xd9'

    # --- Register the image resource -------------------------------------
    picture_item = epub.EpubImage()
    picture_item.id = 'test_img'
    picture_item.file_name = 'images/test_image.jpg'
    picture_item.media_type = 'image/jpeg'
    picture_item.content = jpeg_bytes
    image_book.add_item(picture_item)

    # --- Chapter whose markup points at the image ------------------------
    chapter_markup = '''<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Image Chapter</title></head>
<body>
<h1>Chapter with Image</h1>
<p>This chapter contains an image:</p>
<img src="images/test_image.jpg" alt="Test image" width="300" height="200" />
<p>Text after the image.</p>
</body>
</html>'''
    image_chapter = epub.EpubHtml(
        title='Image Chapter',
        file_name='image_chapter.xhtml',
        lang='en',
    )
    image_chapter.content = chapter_markup
    image_book.add_item(image_chapter)

    # --- Navigation and reading order ------------------------------------
    image_book.toc = (epub.Link("image_chapter.xhtml", "Image Chapter", "img_ch"),)
    image_book.add_item(epub.EpubNcx())
    image_book.add_item(epub.EpubNav())
    image_book.spine = ['nav', image_chapter]

    # --- Write the file and remember it in self.epub_files ---------------
    output_name = f'test_image_{len(self.epub_files)}.epub'
    output_path = os.path.join(self.test_dir, output_name)
    epub.write_epub(output_path, image_book, {})
    self.epub_files.append(output_path)

    # --- Parse it back ----------------------------------------------------
    reloaded = read_epub(output_path)
    parsed_chapters = list(reloaded.chapters)
    self.assertEqual(len(parsed_chapters), 1)

    parsed_blocks = list(parsed_chapters[0].blocks)
    self.assertGreater(len(parsed_blocks), 0)

    def of_kind(block_type):
        """Return the parsed blocks that are instances of *block_type*."""
        return [candidate for candidate in parsed_blocks
                if isinstance(candidate, block_type)]

    headings = of_kind(Heading)
    paragraphs = of_kind(Paragraph)
    images = of_kind(Image)

    # --- Expected block counts --------------------------------------------
    self.assertEqual(len(headings), 1, "Should find exactly one heading block")
    self.assertGreaterEqual(len(paragraphs), 2, "Should find at least two paragraph blocks")
    self.assertEqual(len(images), 1, "Should find exactly one image block")

    # --- Attributes carried over from the <img> tag -----------------------
    parsed_image = images[0]
    expected_attributes = (
        ('alt_text', "Test image"),
        ('width', 300),
        ('height', 200),
    )
    for attribute, expected in expected_attributes:
        self.assertEqual(getattr(parsed_image, attribute), expected)
    self.assertIn("test_image.jpg", parsed_image.source)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()