diff options
author | Harald Eilertsen <haraldei@anduin.net> | 2025-02-04 22:51:59 +0100 |
---|---|---|
committer | Harald Eilertsen <haraldei@anduin.net> | 2025-02-12 20:05:42 +0100 |
commit | 0dd456c65399f320062a23a06aca912e298c9bd1 (patch) | |
tree | 94a2ad9bd848698af1ba9e1bd1a444f669ef961b | |
parent | 97ba14cbe05c4a679c523f2d9b1237610093fe6a (diff) | |
download | volse-hubzilla-0dd456c65399f320062a23a06aca912e298c9bd1.tar.gz volse-hubzilla-0dd456c65399f320062a23a06aca912e298c9bd1.tar.bz2 volse-hubzilla-0dd456c65399f320062a23a06aca912e298c9bd1.zip |
Replace the code to extract epub thumbnails
The PHP Epub Meta library has a dependency that prevents deployment on
32-bit architectures. We also don't need all the functionality in that
library, so this patch replaces it with our own simplified code for
fetching the cover embedded in Epub archives.
We also expand the test suite and clean up some minor issues in the
Epubthumb class.
-rw-r--r-- | Zotlabs/Thumbs/Epubthumb.php | 148 | ||||
-rw-r--r-- | tests/unit/Thumb/EpubthumbTest.php | 145 |
2 files changed, 279 insertions, 14 deletions
diff --git a/Zotlabs/Thumbs/Epubthumb.php b/Zotlabs/Thumbs/Epubthumb.php index b50583e30..af372e85c 100644 --- a/Zotlabs/Thumbs/Epubthumb.php +++ b/Zotlabs/Thumbs/Epubthumb.php @@ -2,8 +2,11 @@ namespace Zotlabs\Thumbs; -use SebLucas\EPubMeta\EPub; +use DOMDocument; +use DOMElement; +use DOMXPath; use GdImage; +use ZipArchive; /** * Thumbnail creation for epub files. @@ -24,20 +27,21 @@ class Epubthumb { * Create the thumbnail if the Epub has a cover. * * @param array $attach - * @param number $preview_style unused - * @param number $height (optional) default 300 - * @param number $width (optional) default 300 + * @param int $preview_style unused + * @param int $height (optional) default 300 + * @param int $width (optional) default 300 * * @SuppressWarnings(PHPMD.UnusedFormalParameter) + * phpcs:disable Generic.CodeAnalysis.UnusedFunctionParameter.FoundBeforeLastUsed */ - function Thumb($attach, $preview_style, $height = 300, $width = 300) { + function Thumb($attach, $preview_style, $height = 300, $width = 300): void { $file = dbunescbin($attach['content']); if (!$file) { return; } - $image = $this->getCover($file); + $image = $this->getCoverFromEpub($file); if ($image) { $srcwidth = imagesx($image); @@ -56,15 +60,139 @@ class Epubthumb { } } - private function getCover(string $filename): GdImage|false { - $epub = new EPub($filename); - $cover = $epub->getCover(); + /** + * Fetch the cover from the epub archive, if it's present. + * + * There's a few limitations here: This will only work if the cover + * is a raster image of a supported format. SVG does not work, neither + * will other schemes sometimes used for cover/front page. + * + * @param string $filename The local filename of the epub archive. + * + * @return GdImage|false If a cover is found, it is returned as a + * GdImage object. Otherwise return false. 
+ */ + private function getCoverFromEpub(string $filename): GdImage|false { + $epub = new ZipArchive(); + $rc = $epub->open($filename, ZipArchive::RDONLY); + + if ($rc !== true) { + logger("Error opening file '{$filename}': rc = ${rc}.", LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + $cover = false; + $cover_name = $this->parseEpub($epub); + if ($cover_name !== false) { + $cover = $epub->getFromName($cover_name); + if ($cover === false) { + logger("File '{$cover_name}' not found in EPUB.", LOGGER_DEBUG, LOG_DEBUG); + } + } + + $epub->close(); - if (! empty($cover)) { + if ($cover !== false && !empty($cover)) { return imagecreatefromstring($cover); } else { return false; } } + + /** + * Parse the epub to find the path of the cover image. + * + * @param ZipArchive $epub An opened epub ZipArchive. + * + * @return string|false The path to the cover image or false. + */ + private function parseEpub(ZipArchive $epub): string|false { + $packagePath = $this->getEpubPackagePath($epub); + if ($packagePath !== false) { + $package = $epub->getFromName($packagePath); + if ($package === false || empty($package)) { + logger("Package file '${packagePath}' not found in EPUB", LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + $domdoc = new DOMDocument(); + $domdoc->loadXML($package); + $xpath = new DOMXPath($domdoc); + $xpath->registerNamespace("n", "http://www.idpf.org/2007/opf"); + $nodes = $xpath->query('/n:package/n:manifest/n:item[@properties="cover-image"]'); + + if ($nodes->count() === 0) { + logger('No cover found in EPUB manifest.', LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + $node = $nodes->item(0); + if ($node === null) { + logger('No nodes in non-empty node list?', LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + if (is_a($node, DOMElement::class)) { + // The URL's in the package file is relative to the subdirectory + // within the epub archive where it is located. See + // https://www.w3.org/TR/epub-33/#sec-parsing-urls-metainf + return dirname($packagePath) . 
'/' . $node->getAttribute('href'); + } + } + + return false; + } + + /** + * Locate the package file within the epub. + * + * The package file in an epub archive contains the manifest + * that again may contain a reference to the cover for the + * epub. + * + * @param ZipArchive $epub An opened epub archive. + * + * @return string|false The full pathname of the package file or false. + */ + private function getEpubPackagePath(ZipArchive $epub): string|false { + // + // The only mandatory known file within the archive is the + // container file, so we fetch it to find the reference to + // the package file. + // + // See: https://www.w3.org/TR/epub-33/#sec-container-metainf + // + $container = $epub->getFromName('META-INF/container.xml'); + + if ($container === false || empty($container)) { + logger('No container in archive, probably not an EPUB.', LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + $domdoc = new DOMDocument(); + $domdoc->loadXML($container); + $nodes = $domdoc->getElementsByTagName('rootfile'); + + if ($nodes->count() == 0) { + logger('EPUB rootfile not found, is this an epub?', LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + $packageNode = $nodes->item(0); + if ($packageNode === null || !is_a($packageNode, DOMElement::class)) { + logger('EPUB rootfile element missing or invalid.', LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + $packagePath = $packageNode->getAttribute('full-path'); + $packageMediaType = $packageNode->getAttribute('media-type'); + + if (empty($packagePath) || $packageMediaType !== 'application/oebps-package+xml') { + logger('EPUB package path missing or incorrect media type.', LOGGER_DEBUG, LOG_DEBUG); + return false; + } + + return $packagePath; + } } diff --git a/tests/unit/Thumb/EpubthumbTest.php b/tests/unit/Thumb/EpubthumbTest.php index 5dabaf359..d381d940e 100644 --- a/tests/unit/Thumb/EpubthumbTest.php +++ b/tests/unit/Thumb/EpubthumbTest.php @@ -8,14 +8,151 @@ namespace Zotlabs\Tests\Unit\Thumbs; +use 
PHPUnit\Framework\Attributes\{AfterClass, Before, BeforeClass}; use Zotlabs\Thumbs\Epubthumb; use Zotlabs\Tests\Unit\UnitTestCase; +use ZipArchive; + class EpubthumbTest extends UnitTestCase { - function testEpubThumbMatch(): void { - $thumbnailer = new Epubthumb(); + private const TMPDIR = __DIR__ . '/tmp'; + + private Epubthumb $thumbnailer; + + /** + * Create a temp dir to use for the tests in this class. + */ + #[BeforeClass] + static function setupTmpDir(): void { + if (!is_dir(self::TMPDIR)) { + mkdir(self::TMPDIR); + } + } + + /** + * Clean up and remove the temp dir after the tests. + */ + #[AfterClass] + static function cleanupTmpDir(): void { + $files = scandir(self::TMPDIR); + if ($files !== false) { + foreach($files as $f) { + if ($f[0] !== '.') { + unlink(self::TMPDIR . '/' . $f); + } + } + } + rmdir(self::TMPDIR); + } + + /** + * Create the thumbnailer object for tests. + * + * This is run before each test, so that each test has it's own + * instance of the thumbnailer. + */ + #[Before] + function createThumbnailer(): void { + $this->thumbnailer = new Epubthumb(); + } + + /* + * Tests + */ + + public function testEpubThumbMatch(): void { + $this->assertTrue($this->thumbnailer->Match('application/epub+zip')); + $this->assertFalse($this->thumbnailer->Match('application/zip')); + } + + public function testNoThumbnailCreatedForFileThatDontExist(): void { + $this->checkCreateThumbnail(self::TMPDIR . '/nonexisting.epub', false); + } + + public function testNoThumbnailCreatedIfNotAZipArchive(): void { + $filename = self::TMPDIR . '/notazip.epub'; + + file_put_contents($filename, 'This is not a ZIP file!'); + + $this->checkCreateThumbnail($filename, false); + } + + public function testNoThumbnailCreatedIfInvalidEpub(): void { + $filename = self::TMPDIR . 
'/nocontainer.epub'; + + $epub = new ZipArchive(); + $epub->open($filename, ZipArchive::CREATE); + $epub->addFromString('somefile.txt', 'It was a dark an stormy night...'); + $epub->close(); + + $this->checkCreateThumbnail($filename, false); + } + + public function testNoThumbnailCreatedIfCoverFileMissing(): void { + $filename = self::TMPDIR . '/good.epub'; + + $epub = new ZipArchive(); + $epub->open($filename, ZipArchive::CREATE); + $this->addEpubContainer($epub); + $this->addEpubPackage($epub); + $epub->close(); + + $this->checkCreateThumbnail($filename, false); + } + + public function testCreateCoverFromEpub(): void { + $filename = self::TMPDIR . '/good.epub'; + + $epub = new ZipArchive(); + $epub->open($filename, ZipArchive::CREATE); + $this->addEpubContainer($epub); + $this->addEpubPackage($epub); + $epub->addFile(PROJECT_BASE . '/images/red-koala.png', 'EPUB/cover.png'); + $epub->close(); + + $this->checkCreateThumbnail($filename, true); + } + + /* + * Helper functions + */ + + private function checkCreateThumbnail(string $filename, bool $expectThumbnail): void { + $attach = [ 'content' => $filename ]; + $this->thumbnailer->Thumb($attach, 0); + + $this->assertEquals($expectThumbnail, file_exists($filename . 
'.thumb')); + } + + private function addEpubContainer(ZipArchive $epub): void { + $xml = <<<XML + <?xml version="1.0" encoding="UTF-8"?> + <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> + <rootfiles> + <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/> + </rootfiles> + </container> + XML; + + $epub->addEmptyDir('META-INF'); + $epub->addFromString('META-INF/container.xml', $xml); + } + + private function addEpubPackage(ZipArchive $epub): void { + $xml = <<<XML + <?xml version="1.0" encoding="UTF-8"?> + <package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="pub-identifier"> + <manifest> + <item + properties="cover-image" + id="ci" + href="cover.png" + media-type="image/png" /> + </manifest> + </package> + XML; - $this->assertTrue($thumbnailer->Match('application/epub+zip')); - $this->assertFalse($thumbnailer->Match('application/zip')); + $epub->addEmptyDir('EPUB'); + $epub->addFromString('EPUB/package.opf', $xml); } } |