diff options
author | Harald Eilertsen <haraldei@anduin.net> | 2024-11-07 19:23:35 +0100 |
---|---|---|
committer | Harald Eilertsen <haraldei@anduin.net> | 2024-11-08 16:43:29 +0100 |
commit | fe30b5497593dcfb4445d72c99fa357011cebf46 (patch) | |
tree | 6d4d89fed5dcd52e26dd1e07e7cae04206c737d4 /vendor/mikespub/php-epub-meta/src/Data/Item.php | |
parent | b00ae997a5dab923a99e1f1cccf35bb52eba9a62 (diff) | |
download | volse-hubzilla-fe30b5497593dcfb4445d72c99fa357011cebf46.tar.gz volse-hubzilla-fe30b5497593dcfb4445d72c99fa357011cebf46.tar.bz2 volse-hubzilla-fe30b5497593dcfb4445d72c99fa357011cebf46.zip |
Update php-epub-meta and use composer for dep handling.
Note that we upgrade to the 2.x branch of the dependency, as the 3.x
branch requires PHP version 8.2 or later. There's no reason for us to
move our minimum supported version of PHP just yet.
Diffstat (limited to 'vendor/mikespub/php-epub-meta/src/Data/Item.php')
-rw-r--r-- | vendor/mikespub/php-epub-meta/src/Data/Item.php | 193 |
1 files changed, 193 insertions, 0 deletions
<?php

namespace SebLucas\EPubMeta\Data;

use SebLucas\EPubMeta\Tools\HtmlTools;
use DOMDocument;
use DOMElement;
use DOMText;
use DOMXPath;
use Exception;

/**
 * An item of the EPUB manifest.
 *
 * Holds the manifest attributes (id, href, media type) of one file in the
 * EPUB and loads the file's data lazily through a callable the first time
 * it is requested.
 *
 * @author Simon Schrape <simon@epubli.com>
 */
class Item
{
    /** Media type assumed when none is given to the constructor. */
    public const XHTML = 'application/xhtml+xml';

    /** @var string */
    protected $id;
    /** @var string The path to the corresponding file. */
    protected $href;
    /** @var string */
    protected $mediaType;
    /** @var callable|null A callable to get data from the referenced file. */
    protected $dataCallable;
    /** @var string The data read from the referenced file. */
    protected $data;
    /** @var int The size of the referenced file. */
    protected $size;

    /**
     * @param string $id This Item’s identifier.
     * @param string $href The path to the corresponding file.
     * @param callable $dataCallable A callable to get data from the referenced file.
     * @param int $size The size of the referenced file.
     * @param string|null $mediaType The media type of the corresponding file. If omitted XHTML is assumed.
     */
    public function __construct($id, $href, $dataCallable, $size, $mediaType = null)
    {
        $this->id = $id;
        $this->href = $href;
        $this->dataCallable = $dataCallable;
        $this->size = $size;
        // static:: (not self::) so a subclass may override the default media type.
        $this->mediaType = $mediaType ?: static::XHTML;
    }

    /**
     * @return string
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * @return string
     */
    public function getHref()
    {
        return $this->href;
    }

    /**
     * @return string
     */
    public function getMediaType()
    {
        return $this->mediaType;
    }

    /**
     * Extract (a part of) the contents from the referenced XML file.
     *
     * The document is walked in document order, starting at the element with
     * ID $fragmentBegin (or at <body>, falling back to the document element)
     * and stopping right before the element with ID $fragmentEnd (or at the
     * end of the document when no end is given).
     *
     * @param string|null $fragmentBegin ID of the element where to start reading the contents.
     * @param string|null $fragmentEnd ID of the element where to stop reading the contents.
     * @param bool $keepMarkup Whether to keep the XHTML markup rather than extracted plain text.
     * @return string The contents of that fragment.
     * @throws Exception If the begin or the end of the fragment cannot be found.
     */
    public function getContents($fragmentBegin = null, $fragmentEnd = null, $keepMarkup = false)
    {
        $dom = new DOMDocument();
        $dom->loadXML(HtmlTools::convertEntitiesNamedToNumeric($this->getData()));

        // get the starting point
        if ($fragmentBegin) {
            $xp = new DOMXPath($dom);
            // The ID comes from EPUB content, so it must be quoted as an XPath
            // literal; naive interpolation breaks on IDs containing quotes.
            $matches = $xp->query('//*[@id=' . self::quoteXPathLiteral((string) $fragmentBegin) . ']');
            $node = $matches ? $matches->item(0) : null;
            if (!$node) {
                throw new Exception("Begin of fragment not found: No element with ID $fragmentBegin!");
            }
        } else {
            $node = $dom->getElementsByTagName('body')->item(0) ?: $dom->documentElement;
        }

        // Tags that are reproduced (without attributes) when $keepMarkup is set.
        $allowableTags = [
            'br',
            'p',
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'span',
            'div',
            'i',
            'strong',
            'b',
            'table',
            'td',
            'th',
            'tr',
        ];
        $contents = '';
        // Stack of strings to emit when the matching open element is left.
        $endTags = [];
        /** @var DOMElement|DOMText $node */
        // traverse DOM structure till end point is reached, accumulating the contents
        while (
            $node
            && !(
                $fragmentEnd
                && $node instanceof DOMElement
                // strict comparison: loose != treats numeric-looking IDs such as "1e1" and "10" as equal
                && $node->getAttribute('id') === (string) $fragmentEnd
            )
        ) {
            if ($node instanceof DOMText) {
                // when encountering a text node append its value to the contents
                $contents .= $keepMarkup ? htmlspecialchars($node->nodeValue) : $node->nodeValue;
            } elseif ($node instanceof DOMElement) {
                $tag = $node->localName;
                if ($keepMarkup && in_array($tag, $allowableTags, true)) {
                    $contents .= "<$tag>";
                    $endTags[] = "</$tag>";
                } elseif (HtmlTools::isBlockLevelElement($tag)) {
                    // add whitespace between contents of adjacent blocks
                    $endTags[] = PHP_EOL;
                } else {
                    // keep the stack aligned with the element nesting
                    $endTags[] = '';
                }

                if ($node->hasChildNodes()) {
                    // step into
                    $node = $node->firstChild;
                    continue;
                }
            }

            // leave node
            while ($node) {
                if ($node instanceof DOMElement) {
                    // Ancestors of the start node were never pushed; array_pop()
                    // then yields null, which appends nothing.
                    $contents .= array_pop($endTags);
                }

                if ($node->nextSibling) {
                    // step right
                    $node = $node->nextSibling;
                    break;
                } elseif ($node = $node->parentNode) {
                    // step out
                    continue;
                } elseif ($fragmentEnd) {
                    // reached end of DOM without finding fragment end
                    throw new Exception("End of fragment not found: No element with ID $fragmentEnd!");
                }
            }
        }
        // close whatever is still open when the fragment end was hit inside nested elements
        while ($endTags) {
            $contents .= array_pop($endTags);
        }

        return $contents;
    }

    /**
     * Get the file data.
     *
     * The data callable is invoked on first use only; its result is cached
     * and the callable is released afterwards.
     *
     * @return string The binary data of the corresponding file.
     */
    public function getData()
    {
        if ($this->dataCallable) {
            $this->data = call_user_func($this->dataCallable);
            $this->dataCallable = null;
        }

        return $this->data;
    }

    /**
     * Get the size of the corresponding file.
     *
     * Falls back to the byte length of the loaded data when no (non-zero)
     * size was given to the constructor.
     *
     * @return int
     */
    public function getSize()
    {
        // the cast avoids passing null to strlen() when no data is available
        return $this->size ?: strlen((string) $this->getData());
    }

    /**
     * Quote a string so it can be embedded as a literal in an XPath 1.0 expression.
     *
     * XPath 1.0 has no escape sequences inside string literals, so a value
     * containing both quote characters has to be assembled with concat().
     *
     * @param string $value The raw value.
     * @return string An XPath expression evaluating to exactly $value.
     */
    private static function quoteXPathLiteral(string $value): string
    {
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        // Split on apostrophes and re-join them as double-quoted "'" arguments.
        return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
    }
}