aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/mikespub/php-epub-meta/src/Data/Item.php
blob: 27146292090a84d2d3c33bffcad58e0b00286936 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
<?php

namespace SebLucas\EPubMeta\Data;

use SebLucas\EPubMeta\Tools\HtmlTools;
use DOMDocument;
use DOMElement;
use DOMText;
use DOMXPath;
use Exception;

/**
 * An item of the EPUB manifest.
 *
 * Stores the identifier, path, media type and size handed to the constructor and
 * gives lazy access to the referenced file's data through a callable.
 *
 * @author Simon Schrape <simon@epubli.com>
 */
class Item
{
    /** Media type assumed when the constructor receives none. */
    public const XHTML = 'application/xhtml+xml';
    /** @var string */
    protected $id;
    /** @var string The path to the corresponding file. */
    protected $href;
    /** @var string */
    protected $mediaType;
    /** @var callable|null A callable to get data from the referenced file; reset to null once it has been called. */
    protected $dataCallable;
    /** @var string|null The data read from the referenced file (unset until getData() ran the callable). */
    protected $data;
    /** @var int The size of the referenced file. */
    protected $size;

    /**
     * @param string $id This Item's identifier.
     * @param string $href The path to the corresponding file.
     * @param callable|null $dataCallable A callable to get data from the referenced file.
     * @param int $size The size of the referenced file.
     * @param string|null $mediaType The media type of the corresponding file. If omitted XHTML is assumed.
     */
    public function __construct($id, $href, $dataCallable, $size, $mediaType = null)
    {
        $this->id = $id;
        $this->href = $href;
        $this->dataCallable = $dataCallable;
        $this->size = $size;
        // static:: (not self::) so a subclass redefining XHTML changes the default.
        $this->mediaType = $mediaType ?: static::XHTML;
    }

    /**
     * @return string
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * @return string
     */
    public function getHref()
    {
        return $this->href;
    }

    /**
     * @return string
     */
    public function getMediaType()
    {
        return $this->mediaType;
    }

    /**
     * Extract (a part of) the contents from the referenced XML file.
     *
     * The document is walked in document order, starting at the element with ID
     * $fragmentBegin (or at <body>, falling back to the document element) and
     * stopping right before the element with ID $fragmentEnd (or at the end of
     * the document when no end is given).
     *
     * @param string|null $fragmentBegin ID of the element where to start reading the contents.
     * @param string|null $fragmentEnd ID of the element where to stop reading the contents.
     * @param bool $keepMarkup Whether to keep the XHTML markup rather than extracted plain text.
     * @return string The contents of that fragment.
     * @throws Exception If the begin or the end of the fragment cannot be found.
     */
    public function getContents($fragmentBegin = null, $fragmentEnd = null, $keepMarkup = false)
    {
        $dom = new DOMDocument();
        $dom->loadXML(HtmlTools::convertEntitiesNamedToNumeric($this->getData()));

        // Compare IDs as strings: a plain truthiness test would ignore the ID "0",
        // and a loose comparison would treat e.g. "07" and "7" as the same ID.
        $beginId = (string) $fragmentBegin;
        $endId = (string) $fragmentEnd;
        $hasBegin = $beginId !== '';
        $hasEnd = $endId !== '';

        // get the starting point
        if ($hasBegin) {
            $xp = new DOMXPath($dom);
            // Quote the ID as an XPath literal so IDs containing quotes cannot break the expression.
            $matches = $xp->query('//*[@id=' . self::xpathLiteral($beginId) . ']');
            $node = $matches ? $matches->item(0) : null;
            if (!$node) {
                throw new Exception("Begin of fragment not found: No element with ID $fragmentBegin!");
            }
        } else {
            $node = $dom->getElementsByTagName('body')->item(0) ?: $dom->documentElement;
        }

        // Elements whose tags are reproduced (without attributes) when $keepMarkup is set.
        $allowableTags = [
            'br',
            'p',
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'span',
            'div',
            'i',
            'strong',
            'b',
            'table',
            'td',
            'th',
            'tr',
        ];
        $contents = '';
        // Stack holding, for every element entered but not yet left, the text to emit on leaving it.
        $endTags = [];
        /** @var DOMElement|DOMText $node */
        // traverse DOM structure till end point is reached, accumulating the contents
        while ($node && (!$hasEnd || !$node->hasAttributes() || $node->getAttribute('id') !== $endId)) {
            if ($node instanceof DOMText) {
                // when encountering a text node append its value to the contents
                $contents .= $keepMarkup ? htmlspecialchars($node->nodeValue) : $node->nodeValue;
            } elseif ($node instanceof DOMElement) {
                $tag = $node->localName;
                if ($keepMarkup && in_array($tag, $allowableTags, true)) {
                    $contents .= "<$tag>";
                    $endTags[] = "</$tag>";
                } elseif (HtmlTools::isBlockLevelElement($tag)) {
                    // add whitespace between contents of adjacent blocks
                    $endTags[] = PHP_EOL;
                } else {
                    $endTags[] = '';
                }

                if ($node->hasChildNodes()) {
                    // step into
                    $node = $node->firstChild;
                    continue;
                }
            }

            // leave node
            while ($node) {
                if ($node instanceof DOMElement) {
                    // Ancestors of the start node were never pushed; array_pop() then
                    // yields null, which appends nothing.
                    $contents .= array_pop($endTags);
                }

                if ($node->nextSibling) {
                    // step right
                    $node = $node->nextSibling;
                    break;
                } elseif ($node = $node->parentNode) {
                    // step out
                    continue;
                } elseif ($hasEnd) {
                    // reached end of DOM without finding fragment end
                    throw new Exception("End of fragment not found: No element with ID $fragmentEnd!");
                }
            }
        }
        // close whatever is still open when the traversal stopped at the fragment end
        while ($endTags) {
            $contents .= array_pop($endTags);
        }

        return $contents;
    }

    /**
     * Get the file data.
     *
     * The callable passed to the constructor is invoked on the first call only;
     * its result is kept and returned by every later call.
     *
     * @return string The binary data of the corresponding file.
     */
    public function getData()
    {
        if ($this->dataCallable) {
            $this->data = call_user_func($this->dataCallable);
            $this->dataCallable = null;
        }

        return $this->data;
    }

    /**
     * Get the size of the corresponding file.
     *
     * Falls back to the byte length of the file data when no (non-zero) size was
     * given to the constructor.
     *
     * @return int
     */
    public function getSize()
    {
        // Cast so that missing data counts as 0 bytes instead of passing null to strlen().
        return $this->size ?: strlen((string) $this->getData());
    }

    /**
     * Turn an arbitrary string into an XPath 1.0 string literal.
     *
     * XPath 1.0 has no escape sequences inside literals, so a value containing
     * both quote characters is assembled with concat().
     *
     * @param string $value
     * @return string An XPath expression evaluating to $value.
     */
    private static function xpathLiteral($value)
    {
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        // Split at the apostrophes and re-insert each one as a double-quoted literal.
        return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
    }
}