aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/mikespub/php-epub-meta/src/Tools/HtmlTools.php
blob: f905d265ad086eb84904e5ea5ec0b73c385679e1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
<?php

namespace SebLucas\EPubMeta\Tools;

/**
 * Static helpers for light-weight, regex based HTML string processing:
 * entity conversion, block-level element lookup, tag-aware truncation and
 * selective tag stripping.
 *
 * From Epubli\Common\Tools - see https://github.com/epubli/common
 * @author Epubli Developers <devs@epubli.com>
 */
class HtmlTools
{
    /**
     * Replaces substrings of $html according to the replacement map returned by
     * htmlEntityMap.php (located next to this file).
     *
     * NOTE(review): the map file is not visible from here; judging by the method
     * name it maps named entities to numeric ones - confirm against that file.
     *
     * @param string $html
     * @return string
     */
    public static function convertEntitiesNamedToNumeric($html)
    {
        /* load the map only once per request instead of on every call */
        static $entityMap = null;
        if ($entityMap === null) {
            $entityMap = include(__DIR__ . '/htmlEntityMap.php');
        }

        return strtr($html, $entityMap);
    }

    /**
     * Tells whether $name is contained in the list returned by
     * htmlBlockLevelElements.php (located next to this file).
     *
     * NOTE(review): the comparison is strict, which assumes the list holds
     * element names as strings - confirm against that file.
     *
     * @param string $name
     * @return bool
     */
    public static function isBlockLevelElement($name)
    {
        /* load the list only once per request instead of on every call */
        static $blockLevelElements = null;
        if ($blockLevelElements === null) {
            $blockLevelElements = include(__DIR__ . '/htmlBlockLevelElements.php');
        }

        return in_array($name, $blockLevelElements, true);
    }

    /**
     * performs a tag-aware truncation of (html-) strings, preserving tag integrity
     *
     * $length is either an absolute number of bytes or a percentage string such as
     * "20%". A percentage is applied to the byte length of each string individually,
     * so every entry of an array is shortened relative to its own size.
     * Lengths are counted in bytes (strlen/substr), not in characters.
     *
     * @param array<string>|string $html
     * @param int|string $length
     * @return array<string|null>|string|null same shape as $html (array in, array out);
     *                                         an entry is null if the regex engine fails
     */
    public static function truncate($html, $length = "20%")
    {
        /* resolve the length specification once, without touching it inside the loop */
        $percent = null;
        if (is_string($length)) {
            $length = trim($length);
            if (substr($length, -1) === '%') {
                /* interpret percentage value */
                $percent = intval(substr($length, 0, -1));
            } elseif (is_numeric($length)) {
                $length = (int) $length;
            }
        }

        $htmls = is_array($html) ? $html : [$html];
        foreach ($htmls as $key => $htmlString) {
            $limit = $percent === null
                ? $length
                : (int) (strlen($htmlString) * $percent / 100);
            $htmlString = substr($htmlString, 0, $limit);
            /* eliminate trailing truncated tag fragment if present */
            $htmls[$key] = preg_replace('/<[^>]*$/is', '', $htmlString);
        }

        /* preserve injected variable cast type (string|array) when returning processed entity */
        return is_array($html) ? $htmls : array_pop($htmls);
    }

    /**
     * strips all occurring html tags from $html (which can either be a string or an array of strings),
     * preserving all content enclosed by all tags in $keep and
     * dumping the content residing in all tags listed in $drop
     *
     * Processing steps per string:
     *  1. every "<tag ...>...</tag>" element whose tag is listed in $drop is replaced by a newline
     *  2. runs of two or more newline/carriage return/space characters collapse into one newline
     *  3. every remaining newline or carriage return becomes "<br />"
     *  4. every element whose tag is listed in $keep is rewritten without attributes and with
     *     its inner markup removed, padded with one space on each side
     *  5. all tags not listed in $keep are stripped
     * Tag names are matched case-insensitively.
     *
     * @param array<string>|string $html
     * @param array<string> $keep
     * @param array<string> $drop
     * @return array<string>|string
     */
    public static function stripHtmlTags(
        $html,
        $keep =
        ['title', 'br', 'p', 'h1','h2','h3','h4','h5','span','div','i','strong','b', 'table', 'td', 'th', 'tr'],
        $drop =
        ['head','style']
    ) {
        /* rebuilds a kept element as a bare tag pair around its text-only content */
        $rebuildNode = static function ($matches) {
            return " <" . $matches[1] . ">" . strip_tags($matches[2]) . "</" . $matches[1] . "> ";
        };

        /* the patterns only depend on $keep and $drop, so build them once up front */
        $dropPatterns = [];
        foreach ($drop as $dumpTag) {
            $tag = preg_quote($dumpTag, '/');
            /*
             * lazy ".*?" stops at the first closing tag, so content between two dropped
             * elements survives; the lookahead keeps "head" from matching "<header>"
             */
            $dropPatterns[] = '/<' . $tag . '(?=[\s\/>])[^>]*>.*?<\/' . $tag . '\s*>/is';
        }

        $keepPattern = null;
        $allowedTagList = '';
        if (count($keep) > 0) {
            $quotedTags = array_map(
                static function ($tag) {
                    return preg_quote($tag, '@');
                },
                array_values($keep)
            );
            /* the lookahead keeps "i" from matching "<img>" and "b" from matching "<br />" */
            $keepPattern = '@<(' . implode("|", $quotedTags) . ')(?=[\s/>])[^>]*?>(.*?)<\/\1>@i';
            $allowedTagList = "<" . implode("><", $keep) . ">";
        }

        $htmls = is_array($html) ? $html : [$html];
        foreach ($htmls as $key => $htmlString) {
            foreach ($dropPatterns as $dropPattern) {
                $htmlString = preg_replace($dropPattern, "\n", $htmlString);
            }
            $htmlString = preg_replace("/[\n\r ]{2,}/", "\n", $htmlString);
            /* no "|" inside the character class: a literal pipe must stay untouched */
            $htmlString = preg_replace("/[\n\r]/", '<br />', $htmlString);

            if ($keepPattern !== null) {
                $htmlString = preg_replace_callback($keepPattern, $rebuildNode, $htmlString);
            }

            $htmls[$key] = strip_tags($htmlString, $allowedTagList);
        }

        /* preserve injected variable cast type (string|array) when returning processed entity */
        return is_array($html) ? $htmls : array_pop($htmls);
    }
}