1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
<?php
namespace SebLucas\EPubMeta\Tools;
/**
* From Epubli\Common\Tools - see https://github.com/epubli/common
* @author Epubli Developers <devs@epubli.com>
*/
class HtmlTools
{
    /**
     * Translates $html with strtr(), using the replacement pairs returned by
     * htmlEntityMap.php (located next to this file).
     *
     * NOTE(review): the map presumably translates named entities into numeric
     * ones, as the method name says - the map file itself is not visible here.
     *
     * @param string $html
     * @return string
     */
    public static function convertEntitiesNamedToNumeric($html)
    {
        /* load the map only once per request instead of re-including it on every call */
        static $entityMap = null;
        if ($entityMap === null) {
            $entityMap = include(__DIR__ . '/htmlEntityMap.php');
        }
        return strtr($html, $entityMap);
    }

    /**
     * Tells whether $name is one of the values returned by
     * htmlBlockLevelElements.php (located next to this file).
     * The comparison is strict and case-sensitive.
     *
     * @param string $name
     * @return bool
     */
    public static function isBlockLevelElement($name)
    {
        /* load the list only once per request instead of re-including it on every call */
        static $blockLevelElements = null;
        if ($blockLevelElements === null) {
            $blockLevelElements = include(__DIR__ . '/htmlBlockLevelElements.php');
        }
        return in_array($name, $blockLevelElements, true);
    }

    /**
     * Cuts (html-) strings down to $length bytes and removes a tag that was cut
     * in half at the end of the result. Tags left open by the cut are NOT closed.
     *
     * $length is either an absolute byte count or a percentage string such as
     * "20%". A percentage is applied to each string's own length, so every entry
     * of an array input is shortened relative to itself.
     * The cut is byte-based (strlen/substr), so it may split a multi-byte character.
     *
     * @param array<string>|string $html
     * @param int|string $length absolute length, or percentage of each string ("20%")
     * @return array<string>|string same shape (string|array) as $html
     */
    public static function truncate($html, $length = "20%")
    {
        $htmls = is_array($html) ? $html : [$html];

        /* interpret the length specification once, without overwriting it per string */
        $percentage = null;
        if (is_string($length)) {
            $length = trim($length);
            if (substr($length, -1) == '%') {
                $percentage = intval(substr($length, 0, -1));
            }
        }

        foreach ($htmls as $key => $htmlString) {
            if ($percentage === null) {
                $limit = $length;
            } else {
                /* percentage value: relative to the length of the current string */
                $limit = (int) (strlen($htmlString) * $percentage / 100);
            }
            $htmlString = substr($htmlString, 0, $limit);
            /* eliminate trailing truncated tag fragment if present */
            $htmls[$key] = preg_replace('/<[^>]*$/is', '', $htmlString);
        }

        return is_array($html) ? $htmls : array_pop($htmls);
    }

    /**
     * strips all occurring html tags from $html (which can either be a string or an array of strings),
     * preserving all content enclosed by all tags in $keep and
     * dumping the content residing in all tags listed in $drop
     *
     * Processing order per string:
     *  1. every <tag ...>...</tag> element listed in $drop is replaced by a newline
     *  2. runs of two or more newlines/carriage returns/spaces collapse into one newline
     *  3. every remaining newline or carriage return becomes "<br />"
     *  4. elements listed in $keep are rewritten as " <tag>text</tag> ", i.e. without
     *     attributes and with all nested tags stripped from their content
     *  5. all tags not listed in $keep are removed (their text content stays)
     *
     * Tag names are matched case-insensitively and as whole names only
     * ("p" does not match <pre>, "head" does not match <header>).
     *
     * @param array<string>|string $html
     * @param array<string> $keep
     * @param array<string> $drop
     * @return array<string>|string same shape (string|array) as $html
     */
    public static function stripHtmlTags(
        $html,
        $keep =
        ['title', 'br', 'p', 'h1','h2','h3','h4','h5','span','div','i','strong','b', 'table', 'td', 'th', 'tr'],
        $drop =
        ['head','style']
    ) {
        $htmls = is_array($html) ? $html : [$html];

        /* everything below is independent of the individual string, so build it only once */

        /* one non-greedy pattern per dropped tag, so text between two dropped elements survives */
        $dropPatterns = [];
        foreach ($drop as $dumpTag) {
            $quotedTag = preg_quote($dumpTag, '/');
            $dropPatterns[] = '/<' . $quotedTag . '(?=[\s>\/])[^>]*>.*?<\/' . $quotedTag . '\s*>/is';
        }

        /* the lookahead after the tag name prevents prefix matches such as "p" in <pre> */
        $keepRegExp = null;
        if (count($keep) > 0) {
            $quotedKeep = [];
            foreach ($keep as $keepTag) {
                $quotedKeep[] = preg_quote($keepTag, '@');
            }
            $keepRegExp = '@<(' . implode('|', $quotedKeep) . ')(?=[\s>\/])[^>]*?>(.*?)<\/\1>@i';
        }

        /*
         * the pattern only ever matches tags from $keep (in any letter case), so every match
         * is preserved: attributes are dropped and nested tags are stripped from the content
         */
        $rebuildNode = function ($matches) {
            return ' <' . $matches[1] . '>' . strip_tags($matches[2]) . '</' . $matches[1] . '> ';
        };

        $allowedTags = "<" . implode("><", $keep) . ">";

        foreach ($htmls as $key => $htmlString) {
            $htmlString = preg_replace($dropPatterns, "\n", $htmlString);
            $htmlString = preg_replace("/[\n\r ]{2,}/", "\n", $htmlString);
            /* only line breaks are converted - a "|" inside the text must stay untouched */
            $htmlString = preg_replace("/[\n\r]/", '<br />', $htmlString);
            /* @TODO: remove style tags and only keep body content (drop head) */
            if ($keepRegExp !== null) {
                $htmlString = preg_replace_callback($keepRegExp, $rebuildNode, $htmlString);
            }
            $htmls[$key] = strip_tags($htmlString, $allowedTags);
        }

        /* preserve injected variable cast type (string|array) when returning processed entity */
        return is_array($html) ? $htmls : array_pop($htmls);
    }
}
|