1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
<?php
namespace Zotlabs\Lib;
/**
* MarkdownSoap
* Purify Markdown for storage
* $x = new MarkdownSoap($string_to_be_cleansed);
* $text = $x->clean();
*
* What this does:
* 1. extracts code blocks and privately escapes them from processing
* 2. Run html purifier on the content
* 3. put back the code blocks
* 4. run htmlspecialchars on the entire content for safe storage
*
* At render time:
* $markdown = \Zotlabs\Lib\MarkdownSoap::unescape($text);
* $html = \Michelf\MarkdownExtra::DefaultTransform($markdown);
*/
class MarkdownSoap {

    /**
     * @var string Marker placed in front of every stashed code block.
     *             Set from random_string(20) in the constructor.
     */
    private $token;

    /**
     * @var string The Markdown text handed to the constructor.
     */
    private $str;

    /**
     * @param string $s Markdown text to be cleansed
     */
    public function __construct($s) {
        $this->str   = $s;
        $this->token = random_string(20);
    }

    /**
     * Run the full pipeline on the text given to the constructor:
     * stash code blocks, purify, restore code blocks, escape.
     *
     * @return string text escaped with htmlspecialchars(); reverse with unescape()
     */
    public function clean() {
        $x = $this->extract_code($this->str);
        $x = $this->purify($x);
        $x = $this->putback_code($x);
        $x = $this->escape($x);
        return $x;
    }

    /**
     * Replace every indented code block with "<token>;<base64>;" so that
     * the purifier never sees its content.
     *
     * @param string $s
     * @return string $s with code blocks stashed; $s unchanged if PCRE fails
     */
    public function extract_code($s) {
        $text = preg_replace_callback('{
                (?:\n\n|\A\n?)
                (                     # $1 = the code block -- one or more indented lines
                  (?>
                    [ ]{4}            # each line must start with four spaces
                    .*\n+
                  )+
                )
                ((?=^[ ]{0,4}\S)|\Z)  # lookahead for non-space at line start, or end of doc
            }xm',
            [ $this, 'encode_code' ], $s);

        // preg_replace_callback() yields null on a PCRE error; keep the input
        // rather than letting the whole text vanish further down the pipeline.
        return ($text === null) ? $s : $text;
    }

    /**
     * preg callback: wrap the complete match (including the leading blank
     * line) as "<token>;<base64>;".
     *
     * @param array $matches
     * @return string
     */
    public function encode_code($matches) {
        return $this->token . ';' . base64_encode($matches[0]) . ';';
    }

    /**
     * preg callback: decode the base64 payload captured by putback_code().
     *
     * @param array $matches
     * @return string|false result of base64_decode() on capture group 1
     */
    public function decode_code($matches) {
        return base64_decode($matches[1]);
    }

    /**
     * Restore the code blocks stashed by extract_code().
     *
     * @param string $s
     * @return string $s with placeholders expanded; $s unchanged if PCRE fails
     */
    public function putback_code($s) {
        // The token is data, not a pattern: quote it so that any regex
        // metacharacter in it is matched literally. The extended (x) flag is
        // deliberately not used, so "#" or whitespace in the token stay literal.
        $pattern = '/' . preg_quote($this->token, '/') . ';(.*?);/';
        $text = preg_replace_callback($pattern, [ $this, 'decode_code' ], $s);

        return ($text === null) ? $s : $text;
    }

    /**
     * Pass the text through purify_html(), converting autolinks beforehand
     * so they are not treated as HTML tags.
     *
     * @param string $s
     * @return string
     */
    public function purify($s) {
        $s = $this->protect_autolinks($s);
        $s = purify_html($s);
        $s = $this->unprotect_autolinks($s);
        return $s;
    }

    /**
     * Rewrite Markdown autolinks "<http(s)://...>" as "[url](url)".
     *
     * @param string $s
     * @return string $s with autolinks rewritten; $s unchanged if PCRE fails
     */
    public function protect_autolinks($s) {
        $text = preg_replace('/\<(https?\:\/\/)(.*?)\>/', '[$1$2]($1$2)', $s);

        return ($text === null) ? $s : $text;
    }

    /**
     * Counterpart of protect_autolinks(). Currently returns its input
     * unchanged: the "[url](url)" form is kept.
     *
     * @param string $s
     * @return string
     */
    public function unprotect_autolinks($s) {
        return $s;
    }

    /**
     * Escape for storage. Existing entities are not double-encoded.
     *
     * @param string $s
     * @return string
     */
    public function escape($s) {
        return htmlspecialchars($s, ENT_QUOTES, 'UTF-8', false);
    }

    /**
     * Reverse escape() at render time, before the Markdown transform.
     *
     * @param string $s
     * @return string
     */
    public static function unescape($s) {
        return htmlspecialchars_decode($s, ENT_QUOTES);
    }
}
|