/**
 * MarkdownSoap
 *
 * Purifies Markdown for safe storage while preserving (escaped) whatever
 * potentially unsafe code appears inside indented code blocks.
 *
 * Storage time:
 *   $x = new MarkdownSoap($string_to_be_cleansed);
 *   $text = $x->clean();
 *
 * What clean() does:
 *   1. extracts code blocks and hides them from processing
 *   2. runs the HTML purifier on the remaining content
 *   3. puts the code blocks back
 *   4. runs htmlspecialchars on the entire content for safe storage
 *
 * Render time (the stored text must be unescaped just before it is handed
 * to the markdown-to-html processor):
 *   $markdown = \Zotlabs\Lib\MarkdownSoap::unescape($text);
 *   $html = \Michelf\MarkdownExtra::DefaultTransform($markdown);
 */
class MarkdownSoap {

    /**
     * Per-instance marker that prefixes every hidden code block.
     * Produced by random_string(20) -- presumably a 20 character random
     * string; confirm against that helper.
     */
    private $token;

    /** The raw Markdown text handed to the constructor. */
    private $str;

    /**
     * @param string $s Markdown text to be cleaned
     */
    function __construct($s) {
        $this->str = $s;
        $this->token = random_string(20);
    }

    /**
     * Run the whole pipeline: hide code blocks, purify, restore code blocks,
     * then HTML-escape the result.
     *
     * @return string text that is safe to store
     */
    function clean() {
        $x = $this->extract_code($this->str);
        $x = $this->purify($x);
        $x = $this->putback_code($x);
        $x = $this->escape($x);

        return $x;
    }

    /**
     * Replace every indented code block with an opaque placeholder of the
     * form "<token>;<base64>;" so that purify() cannot alter it.
     *
     * @param string $s
     * @return string $s with code blocks replaced; $s unchanged if PCRE fails
     */
    function extract_code($s) {

        $text = preg_replace_callback('{
                (?:\n\n|\A\n?)
                (                   # $1 = the code block -- one or more indented lines
                  (?>
                    [ ]{4}          # Lines must start with four spaces (tabs are not matched)
                    .*\n+
                  )+
                )
                ((?=^[ ]{0,4}\S)|\Z)    # Lookahead for non-space at line-start, or end of doc
            }xm',
            [ $this, 'encode_code' ], $s);

        // preg_replace_callback() yields null on a PCRE error; keep the input
        // instead of silently turning the whole text into null.
        return ($text === null) ? $s : $text;
    }

    /**
     * preg callback: turn one matched code block into its placeholder.
     *
     * The whole match ($matches[0]) is encoded, not only the captured block,
     * because the match also consumes the blank line in front of the block.
     * Encoding just $matches[1] would drop that separator and glue the code
     * onto the preceding paragraph once it is put back.
     *
     * @param array $matches
     * @return string
     */
    function encode_code($matches) {
        return $this->token . ';' . base64_encode($matches[0]) . ';';
    }

    /**
     * preg callback: decode the base64 payload of one placeholder.
     *
     * @param array $matches
     * @return string
     */
    function decode_code($matches) {
        return base64_decode($matches[1]);
    }

    /**
     * Replace every "<token>;<base64>;" placeholder with the original text.
     *
     * @param string $s
     * @return string $s with placeholders expanded; $s unchanged if PCRE fails
     */
    function putback_code($s) {
        // Quote the token so it is always matched literally, whatever
        // characters random_string() happens to produce.
        $pattern = '{' . preg_quote($this->token) . ';(.*?);}';
        $text = preg_replace_callback($pattern, [ $this, 'decode_code' ], $s);

        return ($text === null) ? $s : $text;
    }

    /**
     * Pass the text through the project helper purify_html().
     *
     * @param string $s
     * @return string
     */
    function purify($s) {
        return purify_html($s);
    }

    /**
     * HTML-escape the text (including both quote styles) for storage.
     *
     * @param string $s
     * @return string
     */
    function escape($s) {
        return htmlspecialchars($s, ENT_QUOTES);
    }

    /**
     * Reverse escape(); call this just before rendering the Markdown.
     *
     * @param string $s
     * @return string
     */
    static public function unescape($s) {
        return htmlspecialchars_decode($s, ENT_QUOTES);
    }
}