diff options
author | zotlabs <mike@macgirvin.com> | 2017-03-14 00:23:44 -0700 |
---|---|---|
committer | Mario Vavti <mario@mariovavti.com> | 2017-03-29 13:37:36 +0200 |
commit | 6ea32a8ba31a9c459cda1b6c9694621958a7bd38 (patch) | |
tree | 3df6aec96aab622e2df991f64d17d9ba5da5125c | |
parent | 8d72cea2d1008dd73ff5f4c7442a4cc0c718eb5c (diff) | |
download | volse-hubzilla-6ea32a8ba31a9c459cda1b6c9694621958a7bd38.tar.gz volse-hubzilla-6ea32a8ba31a9c459cda1b6c9694621958a7bd38.tar.bz2 volse-hubzilla-6ea32a8ba31a9c459cda1b6c9694621958a7bd38.zip |
class MarkdownSoap to safely store markdown by purifying and preserving (escaped) what may be unsafe code in codeblocks. The stored item needs to be unescaped just prior to calling the markdown-to-html processor
-rw-r--r-- | Zotlabs/Lib/MarkdownSoap.php | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/Zotlabs/Lib/MarkdownSoap.php b/Zotlabs/Lib/MarkdownSoap.php new file mode 100644 index 000000000..d0481eb4d --- /dev/null +++ b/Zotlabs/Lib/MarkdownSoap.php @@ -0,0 +1,86 @@ +<?php + +namespace Zotlabs\Lib; + +/** + * MarkdownSoap + * Purify Markdown for storage + * $x = newMarkdownSoap($string_to_be_cleansed); + * $text = $x->clean(); + * + * What this does: + * 1. extracts code blocks and privately escapes them from processing + * 2. Run html purifier on the content + * 3. put back the code blocks + * 4. run htmlspecialchars on the entire content for safe storage + * + * At render time: + * $markdown = \Zotlabs\Lib\MarkdownSoap::unescape($text); + * $html = \Michelf\MarkdownExtra::DefaultTransform($markdown); + */ + + + +class MarkdownSoap { + + private $token; + + private $str; + + function __construct($s) { + $this->str = $s; + $this->token = random_string(20); + } + + + function clean() { + $x = $this->extract_code($this->str); + $x = $this->purify($x); + $x = $this->putback_code($x); + $x = $this->escape($x); + + return $x; + } + + function extract_code($s) { + + $text = preg_replace_callback('{ + (?:\n\n|\A\n?) + ( # $1 = the code block -- one or more lines, starting with a space/tab + (?> + [ ]{'.'4'.'} # Lines must start with a tab or a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{0,'.'4'.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + }xm', + [ $this , 'encode_code' ], $s); + + return $text; + } + + function encode_code($matches) { + return $this->token . ';' . base64_encode($matches[1]) . ';' ; + } + + function decode_code($matches) { + return base64_decode($matches[1]); + } + + function putback_code($s) { + $text = preg_replace_callback('{' . $this->token . '\;(.*?)\;}xm',[ $this, 'decode_code' ], $s); + return $text; + } + + function purify($s) { + return purify_html($s); + } + + function escape($s) { + return htmlspecialchars($s,ENT_QUOTES); + } + + static public function unescape($s) { + return htmlspecialchars_decode($s,ENT_QUOTES); + } +} |