aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzotlabs <mike@macgirvin.com>2017-03-14 00:23:44 -0700
committerzotlabs <mike@macgirvin.com>2017-03-14 00:23:44 -0700
commit1244b0e36af95226ba22f86025d2b24ab09003e6 (patch)
tree6cc2b5364fdf6b60c1e303f1ee9d97ce943b932c
parentab58c95d27e46b0fb37215d4c7b78ab370537cb6 (diff)
downloadvolse-hubzilla-1244b0e36af95226ba22f86025d2b24ab09003e6.tar.gz
volse-hubzilla-1244b0e36af95226ba22f86025d2b24ab09003e6.tar.bz2
volse-hubzilla-1244b0e36af95226ba22f86025d2b24ab09003e6.zip
class MarkdownSoap to safely store markdown by purifying and preserving (escaped) what may be unsafe code in codeblocks. The stored item needs to be unescaped just prior to calling the markdown-to-html processor
-rw-r--r--Zotlabs/Lib/MarkdownSoap.php86
1 files changed, 86 insertions, 0 deletions
diff --git a/Zotlabs/Lib/MarkdownSoap.php b/Zotlabs/Lib/MarkdownSoap.php
new file mode 100644
index 000000000..d0481eb4d
--- /dev/null
+++ b/Zotlabs/Lib/MarkdownSoap.php
@@ -0,0 +1,86 @@
+<?php
+
+namespace Zotlabs\Lib;
+
+/**
+ * MarkdownSoap
+ * Purify Markdown for storage
+ * $x = newMarkdownSoap($string_to_be_cleansed);
+ * $text = $x->clean();
+ *
+ * What this does:
+ * 1. extracts code blocks and privately escapes them from processing
+ * 2. Run html purifier on the content
+ * 3. put back the code blocks
+ * 4. run htmlspecialchars on the entire content for safe storage
+ *
+ * At render time:
+ * $markdown = \Zotlabs\Lib\MarkdownSoap::unescape($text);
+ * $html = \Michelf\MarkdownExtra::DefaultTransform($markdown);
+ */
+
+
+
+class MarkdownSoap {
+
+ private $token;
+
+ private $str;
+
+ function __construct($s) {
+ $this->str = $s;
+ $this->token = random_string(20);
+ }
+
+
+ function clean() {
+ $x = $this->extract_code($this->str);
+ $x = $this->purify($x);
+ $x = $this->putback_code($x);
+ $x = $this->escape($x);
+
+ return $x;
+ }
+
+ function extract_code($s) {
+
+ $text = preg_replace_callback('{
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?>
+ [ ]{'.'4'.'} # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,'.'4'.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ }xm',
+ [ $this , 'encode_code' ], $s);
+
+ return $text;
+ }
+
+ function encode_code($matches) {
+ return $this->token . ';' . base64_encode($matches[1]) . ';' ;
+ }
+
+ function decode_code($matches) {
+ return base64_decode($matches[1]);
+ }
+
+ function putback_code($s) {
+ $text = preg_replace_callback('{' . $this->token . '\;(.*?)\;}xm',[ $this, 'decode_code' ], $s);
+ return $text;
+ }
+
+ function purify($s) {
+ return purify_html($s);
+ }
+
+ function escape($s) {
+ return htmlspecialchars($s,ENT_QUOTES);
+ }
+
+ static public function unescape($s) {
+ return htmlspecialchars_decode($s,ENT_QUOTES);
+ }
+}