aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzotlabs <mike@macgirvin.com>2017-03-14 00:23:44 -0700
committerMario Vavti <mario@mariovavti.com>2017-03-29 13:37:36 +0200
commit6ea32a8ba31a9c459cda1b6c9694621958a7bd38 (patch)
tree3df6aec96aab622e2df991f64d17d9ba5da5125c
parent8d72cea2d1008dd73ff5f4c7442a4cc0c718eb5c (diff)
downloadvolse-hubzilla-6ea32a8ba31a9c459cda1b6c9694621958a7bd38.tar.gz
volse-hubzilla-6ea32a8ba31a9c459cda1b6c9694621958a7bd38.tar.bz2
volse-hubzilla-6ea32a8ba31a9c459cda1b6c9694621958a7bd38.zip
class MarkdownSoap to safely store markdown by purifying and preserving (escaped) what may be unsafe code in codeblocks. The stored item needs to be unescaped just prior to calling the markdown-to-html processor
-rw-r--r--Zotlabs/Lib/MarkdownSoap.php86
1 files changed, 86 insertions, 0 deletions
diff --git a/Zotlabs/Lib/MarkdownSoap.php b/Zotlabs/Lib/MarkdownSoap.php
new file mode 100644
index 000000000..d0481eb4d
--- /dev/null
+++ b/Zotlabs/Lib/MarkdownSoap.php
@@ -0,0 +1,86 @@
+<?php
+
+namespace Zotlabs\Lib;
+
+/**
+ * MarkdownSoap
+ * Purify Markdown for storage
+ * $x = newMarkdownSoap($string_to_be_cleansed);
+ * $text = $x->clean();
+ *
+ * What this does:
+ * 1. extracts code blocks and privately escapes them from processing
+ * 2. Run html purifier on the content
+ * 3. put back the code blocks
+ * 4. run htmlspecialchars on the entire content for safe storage
+ *
+ * At render time:
+ * $markdown = \Zotlabs\Lib\MarkdownSoap::unescape($text);
+ * $html = \Michelf\MarkdownExtra::DefaultTransform($markdown);
+ */
+
+
+
+class MarkdownSoap {
+
+ private $token;
+
+ private $str;
+
+ function __construct($s) {
+ $this->str = $s;
+ $this->token = random_string(20);
+ }
+
+
+ function clean() {
+ $x = $this->extract_code($this->str);
+ $x = $this->purify($x);
+ $x = $this->putback_code($x);
+ $x = $this->escape($x);
+
+ return $x;
+ }
+
+ function extract_code($s) {
+
+ $text = preg_replace_callback('{
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?>
+ [ ]{'.'4'.'} # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,'.'4'.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ }xm',
+ [ $this , 'encode_code' ], $s);
+
+ return $text;
+ }
+
+ function encode_code($matches) {
+ return $this->token . ';' . base64_encode($matches[1]) . ';' ;
+ }
+
+ function decode_code($matches) {
+ return base64_decode($matches[1]);
+ }
+
+ function putback_code($s) {
+ $text = preg_replace_callback('{' . $this->token . '\;(.*?)\;}xm',[ $this, 'decode_code' ], $s);
+ return $text;
+ }
+
+ function purify($s) {
+ return purify_html($s);
+ }
+
+ function escape($s) {
+ return htmlspecialchars($s,ENT_QUOTES);
+ }
+
+ static public function unescape($s) {
+ return htmlspecialchars_decode($s,ENT_QUOTES);
+ }
+}