<?php

namespace Zotlabs\Lib;

/**
 * @brief MarkdownSoap class.
 *
 * Purify Markdown for storage
 * @code{.php}
 *   $x = new MarkdownSoap($string_to_be_cleansed);
 *   $text = $x->clean();
 * @endcode
 * What this does:
 * 1. Extracts code blocks and privately escapes them from processing.
 * 2. Runs the HTML purifier on the remaining content.
 * 3. Puts the code blocks back.
 * 4. Runs htmlspecialchars() on the entire content for safe storage.
 *
 * At render time:
 * @code{.php}
 *    $markdown = \Zotlabs\Lib\MarkdownSoap::unescape($text);
 *    $html = \Michelf\MarkdownExtra::DefaultTransform($markdown);
 * @endcode
 */
class MarkdownSoap {

	/**
	 * @var string Markdown text handed to the constructor.
	 */
	private $str;
	/**
	 * @var string Random marker that prefixes every stashed code block.
	 */
	private $token;


	/**
	 * @param string $s Markdown text to be cleansed
	 */
	public function __construct($s) {
		$this->str   = $s;
		$this->token = random_string(20);
	}

	/**
	 * @brief Runs the full cleaning pipeline on the text given to the constructor.
	 *
	 * Code blocks are stashed, the remainder is purified, the code blocks
	 * are restored and finally everything is HTML-escaped.
	 *
	 * @return string escaped text, to be reversed with unescape() before rendering
	 */
	public function clean() {

		$x = $this->extract_code($this->str);
		$x = $this->purify($x);
		$x = $this->putback_code($x);

		return $this->escape($x);
	}

	/**
	 * @brief Extracts code blocks and privately escapes them from processing.
	 *
	 * Every matched block is replaced by "<token>;<base64 of the block>;".
	 * Only lines indented with four spaces are matched by this pattern;
	 * tab-indented lines are left in place.
	 *
	 * @see encode_code()
	 * @see putback_code()
	 *
	 * @param string $s
	 * @return string
	 */
	public function extract_code($s) {

		$pattern = '{
					(?:\n\n|\A\n?)
					(	            # $1 = the code block -- one or more indented lines
					  (?>
						[ ]{4}      # Each line must start with four spaces
						.*\n+
					  )+
					)
					((?=^[ ]{0,4}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
				}xm';

		$text = preg_replace_callback($pattern, [ $this, 'encode_code' ], $s);

		// preg_replace_callback() yields null on a PCRE error; keep the
		// input (code blocks then simply get purified) instead of losing it.
		return $text ?? $s;
	}

	/**
	 * @brief Callback for extract_code(): turns one match into a placeholder.
	 *
	 * The complete match (including the leading newlines) is base64 encoded
	 * so that putback_code() can restore it verbatim.
	 *
	 * @param array $matches regex match as passed by preg_replace_callback()
	 * @return string placeholder of the form "<token>;<base64>;"
	 */
	public function encode_code($matches) {
		return $this->token . ';' . base64_encode($matches[0]) . ';' ;
	}

	/**
	 * @brief Callback for putback_code(): turns one placeholder back into text.
	 *
	 * @param array $matches regex match; index 1 holds the base64 payload
	 * @return string
	 */
	public function decode_code($matches) {
		return base64_decode($matches[1]);
	}

	/**
	 * @brief Put back the code blocks.
	 *
	 * @see extract_code()
	 * @see decode_code()
	 *
	 * @param string $s
	 * @return string
	 */
	public function putback_code($s) {

		// Quote the token so it is always matched literally, whatever
		// characters it happens to contain.
		$pattern = '/' . preg_quote($this->token, '/') . ';(.*?);/';

		$text = preg_replace_callback($pattern, [ $this, 'decode_code' ], $s);

		// On a PCRE error return the input rather than null.
		return $text ?? $s;
	}

	/**
	 * @brief Passes the text through purify_html().
	 *
	 * Autolinks are rewritten by protect_autolinks() before purification.
	 *
	 * @param string $s
	 * @return string
	 */
	public function purify($s) {
		$s = $this->protect_autolinks($s);
		$s = purify_html($s);
		$s = $this->unprotect_autolinks($s);
		return $s;
	}

	/**
	 * @brief Rewrites "<http(s)://...>" autolinks as "[url](url)" Markdown links.
	 *
	 * @param string $s
	 * @return string
	 */
	public function protect_autolinks($s) {
		$text = preg_replace('/\<(https?\:\/\/)(.*?)\>/', '[$1$2]($1$2)', $s);

		// On a PCRE error return the input rather than null.
		return $text ?? $s;
	}

	/**
	 * @brief Counterpart of protect_autolinks(); currently returns the input unchanged.
	 *
	 * @param string $s
	 * @return string
	 */
	public function unprotect_autolinks($s) {
		return $s;
	}

	/**
	 * @brief Converts special characters to HTML entities for storage.
	 *
	 * Existing entities are not encoded a second time. Invalid UTF-8
	 * sequences are replaced with U+FFFD (ENT_SUBSTITUTE); without that
	 * flag htmlspecialchars() returns an empty string for such input.
	 *
	 * @param string $s
	 * @return string
	 */
	public function escape($s) {
		return htmlspecialchars($s, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
	}

	/**
	 * @brief Converts special HTML entities back to characters.
	 *
	 * @param string $s
	 * @return string
	 */
	static public function unescape($s) {
		return htmlspecialchars_decode($s, ENT_QUOTES);
	}
}