From ffb1997902facb36b78a7cfa522f41f2b3d71cda Mon Sep 17 00:00:00 2001 From: Mike Macgirvin Date: Wed, 8 Sep 2010 20:14:17 -0700 Subject: mistpark 2.0 infrasturcture lands --- library/HTMLPurifier.php | 237 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 library/HTMLPurifier.php (limited to 'library/HTMLPurifier.php') diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php new file mode 100644 index 000000000..ba2c7b306 --- /dev/null +++ b/library/HTMLPurifier.php @@ -0,0 +1,237 @@ +config = HTMLPurifier_Config::create($config); + + $this->strategy = new HTMLPurifier_Strategy_Core(); + + } + + /** + * Adds a filter to process the output. First come first serve + * @param $filter HTMLPurifier_Filter object + */ + public function addFilter($filter) { + trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING); + $this->filters[] = $filter; + } + + /** + * Filters an HTML snippet/document to be XSS-free and standards-compliant. + * + * @param $html String of HTML to purify + * @param $config HTMLPurifier_Config object for this operation, if omitted, + * defaults to the config object specified during this + * object's construction. The parameter can also be any type + * that HTMLPurifier_Config::create() supports. + * @return Purified HTML + */ + public function purify($html, $config = null) { + + // :TODO: make the config merge in, instead of replace + $config = $config ? HTMLPurifier_Config::create($config) : $this->config; + + // implementation is partially environment dependant, partially + // configuration dependant + $lexer = HTMLPurifier_Lexer::create($config); + + $context = new HTMLPurifier_Context(); + + // setup HTML generator + $this->generator = new HTMLPurifier_Generator($config, $context); + $context->register('Generator', $this->generator); + + // set up global context variables + if ($config->get('Core.CollectErrors')) { + // may get moved out if other facilities use it + $language_factory = HTMLPurifier_LanguageFactory::instance(); + $language = $language_factory->create($config, $context); + $context->register('Locale', $language); + + $error_collector = new HTMLPurifier_ErrorCollector($context); + $context->register('ErrorCollector', $error_collector); + } + + // setup id_accumulator context, necessary due to the fact that + // AttrValidator can be called from many places + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); + + // setup filters + $filter_flags = $config->getBatch('Filter'); + $custom_filters = $filter_flags['Custom']; + unset($filter_flags['Custom']); + $filters = array(); + foreach ($filter_flags as $filter => $flag) { + if (!$flag) continue; + if (strpos($filter, '.') !== false) continue; + $class = "HTMLPurifier_Filter_$filter"; + $filters[] = new $class; + } + foreach ($custom_filters as $filter) { + // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat + $filters[] = $filter; + } + $filters = array_merge($filters, $this->filters); + // maybe prepare(), but later + + for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) { + $html = $filters[$i]->preFilter($html, $config, $context); + } + + // purified HTML + $html = + $this->generator->generateFromTokens( + // list of tokens + $this->strategy->execute( + // list of un-purified tokens + $lexer->tokenizeHTML( + // un-purified HTML + $html, $config, $context + ), + $config, $context + ) + ); + + for ($i = $filter_size - 1; $i >= 0; $i--) { + $html = $filters[$i]->postFilter($html, $config, $context); + } + + $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); + $this->context =& $context; + return $html; + } + + /** + * Filters an array of HTML snippets + * @param $config Optional HTMLPurifier_Config object for this operation. + * See HTMLPurifier::purify() for more details. + * @return Array of purified HTML + */ + public function purifyArray($array_of_html, $config = null) { + $context_array = array(); + foreach ($array_of_html as $key => $html) { + $array_of_html[$key] = $this->purify($html, $config); + $context_array[$key] = $this->context; + } + $this->context = $context_array; + return $array_of_html; + } + + /** + * Singleton for enforcing just one HTML Purifier in your system + * @param $prototype Optional prototype HTMLPurifier instance to + * overload singleton with, or HTMLPurifier_Config + * instance to configure the generated version with. + */ + public static function instance($prototype = null) { + if (!self::$instance || $prototype) { + if ($prototype instanceof HTMLPurifier) { + self::$instance = $prototype; + } elseif ($prototype) { + self::$instance = new HTMLPurifier($prototype); + } else { + self::$instance = new HTMLPurifier(); + } + } + return self::$instance; + } + + /** + * @note Backwards compatibility, see instance() + */ + public static function getInstance($prototype = null) { + return HTMLPurifier::instance($prototype); + } + +} + +// vim: et sw=4 sts=4 -- cgit v1.2.3