diff options
Diffstat (limited to 'library/HTMLPurifier.php')
-rw-r--r-- | library/HTMLPurifier.php | 237 |
1 files changed, 237 insertions, 0 deletions
<?php

/*! @mainpage
 *
 * HTML Purifier is an HTML filter that will take an arbitrary snippet of
 * HTML and rigorously test, validate and filter it into a version that
 * is safe for output onto webpages. It achieves this by:
 *
 *  -# Lexing (parsing into tokens) the document,
 *  -# Executing various strategies on the tokens:
 *      -# Removing all elements not in the whitelist,
 *      -# Making the tokens well-formed,
 *      -# Fixing the nesting of the nodes, and
 *      -# Validating attributes of the nodes; and
 *  -# Generating HTML from the purified tokens.
 *
 * However, most users will only need to interface with the HTMLPurifier
 * and HTMLPurifier_Config.
 */

/*
    HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
    Copyright (C) 2006-2008 Edward Z. Yang

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note There are several points in which configuration can be specified
 *       for HTML Purifier.  The precedence of these (from lowest to
 *       highest) is as follows:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       These configurations are entirely independent of each other and
 *       are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /** Version of HTML Purifier */
    public $version = '4.1.1';

    /** Constant with version of HTML Purifier */
    const VERSION = '4.1.1';

    /** Global configuration object */
    public $config;

    /** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
    private $filters = array();

    /** Single instance of HTML Purifier */
    private static $instance;

    /**
     * $strategy is the HTMLPurifier_Strategy_Core created by the constructor;
     * $generator is the HTMLPurifier_Generator created by the most recent
     * purify() call (unset until purify() has run).
     */
    protected $strategy, $generator;

    /**
     * Resultant HTMLPurifier_Context of last run purification. Is an array
     * of contexts if the last called method was purifyArray(); that array
     * uses the same keys (and the same nesting) as the purified input.
     */
    public $context;

    /**
     * Initializes the purifier.
     * @param $config Optional HTMLPurifier_Config object for all instances of
     *                the purifier, if omitted, a default configuration is
     *                supplied (which can be overridden on a per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);

        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. Filters run in the order they
     * were added (first come, first served), after the filters enabled
     * through configuration.
     * @deprecated Emits an E_USER_WARNING; use configuration directives in
     *             the Filter namespace or Filter.Custom instead.
     * @param $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param $html String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation, if omitted,
     *                defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     * @return Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters
        $filters = $this->collectFilters($config);
        // maybe prepare(), but later

        // pre-filters run in registration order ...
        foreach ($filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        $tokens = $this->strategy->execute($tokens, $config, $context);
        $html = $this->generator->generateFromTokens($tokens);

        // ... and post-filters run in the reverse order, so the filters
        // unwind like a stack around the core purification
        foreach (array_reverse($filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // expose the context of this run to the caller
        $this->context =& $context;
        return $html;
    }

    /**
     * Builds the ordered list of filters for one purification run:
     * filters enabled by Filter.* flags first, then the entries of
     * Filter.Custom, then filters registered through addFilter().
     *
     * @param $config HTMLPurifier_Config object in effect for this run
     * @return Sequentially indexed array of filter objects
     */
    private function collectFilters($config)
    {
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) {
                continue;
            }
            // keys containing a dot are not mapped to a filter class
            // (presumably they are sub-options of a filter - confirm
            // against the configuration schema)
            if (strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }

        return array_merge($filters, $this->filters);
    }

    /**
     * Filters an array of HTML snippets. Nested arrays are purified
     * recursively; keys are preserved at every level.
     * @param $array_of_html Array of HTML strings (or nested arrays of
     *                       HTML strings) to purify
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML, with the same keys as the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            if (is_array($html)) {
                // purify() expects a string, so descend into nested arrays
                $array_of_html[$key] = $this->purifyArray($html, $config);
            } else {
                $array_of_html[$key] = $this->purify($html, $config);
            }
            // either the context of the purify() call or, for a nested
            // array, the array of contexts left by the recursive call
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     * @param $prototype Optional prototype HTMLPurifier instance to
     *                   overload singleton with, or HTMLPurifier_Config
     *                   instance to configure the generated version with.
     * @return The shared HTMLPurifier instance
     */
    public static function instance($prototype = null)
    {
        // a truthy $prototype always replaces the current singleton
        if (!self::$instance || $prototype) {
            if ($prototype instanceof HTMLPurifier) {
                self::$instance = $prototype;
            } elseif ($prototype) {
                self::$instance = new HTMLPurifier($prototype);
            } else {
                self::$instance = new HTMLPurifier();
            }
        }
        return self::$instance;
    }

    /**
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return HTMLPurifier::instance($prototype);
    }

}

// vim: et sw=4 sts=4