From a0052f0176bd079e6a94baec59fea2ec5a8d651e Mon Sep 17 00:00:00 2001 From: friendica Date: Thu, 1 Jan 2015 22:18:27 -0800 Subject: htmlpurifier update - compatibility issue with language library autoloader --- library/HTMLPurifier/Lexer.php | 191 ++++++++++++++++++++++++++--------------- 1 file changed, 121 insertions(+), 70 deletions(-) (limited to 'library/HTMLPurifier/Lexer.php') diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index b05e11546..43732621d 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -62,16 +62,20 @@ class HTMLPurifier_Lexer * To specify your own prototype, set %Core.LexerImpl to it. * This change in behavior de-singletonizes the lexer object. * - * @param $config Instance of HTMLPurifier_Config - * @return Concrete lexer. + * @param HTMLPurifier_Config $config + * @return HTMLPurifier_Lexer + * @throws HTMLPurifier_Exception */ - public static function create($config) { - + public static function create($config) + { if (!($config instanceof HTMLPurifier_Config)) { $lexer = $config; - trigger_error("Passing a prototype to - HTMLPurifier_Lexer::create() is deprecated, please instead - use %Core.LexerImpl", E_USER_WARNING); + trigger_error( + "Passing a prototype to + HTMLPurifier_Lexer::create() is deprecated, please instead + use %Core.LexerImpl", + E_USER_WARNING + ); } else { $lexer = $config->get('Core.LexerImpl'); } @@ -84,30 +88,28 @@ class HTMLPurifier_Lexer if (is_object($lexer)) { $inst = $lexer; } else { - - if (is_null($lexer)) { do { - // auto-detection algorithm - - if ($needs_tracking) { - $lexer = 'DirectLex'; - break; - } - - if ( - class_exists('DOMDocument') && - method_exists('DOMDocument', 'loadHTML') && - !extension_loaded('domxml') - ) { - // check for DOM support, because while it's part of the - // core, it can be disabled compile time. 
Also, the PECL - // domxml extension overrides the default DOM, and is evil - // and nasty and we shan't bother to support it - $lexer = 'DOMLex'; - } else { - $lexer = 'DirectLex'; - } - - } while(0); } // do..while so we can break + if (is_null($lexer)) { + do { + // auto-detection algorithm + if ($needs_tracking) { + $lexer = 'DirectLex'; + break; + } + + if (class_exists('DOMDocument') && + method_exists('DOMDocument', 'loadHTML') && + !extension_loaded('domxml') + ) { + // check for DOM support, because while it's part of the + // core, it can be disabled compile time. Also, the PECL + // domxml extension overrides the default DOM, and is evil + // and nasty and we shan't bother to support it + $lexer = 'DOMLex'; + } else { + $lexer = 'DirectLex'; + } + } while (0); + } // do..while so we can break // instantiate recognized string names switch ($lexer) { @@ -121,16 +123,24 @@ class HTMLPurifier_Lexer $inst = new HTMLPurifier_Lexer_PH5P(); break; default: - throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer)); + throw new HTMLPurifier_Exception( + "Cannot instantiate unrecognized Lexer type " . + htmlspecialchars($lexer) + ); } } - if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated'); + if (!$inst) { + throw new HTMLPurifier_Exception('No lexer was instantiated'); + } // once PHP DOM implements native line numbers, or we // hack out something using XSLT, remove this stipulation if ($needs_tracking && !$inst->tracksLineNumbers) { - throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'); + throw new HTMLPurifier_Exception( + 'Cannot use lexer that does not support line numbers with ' . 
+ 'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)' + ); } return $inst; @@ -139,23 +149,25 @@ class HTMLPurifier_Lexer // -- CONVENIENCE MEMBERS --------------------------------------------- - public function __construct() { + public function __construct() + { $this->_entity_parser = new HTMLPurifier_EntityParser(); } /** * Most common entity to raw value conversion table for special entities. + * @type array */ protected $_special_entity2str = - array( - '&quot;' => '"', - '&amp;' => '&', - '&lt;' => '<', - '&gt;' => '>', - '&#39;' => "'", - '&#039;' => "'", - '&#x27;' => "'" - ); + array( + '&quot;' => '"', + '&amp;' => '&', + '&lt;' => '<', + '&gt;' => '>', + '&#39;' => "'", + '&#039;' => "'", + '&#x27;' => "'" + ); /** * Parses special entities into the proper characters. @@ -168,27 +180,33 @@ class HTMLPurifier_Lexer * completely parsed, but that's only because all other entities should * have been handled previously in substituteNonSpecialEntities() * - * @param $string String character data to be parsed. - * @returns Parsed character data. + * @param string $string String character data to be parsed. + * @return string Parsed character data. */ - public function parseData($string) { - + public function parseData($string) + { // following functions require at least one character - if ($string === '') return ''; + if ($string === '') { + return ''; + } // subtracts amps that cannot possibly be escaped $num_amp = substr_count($string, '&') - substr_count($string, '& ') - - ($string[strlen($string)-1] === '&' ? 1 : 0); + ($string[strlen($string) - 1] === '&' ? 1 : 0); - if (!$num_amp) return $string; // abort if no entities + if (!$num_amp) { + return $string; + } // abort if no entities $num_esc_amp = substr_count($string, '&amp;'); $string = strtr($string, $this->_special_entity2str); // code duplication for sake of optimization, see above $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - - ($string[strlen($string)-1] === '&' ? 
1 : 0); + ($string[strlen($string) - 1] === '&' ? 1 : 0); - if ($num_amp_2 <= $num_esc_amp) return $string; + if ($num_amp_2 <= $num_esc_amp) { + return $string; + } // hmm... now we have some uncommon entities. Use the callback. $string = $this->_entity_parser->substituteSpecialEntities($string); @@ -197,21 +215,23 @@ class HTMLPurifier_Lexer /** * Lexes an HTML string into tokens. - * * @param $string String HTML. - * @return HTMLPurifier_Token array representation of HTML. + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return HTMLPurifier_Token[] array representation of HTML. */ - public function tokenizeHTML($string, $config, $context) { + public function tokenizeHTML($string, $config, $context) + { trigger_error('Call to abstract class', E_USER_ERROR); } /** * Translates CDATA sections into regular sections (through escaping). - * - * @param $string HTML string to process. - * @returns HTML with CDATA sections escaped. + * @param string $string HTML string to process. + * @return string HTML with CDATA sections escaped. */ - protected static function escapeCDATA($string) { + protected static function escapeCDATA($string) + { return preg_replace_callback( '/<!\[CDATA\[(.+?)\]\]>/s', array('HTMLPurifier_Lexer', 'CDATACallback'), @@ -221,8 +241,11 @@ class HTMLPurifier_Lexer /** * Special CDATA case that is especially convoluted for