From a0052f0176bd079e6a94baec59fea2ec5a8d651e Mon Sep 17 00:00:00 2001 From: friendica Date: Thu, 1 Jan 2015 22:18:27 -0800 Subject: htmlpurifier update - compatibility issue with language library autoloader --- library/HTMLPurifier/Lexer/DirectLex.php | 217 +++++++++++++++++++------------ 1 file changed, 133 insertions(+), 84 deletions(-) (limited to 'library/HTMLPurifier/Lexer/DirectLex.php') diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php index 456e6e190..746b6e315 100644 --- a/library/HTMLPurifier/Lexer/DirectLex.php +++ b/library/HTMLPurifier/Lexer/DirectLex.php @@ -12,30 +12,44 @@ */ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer { - + /** + * @type bool + */ public $tracksLineNumbers = true; /** * Whitespace characters for str(c)spn. + * @type string */ protected $_whitespace = "\x20\x09\x0D\x0A"; /** * Callback function for script CDATA fudge - * @param $matches, in form of array(opening tag, contents, closing tag) + * @param array $matches, in form of array(opening tag, contents, closing tag) + * @return string */ - protected function scriptCallback($matches) { + protected function scriptCallback($matches) + { return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3]; } - public function tokenizeHTML($html, $config, $context) { - + /** + * @param String $html + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return array|HTMLPurifier_Token[] + */ + public function tokenizeHTML($html, $config, $context) + { // special normalization for script tags without any armor // our "armor" heurstic is a < sign any number of whitespaces after // the first script tag if ($config->get('HTML.Trusted')) { - $html = preg_replace_callback('#(]*>)(\s*[^<].+?)()#si', - array($this, 'scriptCallback'), $html); + $html = preg_replace_callback( + '#(]*>)(\s*[^<].+?)()#si', + array($this, 'scriptCallback'), + $html + ); } $html = $this->normalize($html, $config, $context); @@ -55,15 +69,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if ($maintain_line_numbers) { $current_line = 1; - $current_col = 0; + $current_col = 0; $length = strlen($html); } else { $current_line = false; - $current_col = false; + $current_col = false; $length = false; } $context->register('CurrentLine', $current_line); - $context->register('CurrentCol', $current_col); + $context->register('CurrentCol', $current_col); $nl = "\n"; // how often to manually recalculate. This will ALWAYS be right, // but it's pretty wasteful. Set to 0 to turn off @@ -77,16 +91,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // for testing synchronization $loops = 0; - while(++$loops) { - + while (++$loops) { // $cursor is either at the start of a token, or inside of // a tag (i.e. there was a < immediately before it), as indicated // by $inside_tag if ($maintain_line_numbers) { - // $rcursor, however, is always at the start of a token. - $rcursor = $cursor - (int) $inside_tag; + $rcursor = $cursor - (int)$inside_tag; // Column number is cheap, so we calculate it every round. // We're interested at the *end* of the newline string, so @@ -96,14 +108,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); // recalculate lines - if ( - $synchronize_interval && // synchronization is on - $cursor > 0 && // cursor is further than zero - $loops % $synchronize_interval === 0 // time to synchronize! - ) { + if ($synchronize_interval && // synchronization is on + $cursor > 0 && // cursor is further than zero + $loops % $synchronize_interval === 0) { // time to synchronize! $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); } - } $position_next_lt = strpos($html, '<', $cursor); @@ -119,35 +128,42 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if (!$inside_tag && $position_next_lt !== false) { // We are not inside tag and there still is another tag to parse $token = new - HTMLPurifier_Token_Text( - $this->parseData( - substr( - $html, $cursor, $position_next_lt - $cursor - ) + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, + $cursor, + $position_next_lt - $cursor ) - ); + ) + ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); } $array[] = $token; - $cursor = $position_next_lt + 1; + $cursor = $position_next_lt + 1; $inside_tag = true; continue; } elseif (!$inside_tag) { // We are not inside tag but there are no more tags // If we're already at the end, break - if ($cursor === strlen($html)) break; + if ($cursor === strlen($html)) { + break; + } // Create Text of rest of string $token = new - HTMLPurifier_Token_Text( - $this->parseData( - substr( - $html, $cursor - ) + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, + $cursor ) - ); - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); + ) + ); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + } $array[] = $token; break; } elseif ($inside_tag && $position_next_gt !== false) { @@ -171,16 +187,16 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } // Check if it's a comment - if ( - substr($segment, 0, 3) === '!--' - ) { + if (substr($segment, 0, 3) === '!--') { // re-determine segment length, looking for --> $position_comment_end = strpos($html, '-->', $cursor); if ($position_comment_end === false) { // uh oh, we have a comment that extends to // infinity. Can't be helped: set comment // end position to end of string - if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); + if ($e) { + $e->send(E_WARNING, 'Lexer: Unclosed comment'); + } $position_comment_end = strlen($html); $end = true; } else { @@ -189,11 +205,13 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $strlen_segment = $position_comment_end - $cursor; $segment = substr($html, $cursor, $strlen_segment); $token = new - HTMLPurifier_Token_Comment( - substr( - $segment, 3, $strlen_segment - 3 - ) - ); + HTMLPurifier_Token_Comment( + substr( + $segment, + 3, + $strlen_segment - 3 + ) + ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); @@ -205,7 +223,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } // Check if it's an end tag - $is_end_tag = (strpos($segment,'/') === 0); + $is_end_tag = (strpos($segment, '/') === 0); if ($is_end_tag) { $type = substr($segment, 1); $token = new HTMLPurifier_Token_End($type); @@ -224,7 +242,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // text and go our merry way if (!ctype_alpha($segment[0])) { // XML: $segment[0] !== '_' && $segment[0] !== ':' - if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); + if ($e) { + $e->send(E_NOTICE, 'Lexer: Unescaped lt'); + } $token = new HTMLPurifier_Token_Text('<'); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); @@ -239,7 +259,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // trailing slash. Remember, we could have a tag like
, so // any later token processing scripts must convert improperly // classified EmptyTags from StartTags. - $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); + $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1); if ($is_self_closing) { $strlen_segment--; $segment = substr($segment, 0, $strlen_segment); @@ -269,14 +289,16 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $attribute_string = trim( substr( - $segment, $position_first_space + $segment, + $position_first_space ) ); if ($attribute_string) { $attr = $this->parseAttributeString( - $attribute_string - , $config, $context - ); + $attribute_string, + $config, + $context + ); } else { $attr = array(); } @@ -296,15 +318,19 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer continue; } else { // inside tag, but there's no ending > sign - if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); + if ($e) { + $e->send(E_WARNING, 'Lexer: Missing gt'); + } $token = new - HTMLPurifier_Token_Text( - '<' . - $this->parseData( - substr($html, $cursor) - ) - ); - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); + HTMLPurifier_Token_Text( + '<' . + $this->parseData( + substr($html, $cursor) + ) + ); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + } // no cursor scroll? Hmm... $array[] = $token; break; @@ -319,8 +345,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer /** * PHP 5.0.x compatible substr_count that implements offset and length + * @param string $haystack + * @param string $needle + * @param int $offset + * @param int $length + * @return int */ - protected function substrCount($haystack, $needle, $offset, $length) { + protected function substrCount($haystack, $needle, $offset, $length) + { static $oldVersion; if ($oldVersion === null) { $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); @@ -336,13 +368,18 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer /** * Takes the inside of an HTML tag and makes an assoc array of attributes. * - * @param $string Inside of tag excluding name. - * @returns Assoc array of attributes. + * @param string $string Inside of tag excluding name. + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return array Assoc array of attributes. */ - public function parseAttributeString($string, $config, $context) { - $string = (string) $string; // quick typecast + public function parseAttributeString($string, $config, $context) + { + $string = (string)$string; // quick typecast - if ($string == '') return array(); // no attributes + if ($string == '') { + return array(); + } // no attributes $e = false; if ($config->get('Core.CollectErrors')) { @@ -361,46 +398,55 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer list($key, $quoted_value) = explode('=', $string); $quoted_value = trim($quoted_value); if (!$key) { - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing attribute key'); + } return array(); } - if (!$quoted_value) return array($key => ''); + if (!$quoted_value) { + return array($key => ''); + } $first_char = @$quoted_value[0]; - $last_char = @$quoted_value[strlen($quoted_value)-1]; + $last_char = @$quoted_value[strlen($quoted_value) - 1]; $same_quote = ($first_char == $last_char); $open_quote = ($first_char == '"' || $first_char == "'"); - if ( $same_quote && $open_quote) { + if ($same_quote && $open_quote) { // well behaved $value = substr($quoted_value, 1, strlen($quoted_value) - 2); } else { // not well behaved if ($open_quote) { - if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing end quote'); + } $value = substr($quoted_value, 1); } else { $value = $quoted_value; } } - if ($value === false) $value = ''; + if ($value === false) { + $value = ''; + } return array($key => $this->parseData($value)); } // setup loop environment - $array = array(); // return assoc array of attributes + $array = array(); // return assoc array of attributes $cursor = 0; // current position in string (moves forward) - $size = strlen($string); // size of the string (stays the same) + $size = strlen($string); // size of the string (stays the same) // if we have unquoted attributes, the parser expects a terminating // space, so let's guarantee that there's always a terminating space. $string .= ' '; - while(true) { - - if ($cursor >= $size) { - break; + $old_cursor = -1; + while ($cursor < $size) { + if ($old_cursor >= $cursor) { + throw new Exception("Infinite loop detected"); } + $old_cursor = $cursor; $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); // grab the key @@ -415,8 +461,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $key = substr($string, $key_begin, $key_end - $key_begin); if (!$key) { - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); - $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing attribute key'); + } + $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop continue; // empty key } @@ -467,24 +515,25 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } $value = substr($string, $value_begin, $value_end - $value_begin); - if ($value === false) $value = ''; + if ($value === false) { + $value = ''; + } $array[$key] = $this->parseData($value); $cursor++; - } else { // boolattr if ($key !== '') { $array[$key] = $key; } else { // purely theoretical - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing attribute key'); + } } - } } return $array; } - } // vim: et sw=4 sts=4 -- cgit v1.2.3