From a0052f0176bd079e6a94baec59fea2ec5a8d651e Mon Sep 17 00:00:00 2001 From: friendica <info@friendica.com> Date: Thu, 1 Jan 2015 22:18:27 -0800 Subject: htmlpurifier update - compatibility issue with language library autoloader --- library/HTMLPurifier/Injector/AutoParagraph.php | 99 ++++++++++++---------- library/HTMLPurifier/Injector/DisplayLinkURI.php | 22 ++++- library/HTMLPurifier/Injector/Linkify.php | 27 ++++-- library/HTMLPurifier/Injector/PurifierLinkify.php | 46 +++++++--- library/HTMLPurifier/Injector/RemoveEmpty.php | 82 ++++++++++++++---- .../Injector/RemoveSpansWithoutAttributes.php | 36 ++++++-- library/HTMLPurifier/Injector/SafeObject.php | 53 +++++++++--- 7 files changed, 267 insertions(+), 98 deletions(-) (limited to 'library/HTMLPurifier/Injector') diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php index afa760892..4afdd128d 100644 --- a/library/HTMLPurifier/Injector/AutoParagraph.php +++ b/library/HTMLPurifier/Injector/AutoParagraph.php @@ -8,17 +8,31 @@ */ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector { - + /** + * @type string + */ public $name = 'AutoParagraph'; + + /** + * @type array + */ public $needed = array('p'); - private function _pStart() { + /** + * @return HTMLPurifier_Token_Start + */ + private function _pStart() + { $par = new HTMLPurifier_Token_Start('p'); $par->armor['MakeWellFormed_TagClosedError'] = true; return $par; } - public function handleText(&$token) { + /** + * @param HTMLPurifier_Token_Text $token + */ + public function handleText(&$token) + { $text = $token->data; // Does the current parent allow <p> tags? if ($this->allowsElement('p')) { @@ -72,11 +86,9 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // ---- } } - // Is the current parent a <p> tag? - } elseif ( - !empty($this->currentNesting) && - $this->currentNesting[count($this->currentNesting)-1]->name == 'p' - ) { + // Is the current parent a <p> tag? + } elseif (!empty($this->currentNesting) && + $this->currentNesting[count($this->currentNesting) - 1]->name == 'p') { // State 3.1: ...<p>PAR1 // ---- @@ -84,7 +96,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector // ------------ $token = array(); $this->_splitText($text, $token); - // Abort! + // Abort! } else { // State 4.1: ...<b>PAR1 // ---- @@ -94,7 +106,11 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector } } - public function handleElement(&$token) { + /** + * @param HTMLPurifier_Token $token + */ + public function handleElement(&$token) + { // We don't have to check if we're already in a <p> tag for block // tokens, because the tag would have been autoclosed by MakeWellFormed. if ($this->allowsElement('p')) { @@ -102,7 +118,6 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector if ($this->_isInline($token)) { // State 1: <div>...<b> // --- - // Check if this token is adjacent to the parent token // (seek backwards until token isn't whitespace) $i = null; @@ -110,31 +125,24 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector if (!$prev instanceof HTMLPurifier_Token_Start) { // Token wasn't adjacent - - if ( - $prev instanceof HTMLPurifier_Token_Text && + if ($prev instanceof HTMLPurifier_Token_Text && substr($prev->data, -2) === "\n\n" ) { // State 1.1.4: <div><p>PAR1</p>\n\n<b> // --- - // Quite frankly, this should be handled by splitText $token = array($this->_pStart(), $token); } else { // State 1.1.1: <div><p>PAR1</p><b> // --- - // State 1.1.2: <div><br /><b> // --- - // State 1.1.3: <div>PAR<b> // --- } - } else { // State 1.2.1: <div><b> // --- - // Lookahead to see if <p> is needed. if ($this->_pLookAhead()) { // State 1.3.1: <div><b>PAR1\n\nPAR2 @@ -166,24 +174,20 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector $i = null; if ($this->backward($i, $prev)) { - if ( - !$prev instanceof HTMLPurifier_Token_Text - ) { + if (!$prev instanceof HTMLPurifier_Token_Text) { // State 3.1.1: ...</p>{p}<b> // --- - // State 3.2.1: ...</p><div> // ----- - - if (!is_array($token)) $token = array($token); + if (!is_array($token)) { + $token = array($token); + } array_unshift($token, new HTMLPurifier_Token_Text("\n\n")); } else { // State 3.1.2: ...</p>\n\n{p}<b> // --- - // State 3.2.2: ...</p>\n\n<div> // ----- - // Note: PAR<ELEM> cannot occur because PAR would have been // wrapped in <p> tags. } @@ -192,7 +196,6 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector } else { // State 2.2: <ul><li> // ---- - // State 2.4: <p><b> // --- } @@ -201,18 +204,17 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector /** * Splits up a text in paragraph tokens and appends them * to the result stream that will replace the original - * @param $data String text data that will be processed + * @param string $data String text data that will be processed * into paragraphs - * @param $result Reference to array of tokens that the + * @param HTMLPurifier_Token[] $result Reference to array of tokens that the * tags will be appended onto - * @param $config Instance of HTMLPurifier_Config - * @param $context Instance of HTMLPurifier_Context */ - private function _splitText($data, &$result) { + private function _splitText($data, &$result) + { $raw_paragraphs = explode("\n\n", $data); - $paragraphs = array(); // without empty paragraphs + $paragraphs = array(); // without empty paragraphs $needs_start = false; - $needs_end = false; + $needs_end = false; $c = count($raw_paragraphs); if ($c == 1) { @@ -285,26 +287,33 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector array_pop($result); // removes \n\n array_pop($result); // removes </p> } - } /** * Returns true if passed token is inline (and, ergo, allowed in * paragraph tags) + * @param HTMLPurifier_Token $token + * @return bool */ - private function _isInline($token) { + private function _isInline($token) + { return isset($this->htmlDefinition->info['p']->child->elements[$token->name]); } /** * Looks ahead in the token list and determines whether or not we need * to insert a <p> tag. + * @return bool */ - private function _pLookAhead() { - $this->current($i, $current); - if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1; - else $nesting = 0; + private function _pLookAhead() + { + if ($this->currentToken instanceof HTMLPurifier_Token_Start) { + $nesting = 1; + } else { + $nesting = 0; + } $ok = false; + $i = null; while ($this->forwardUntilEndToken($i, $current, $nesting)) { $result = $this->_checkNeedsP($current); if ($result !== null) { @@ -318,9 +327,12 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector /** * Determines if a particular token requires an earlier inline token * to get a paragraph. This should be used with _forwardUntilEndToken + * @param HTMLPurifier_Token $current + * @return bool */ - private function _checkNeedsP($current) { - if ($current instanceof HTMLPurifier_Token_Start){ + private function _checkNeedsP($current) + { + if ($current instanceof HTMLPurifier_Token_Start) { if (!$this->_isInline($current)) { // <div>PAR1<div> // ---- @@ -339,7 +351,6 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector } return null; } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/DisplayLinkURI.php b/library/HTMLPurifier/Injector/DisplayLinkURI.php index 9dce9bd08..c19b1bc27 100644 --- a/library/HTMLPurifier/Injector/DisplayLinkURI.php +++ b/library/HTMLPurifier/Injector/DisplayLinkURI.php @@ -5,15 +5,29 @@ */ class HTMLPurifier_Injector_DisplayLinkURI extends HTMLPurifier_Injector { - + /** + * @type string + */ public $name = 'DisplayLinkURI'; + + /** + * @type array + */ public $needed = array('a'); - public function handleElement(&$token) { + /** + * @param $token + */ + public function handleElement(&$token) + { } - public function handleEnd(&$token) { - if (isset($token->start->attr['href'])){ + /** + * @param HTMLPurifier_Token $token + */ + public function handleEnd(&$token) + { + if (isset($token->start->attr['href'])) { $url = $token->start->attr['href']; unset($token->start->attr['href']); $token = array($token, new HTMLPurifier_Token_Text(" ($url)")); diff --git a/library/HTMLPurifier/Injector/Linkify.php b/library/HTMLPurifier/Injector/Linkify.php index 296dac282..069708c25 100644 --- a/library/HTMLPurifier/Injector/Linkify.php +++ b/library/HTMLPurifier/Injector/Linkify.php @@ -5,12 +5,24 @@ */ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector { - + /** + * @type string + */ public $name = 'Linkify'; + + /** + * @type array + */ public $needed = array('a' => array('href')); - public function handleText(&$token) { - if (!$this->allowsElement('a')) return; + /** + * @param HTMLPurifier_Token $token + */ + public function handleText(&$token) + { + if (!$this->allowsElement('a')) { + return; + } if (strpos($token->data, '://') === false) { // our really quick heuristic failed, abort @@ -21,7 +33,8 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector // there is/are URL(s). Let's split the string: // Note: this regex is extremely permissive - $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + $bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + $token = array(); @@ -30,7 +43,9 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector // $l = is link for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { if (!$l) { - if ($bits[$i] === '') continue; + if ($bits[$i] === '') { + continue; + } $token[] = new HTMLPurifier_Token_Text($bits[$i]); } else { $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); @@ -38,9 +53,7 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector $token[] = new HTMLPurifier_Token_End('a'); } } - } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/PurifierLinkify.php b/library/HTMLPurifier/Injector/PurifierLinkify.php index ad2455a91..cb9046f33 100644 --- a/library/HTMLPurifier/Injector/PurifierLinkify.php +++ b/library/HTMLPurifier/Injector/PurifierLinkify.php @@ -6,19 +6,43 @@ */ class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector { - + /** + * @type string + */ public $name = 'PurifierLinkify'; + + /** + * @type string + */ public $docURL; + + /** + * @type array + */ public $needed = array('a' => array('href')); - public function prepare($config, $context) { + /** + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return string + */ + public function prepare($config, $context) + { $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL'); return parent::prepare($config, $context); } - public function handleText(&$token) { - if (!$this->allowsElement('a')) return; - if (strpos($token->data, '%') === false) return; + /** + * @param HTMLPurifier_Token $token + */ + public function handleText(&$token) + { + if (!$this->allowsElement('a')) { + return; + } + if (strpos($token->data, '%') === false) { + return; + } $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); $token = array(); @@ -28,18 +52,20 @@ class HTMLPurifier_Injector_PurifierLinkify extends HTMLPurifier_Injector // $l = is link for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { if (!$l) { - if ($bits[$i] === '') continue; + if ($bits[$i] === '') { + continue; + } $token[] = new HTMLPurifier_Token_Text($bits[$i]); } else { - $token[] = new HTMLPurifier_Token_Start('a', - array('href' => str_replace('%s', $bits[$i], $this->docURL))); + $token[] = new HTMLPurifier_Token_Start( + 'a', + array('href' => str_replace('%s', $bits[$i], $this->docURL)) + ); $token[] = new HTMLPurifier_Token_Text('%' . $bits[$i]); $token[] = new HTMLPurifier_Token_End('a'); } } - } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/RemoveEmpty.php b/library/HTMLPurifier/Injector/RemoveEmpty.php index 638bfca03..cd885722e 100644 --- a/library/HTMLPurifier/Injector/RemoveEmpty.php +++ b/library/HTMLPurifier/Injector/RemoveEmpty.php @@ -2,10 +2,44 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector { + /** + * @type HTMLPurifier_Context + */ + private $context; - private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; + /** + * @type HTMLPurifier_Config + */ + private $config; - public function prepare($config, $context) { + /** + * @type HTMLPurifier_AttrValidator + */ + private $attrValidator; + + /** + * @type bool + */ + private $removeNbsp; + + /** + * @type bool + */ + private $removeNbspExceptions; + + /** + * @type array + * TODO: make me configurable + */ + private $_exclude = array('colgroup' => 1, 'th' => 1, 'td' => 1, 'iframe' => 1); + + /** + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return void + */ + public function prepare($config, $context) + { parent::prepare($config, $context); $this->config = $config; $this->context = $context; @@ -14,38 +48,54 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector $this->attrValidator = new HTMLPurifier_AttrValidator(); } - public function handleElement(&$token) { - if (!$token instanceof HTMLPurifier_Token_Start) return; + /** + * @param HTMLPurifier_Token $token + */ + public function handleElement(&$token) + { + if (!$token instanceof HTMLPurifier_Token_Start) { + return; + } $next = false; - for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { - $next = $this->inputTokens[$i]; + $deleted = 1; // the current tag + for ($i = count($this->inputZipper->back) - 1; $i >= 0; $i--, $deleted++) { + $next = $this->inputZipper->back[$i]; if ($next instanceof HTMLPurifier_Token_Text) { - if ($next->is_whitespace) continue; + if ($next->is_whitespace) { + continue; + } if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { $plain = str_replace("\xC2\xA0", "", $next->data); $isWsOrNbsp = $plain === '' || ctype_space($plain); - if ($isWsOrNbsp) continue; + if ($isWsOrNbsp) { + continue; + } } } break; } if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { - if ($token->name == 'colgroup') return; + if (isset($this->_exclude[$token->name])) { + return; + } $this->attrValidator->validateToken($token, $this->config, $this->context); $token->armor['ValidateAttributes'] = true; - if (isset($token->attr['id']) || isset($token->attr['name'])) return; - $token = $i - $this->inputIndex + 1; - for ($b = $this->inputIndex - 1; $b > 0; $b--) { - $prev = $this->inputTokens[$b]; - if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; + if (isset($token->attr['id']) || isset($token->attr['name'])) { + return; + } + $token = $deleted + 1; + for ($b = 0, $c = count($this->inputZipper->front); $b < $c; $b++) { + $prev = $this->inputZipper->front[$b]; + if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) { + continue; + } break; } // This is safe because we removed the token that triggered this. - $this->rewind($b - 1); + $this->rewindOffset($b+$deleted); return; } } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php b/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php index b21313470..9ee7aa84d 100644 --- a/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php +++ b/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php @@ -5,25 +5,45 @@ */ class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector { + /** + * @type string + */ public $name = 'RemoveSpansWithoutAttributes'; + + /** + * @type array + */ public $needed = array('span'); + /** + * @type HTMLPurifier_AttrValidator + */ private $attrValidator; /** - * Used by AttrValidator + * Used by AttrValidator. + * @type HTMLPurifier_Config */ private $config; + + /** + * @type HTMLPurifier_Context + */ private $context; - public function prepare($config, $context) { + public function prepare($config, $context) + { $this->attrValidator = new HTMLPurifier_AttrValidator(); $this->config = $config; $this->context = $context; return parent::prepare($config, $context); } - public function handleElement(&$token) { + /** + * @param HTMLPurifier_Token $token + */ + public function handleElement(&$token) + { if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { return; } @@ -39,8 +59,8 @@ class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_In } $nesting = 0; - $spanContentTokens = array(); - while ($this->forwardUntilEndToken($i, $current, $nesting)) {} + while ($this->forwardUntilEndToken($i, $current, $nesting)) { + } if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { // Mark closing span tag for deletion @@ -50,7 +70,11 @@ class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_In } } - public function handleEnd(&$token) { + /** + * @param HTMLPurifier_Token $token + */ + public function handleEnd(&$token) + { if ($token->markForDeletion) { $token = false; } diff --git a/library/HTMLPurifier/Injector/SafeObject.php b/library/HTMLPurifier/Injector/SafeObject.php index 9e178ce01..3d17e07af 100644 --- a/library/HTMLPurifier/Injector/SafeObject.php +++ b/library/HTMLPurifier/Injector/SafeObject.php @@ -6,29 +6,61 @@ */ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector { + /** + * @type string + */ public $name = 'SafeObject'; + + /** + * @type array + */ public $needed = array('object', 'param'); + /** + * @type array + */ protected $objectStack = array(); - protected $paramStack = array(); - // Keep this synchronized with AttrTransform/SafeParam.php + /** + * @type array + */ + protected $paramStack = array(); + + /** + * Keep this synchronized with AttrTransform/SafeParam.php. + * @type array + */ protected $addParam = array( 'allowScriptAccess' => 'never', 'allowNetworking' => 'internal', ); + + /** + * @type array + */ protected $allowedParam = array( 'wmode' => true, 'movie' => true, 'flashvars' => true, 'src' => true, + 'allowFullScreen' => true, // if omitted, assume to be 'false' ); - public function prepare($config, $context) { + /** + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return void + */ + public function prepare($config, $context) + { parent::prepare($config, $context); } - public function handleElement(&$token) { + /** + * @param HTMLPurifier_Token $token + */ + public function handleElement(&$token) + { if ($token->name == 'object') { $this->objectStack[] = $token; $this->paramStack[] = array(); @@ -50,16 +82,15 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector // attribute, which we need if a type is specified. This is // *very* Flash specific. if (!isset($this->objectStack[$i]->attr['data']) && - ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { + ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src') + ) { $this->objectStack[$i]->attr['data'] = $token->attr['value']; } // Check if the parameter is the correct value but has not // already been added - if ( - !isset($this->paramStack[$i][$n]) && + if (!isset($this->paramStack[$i][$n]) && isset($this->addParam[$n]) && - $token->attr['name'] === $this->addParam[$n] - ) { + $token->attr['name'] === $this->addParam[$n]) { // keep token, and add to param stack $this->paramStack[$i][$n] = true; } elseif (isset($this->allowedParam[$n])) { @@ -75,7 +106,8 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector } } - public function handleEnd(&$token) { + public function handleEnd(&$token) + { // This is the WRONG way of handling the object and param stacks; // we should be inserting them directly on the relevant object tokens // so that the global stack handling handles it. @@ -84,7 +116,6 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector array_pop($this->paramStack); } } - } // vim: et sw=4 sts=4 -- cgit v1.2.3