From f718e2b0db0fe3477212a8dd6c3ec067f4432862 Mon Sep 17 00:00:00 2001 From: Klaus Weidenbach Date: Sat, 18 Mar 2017 17:50:05 +0100 Subject: :arrow_up: Update HTML Purifier library. Updated HTML Purifier from 4.6.0 to 4.9.2 with better PHP7 compatibility. Used composer to manage this library. --- library/HTMLPurifier/Lexer/DOMLex.php | 280 ---------------------------------- 1 file changed, 280 deletions(-) delete mode 100644 library/HTMLPurifier/Lexer/DOMLex.php (limited to 'library/HTMLPurifier/Lexer/DOMLex.php') diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php deleted file mode 100644 index 720754454..000000000 --- a/library/HTMLPurifier/Lexer/DOMLex.php +++ /dev/null @@ -1,280 +0,0 @@ -factory = new HTMLPurifier_TokenFactory(); - } - - /** - * @param string $html - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return HTMLPurifier_Token[] - */ - public function tokenizeHTML($html, $config, $context) - { - $html = $this->normalize($html, $config, $context); - - // attempt to armor stray angled brackets that cannot possibly - // form tags and thus are probably being used as emoticons - if ($config->get('Core.AggressivelyFixLt')) { - $char = '[^a-z!\/]'; - $comment = "/|\z)/is"; - $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html); - do { - $old = $html; - $html = preg_replace("/<($char)/i", '<\\1', $html); - } while ($html !== $old); - $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments - } - - // preprocess html, essential for UTF-8 - $html = $this->wrapHTML($html, $config, $context); - - $doc = new DOMDocument(); - $doc->encoding = 'UTF-8'; // theoretically, the above has this covered - - set_error_handler(array($this, 'muteErrorHandler')); - $doc->loadHTML($html); - restore_error_handler(); - - $tokens = array(); - $this->tokenizeDOM( - $doc->getElementsByTagName('html')->item(0)-> // - getElementsByTagName('body')->item(0)-> // - getElementsByTagName('div')->item(0), //
- $tokens - ); - return $tokens; - } - - /** - * Iterative function that tokenizes a node, putting it into an accumulator. - * To iterate is human, to recurse divine - L. Peter Deutsch - * @param DOMNode $node DOMNode to be tokenized. - * @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens. - * @return HTMLPurifier_Token of node appended to previously passed tokens. - */ - protected function tokenizeDOM($node, &$tokens) - { - $level = 0; - $nodes = array($level => new HTMLPurifier_Queue(array($node))); - $closingNodes = array(); - do { - while (!$nodes[$level]->isEmpty()) { - $node = $nodes[$level]->shift(); // FIFO - $collect = $level > 0 ? true : false; - $needEndingTag = $this->createStartNode($node, $tokens, $collect); - if ($needEndingTag) { - $closingNodes[$level][] = $node; - } - if ($node->childNodes && $node->childNodes->length) { - $level++; - $nodes[$level] = new HTMLPurifier_Queue(); - foreach ($node->childNodes as $childNode) { - $nodes[$level]->push($childNode); - } - } - } - $level--; - if ($level && isset($closingNodes[$level])) { - while ($node = array_pop($closingNodes[$level])) { - $this->createEndNode($node, $tokens); - } - } - } while ($level > 0); - } - - /** - * @param DOMNode $node DOMNode to be tokenized. - * @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens. - * @param bool $collect Says whether or start and close are collected, set to - * false at first recursion because it's the implicit DIV - * tag you're dealing with. - * @return bool if the token needs an endtoken - * @todo data and tagName properties don't seem to exist in DOMNode? - */ - protected function createStartNode($node, &$tokens, $collect) - { - // intercept non element nodes. WE MUST catch all of them, - // but we're not getting the character reference nodes because - // those should have been preprocessed - if ($node->nodeType === XML_TEXT_NODE) { - $tokens[] = $this->factory->createText($node->data); - return false; - } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { - // undo libxml's special treatment of