diff options
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/ChildDef')
8 files changed, 0 insertions, 736 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php deleted file mode 100644 index 15c364ee3..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php +++ /dev/null @@ -1,48 +0,0 @@ -<?php - -/** - * Definition that uses different definitions depending on context. - * - * The del and ins tags are notable because they allow different types of - * elements depending on whether or not they're in a block or inline context. - * Chameleon allows this behavior to happen by using two different - * definitions depending on context. While this somewhat generalized, - * it is specifically intended for those two tags. - */ -class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef -{ - - /** - * Instance of the definition object to use when inline. Usually stricter. - */ - public $inline; - - /** - * Instance of the definition object to use when block. - */ - public $block; - - public $type = 'chameleon'; - - /** - * @param $inline List of elements to allow when inline. - * @param $block List of elements to allow when block. - */ - public function __construct($inline, $block) { - $this->inline = new HTMLPurifier_ChildDef_Optional($inline); - $this->block = new HTMLPurifier_ChildDef_Optional($block); - $this->elements = $this->block->elements; - } - - public function validateChildren($tokens_of_children, $config, $context) { - if ($context->get('IsInline') === false) { - return $this->block->validateChildren( - $tokens_of_children, $config, $context); - } else { - return $this->inline->validateChildren( - $tokens_of_children, $config, $context); - } - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php deleted file mode 100644 index b68047b4b..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php +++ /dev/null @@ -1,90 +0,0 @@ -<?php - -/** - * Custom validation class, accepts DTD child definitions - * - * @warning Currently this class is an all or nothing proposition, that is, - * it will only give a bool return value. - */ -class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef -{ - public $type = 'custom'; - public $allow_empty = false; - /** - * Allowed child pattern as defined by the DTD - */ - public $dtd_regex; - /** - * PCRE regex derived from $dtd_regex - * @private - */ - private $_pcre_regex; - /** - * @param $dtd_regex Allowed child pattern from the DTD - */ - public function __construct($dtd_regex) { - $this->dtd_regex = $dtd_regex; - $this->_compileRegex(); - } - /** - * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) - */ - protected function _compileRegex() { - $raw = str_replace(' ', '', $this->dtd_regex); - if ($raw{0} != '(') { - $raw = "($raw)"; - } - $el = '[#a-zA-Z0-9_.-]+'; - $reg = $raw; - - // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M - // DOING! Seriously: if there's problems, please report them. - - // collect all elements into the $elements array - preg_match_all("/$el/", $reg, $matches); - foreach ($matches[0] as $match) { - $this->elements[$match] = true; - } - - // setup all elements as parentheticals with leading commas - $reg = preg_replace("/$el/", '(,\\0)', $reg); - - // remove commas when they were not solicited - $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); - - // remove all non-paranthetical commas: they are handled by first regex - $reg = preg_replace("/,\(/", '(', $reg); - - $this->_pcre_regex = $reg; - } - public function validateChildren($tokens_of_children, $config, $context) { - $list_of_children = ''; - $nesting = 0; // depth into the nest - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) continue; - - $is_child = ($nesting == 0); // direct - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - $list_of_children .= $token->name . ','; - } - } - // add leading comma to deal with stray comma declarations - $list_of_children = ',' . rtrim($list_of_children, ','); - $okay = - preg_match( - '/^,?'.$this->_pcre_regex.'$/', - $list_of_children - ); - - return (bool) $okay; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php deleted file mode 100644 index 13171f665..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php +++ /dev/null @@ -1,20 +0,0 @@ -<?php - -/** - * Definition that disallows all elements. - * @warning validateChildren() in this class is actually never called, because - * empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed - * before child definitions are parsed in earnest by - * HTMLPurifier_Strategy_FixNesting. - */ -class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef -{ - public $allow_empty = true; - public $type = 'empty'; - public function __construct() {} - public function validateChildren($tokens_of_children, $config, $context) { - return array(); - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php deleted file mode 100644 index cdaa2893a..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php +++ /dev/null @@ -1,120 +0,0 @@ -<?php - -/** - * Definition for list containers ul and ol. - */ -class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef -{ - public $type = 'list'; - // lying a little bit, so that we can handle ul and ol ourselves - // XXX: This whole business with 'wrap' is all a bit unsatisfactory - public $elements = array('li' => true, 'ul' => true, 'ol' => true); - public function validateChildren($tokens_of_children, $config, $context) { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($tokens_of_children)) return false; - - // the new set of children - $result = array(); - - // current depth into the nest - $nesting = 0; - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - $seen_li = false; - $need_close_li = false; - - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) { - $result[] = $token; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - if ($nesting == 1 && $need_close_li) { - $result[] = new HTMLPurifier_Token_End('li'); - $nesting--; - $need_close_li = false; - } - - $is_child = ($nesting == 0); - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - if ($token->name === 'li') { - // good - $seen_li = true; - } elseif ($token->name === 'ul' || $token->name === 'ol') { - // we want to tuck this into the previous li - $need_close_li = true; - $nesting++; - if (!$seen_li) { - // create a new li element - $result[] = new HTMLPurifier_Token_Start('li'); - } else { - // backtrack until </li> found - while(true) { - $t = array_pop($result); - if ($t instanceof HTMLPurifier_Token_End) { - // XXX actually, these invariants could very plausibly be violated - // if we are doing silly things with modifying the set of allowed elements. - // FORTUNATELY, it doesn't make a difference, since the allowed - // elements are hard-coded here! - if ($t->name !== 'li') { - trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - break; - } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh - if ($t->name !== 'li') { - trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - // XXX this should have a helper for it... - $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor); - break; - } else { - if (!$t->is_whitespace) { - trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - } - } - } - } else { - // start wrapping (this doesn't precisely mimic - // browser behavior, but what browsers do is kind of - // hard to mimic in a standards compliant way - // XXX Actually, this has no impact in practice, - // because this gets handled earlier. Arguably, - // we should rip out all of that processing - $result[] = new HTMLPurifier_Token_Start('li'); - $nesting++; - $seen_li = true; - $need_close_li = true; - } - } - $result[] = $token; - } - if ($need_close_li) { - $result[] = new HTMLPurifier_Token_End('li'); - } - if (empty($result)) return false; - if ($all_whitespace) { - return false; - } - if ($tokens_of_children == $result) return true; - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php deleted file mode 100644 index 32bcb9898..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php +++ /dev/null @@ -1,26 +0,0 @@ -<?php - -/** - * Definition that allows a set of elements, and allows no children. - * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required, - * really, one shouldn't inherit from the other. Only altered behavior - * is to overload a returned false with an array. Thus, it will never - * return false. - */ -class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required -{ - public $allow_empty = true; - public $type = 'optional'; - public function validateChildren($tokens_of_children, $config, $context) { - $result = parent::validateChildren($tokens_of_children, $config, $context); - // we assume that $tokens_of_children is not modified - if ($result === false) { - if (empty($tokens_of_children)) return true; - elseif ($this->whitespace) return $tokens_of_children; - else return array(); - } - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php deleted file mode 100644 index 4889f249b..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php +++ /dev/null @@ -1,117 +0,0 @@ -<?php - -/** - * Definition that allows a set of elements, but disallows empty children. - */ -class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef -{ - /** - * Lookup table of allowed elements. - * @public - */ - public $elements = array(); - /** - * Whether or not the last passed node was all whitespace. - */ - protected $whitespace = false; - /** - * @param $elements List of allowed element names (lowercase). - */ - public function __construct($elements) { - if (is_string($elements)) { - $elements = str_replace(' ', '', $elements); - $elements = explode('|', $elements); - } - $keys = array_keys($elements); - if ($keys == array_keys($keys)) { - $elements = array_flip($elements); - foreach ($elements as $i => $x) { - $elements[$i] = true; - if (empty($i)) unset($elements[$i]); // remove blank - } - } - $this->elements = $elements; - } - public $allow_empty = false; - public $type = 'required'; - public function validateChildren($tokens_of_children, $config, $context) { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($tokens_of_children)) return false; - - // the new set of children - $result = array(); - - // current depth into the nest - $nesting = 0; - - // whether or not we're deleting a node - $is_deleting = false; - - // whether or not parsed character data is allowed - // this controls whether or not we silently drop a tag - // or generate escaped HTML from it - $pcdata_allowed = isset($this->elements['#PCDATA']); - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - // some configuration - $escape_invalid_children = $config->get('Core.EscapeInvalidChildren'); - - // generator - $gen = new HTMLPurifier_Generator($config, $context); - - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) { - $result[] = $token; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - $is_child = ($nesting == 0); - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - $is_deleting = false; - if (!isset($this->elements[$token->name])) { - $is_deleting = true; - if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = new HTMLPurifier_Token_Text( - $gen->generateFromToken($token) - ); - } - continue; - } - } - if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = - new HTMLPurifier_Token_Text( - $gen->generateFromToken($token) - ); - } else { - // drop silently - } - } - if (empty($result)) return false; - if ($all_whitespace) { - $this->whitespace = true; - return false; - } - if ($tokens_of_children == $result) return true; - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php deleted file mode 100644 index dfae8a6e5..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ /dev/null @@ -1,88 +0,0 @@ -<?php - -/** - * Takes the contents of blockquote when in strict and reformats for validation. - */ -class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required -{ - protected $real_elements; - protected $fake_elements; - public $allow_empty = true; - public $type = 'strictblockquote'; - protected $init = false; - - /** - * @note We don't want MakeWellFormed to auto-close inline elements since - * they might be allowed. - */ - public function getAllowedElements($config) { - $this->init($config); - return $this->fake_elements; - } - - public function validateChildren($tokens_of_children, $config, $context) { - - $this->init($config); - - // trick the parent class into thinking it allows more - $this->elements = $this->fake_elements; - $result = parent::validateChildren($tokens_of_children, $config, $context); - $this->elements = $this->real_elements; - - if ($result === false) return array(); - if ($result === true) $result = $tokens_of_children; - - $def = $config->getHTMLDefinition(); - $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper); - $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper); - $is_inline = false; - $depth = 0; - $ret = array(); - - // assuming that there are no comment tokens - foreach ($result as $i => $token) { - $token = $result[$i]; - // ifs are nested for readability - if (!$is_inline) { - if (!$depth) { - if ( - ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) || - (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name])) - ) { - $is_inline = true; - $ret[] = $block_wrap_start; - } - } - } else { - if (!$depth) { - // starting tokens have been inline text / empty - if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) { - if (isset($this->elements[$token->name])) { - // ended - $ret[] = $block_wrap_end; - $is_inline = false; - } - } - } - } - $ret[] = $token; - if ($token instanceof HTMLPurifier_Token_Start) $depth++; - if ($token instanceof HTMLPurifier_Token_End) $depth--; - } - if ($is_inline) $ret[] = $block_wrap_end; - return $ret; - } - - private function init($config) { - if (!$this->init) { - $def = $config->getHTMLDefinition(); - // allow all inline elements - $this->real_elements = $this->elements; - $this->fake_elements = $def->info_content_sets['Flow']; - $this->fake_elements['#PCDATA'] = true; - $this->init = true; - } - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php deleted file mode 100644 index 9a93421a1..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php +++ /dev/null @@ -1,227 +0,0 @@ -<?php - -/** - * Definition for tables. The general idea is to extract out all of the - * essential bits, and then reconstruct it later. - * - * This is a bit confusing, because the DTDs and the W3C - * validators seem to disagree on the appropriate definition. The - * DTD claims: - * - * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+) - * - * But actually, the HTML4 spec then has this to say: - * - * The TBODY start tag is always required except when the table - * contains only one table body and no table head or foot sections. - * The TBODY end tag may always be safely omitted. - * - * So the DTD is kind of wrong. The validator is, unfortunately, kind - * of on crack. - * - * The definition changed again in XHTML1.1; and in my opinion, this - * formulation makes the most sense. - * - * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ )) - * - * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode. - * If we encounter a thead, tfoot or tbody, we are placed in the former - * mode, and we *must* wrap any stray tr segments with a tbody. But if - * we don't run into any of them, just have tr tags is OK. - */ -class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef -{ - public $allow_empty = false; - public $type = 'table'; - public $elements = array('tr' => true, 'tbody' => true, 'thead' => true, - 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true); - public function __construct() {} - public function validateChildren($tokens_of_children, $config, $context) { - if (empty($tokens_of_children)) return false; - - // this ensures that the loop gets run one last time before closing - // up. It's a little bit of a hack, but it works! Just make sure you - // get rid of the token later. - $tokens_of_children[] = false; - - // only one of these elements is allowed in a table - $caption = false; - $thead = false; - $tfoot = false; - - // as many of these as you want - $cols = array(); - $content = array(); - - $nesting = 0; // current depth so we can determine nodes - $is_collecting = false; // are we globbing together tokens to package - // into one of the collectors? - $collection = array(); // collected nodes - $tag_index = 0; // the first node might be whitespace, - // so this tells us where the start tag is - $tbody_mode = false; // if true, then we need to wrap any stray - // <tr>s with a <tbody>. - - foreach ($tokens_of_children as $token) { - $is_child = ($nesting == 0); - - if ($token === false) { - // terminating sequence started - } elseif ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - // handle node collection - if ($is_collecting) { - if ($is_child) { - // okay, let's stash the tokens away - // first token tells us the type of the collection - switch ($collection[$tag_index]->name) { - case 'tbody': - $tbody_mode = true; - case 'tr': - $content[] = $collection; - break; - case 'caption': - if ($caption !== false) break; - $caption = $collection; - break; - case 'thead': - case 'tfoot': - $tbody_mode = true; - // XXX This breaks rendering properties with - // Firefox, which never floats a <thead> to - // the top. Ever. (Our scheme will float the - // first <thead> to the top.) So maybe - // <thead>s that are not first should be - // turned into <tbody>? Very tricky, indeed. - - // access the appropriate variable, $thead or $tfoot - $var = $collection[$tag_index]->name; - if ($$var === false) { - $$var = $collection; - } else { - // Oops, there's a second one! What - // should we do? Current behavior is to - // transmutate the first and last entries into - // tbody tags, and then put into content. - // Maybe a better idea is to *attach - // it* to the existing thead or tfoot? - // We don't do this, because Firefox - // doesn't float an extra tfoot to the - // bottom like it does for the first one. - $collection[$tag_index]->name = 'tbody'; - $collection[count($collection)-1]->name = 'tbody'; - $content[] = $collection; - } - break; - case 'colgroup': - $cols[] = $collection; - break; - } - $collection = array(); - $is_collecting = false; - $tag_index = 0; - } else { - // add the node to the collection - $collection[] = $token; - } - } - - // terminate - if ($token === false) break; - - if ($is_child) { - // determine what we're dealing with - if ($token->name == 'col') { - // the only empty tag in the possie, we can handle it - // immediately - $cols[] = array_merge($collection, array($token)); - $collection = array(); - $tag_index = 0; - continue; - } - switch($token->name) { - case 'caption': - case 'colgroup': - case 'thead': - case 'tfoot': - case 'tbody': - case 'tr': - $is_collecting = true; - $collection[] = $token; - continue; - default: - if (!empty($token->is_whitespace)) { - $collection[] = $token; - $tag_index++; - } - continue; - } - } - } - - if (empty($content)) return false; - - $ret = array(); - if ($caption !== false) $ret = array_merge($ret, $caption); - if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); - if ($thead !== false) $ret = array_merge($ret, $thead); - if ($tfoot !== false) $ret = array_merge($ret, $tfoot); - - if ($tbody_mode) { - // a little tricky, since the start of the collection may be - // whitespace - $inside_tbody = false; - foreach ($content as $token_array) { - // find the starting token - foreach ($token_array as $t) { - if ($t->name === 'tr' || $t->name === 'tbody') { - break; - } - } // iterator variable carries over - if ($t->name === 'tr') { - if ($inside_tbody) { - $ret = array_merge($ret, $token_array); - } else { - $ret[] = new HTMLPurifier_Token_Start('tbody'); - $ret = array_merge($ret, $token_array); - $inside_tbody = true; - } - } elseif ($t->name === 'tbody') { - if ($inside_tbody) { - $ret[] = new HTMLPurifier_Token_End('tbody'); - $inside_tbody = false; - $ret = array_merge($ret, $token_array); - } else { - $ret = array_merge($ret, $token_array); - } - } else { - trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR); - } - } - if ($inside_tbody) { - $ret[] = new HTMLPurifier_Token_End('tbody'); - } - } else { - foreach ($content as $token_array) { - // invariant: everything in here is <tr>s - $ret = array_merge($ret, $token_array); - } - } - - if (!empty($collection) && $is_collecting == false){ - // grab the trailing space - $ret = array_merge($ret, $collection); - } - - array_pop($tokens_of_children); // remove phantom token - - return ($ret === $tokens_of_children) ? true : $ret; - - } -} - -// vim: et sw=4 sts=4 |