diff options
Diffstat (limited to 'library/HTMLPurifier/ChildDef')
-rw-r--r-- | library/HTMLPurifier/ChildDef/Chameleon.php | 67 | ||||
-rw-r--r-- | library/HTMLPurifier/ChildDef/Custom.php | 102 | ||||
-rw-r--r-- | library/HTMLPurifier/ChildDef/Empty.php | 38 | ||||
-rw-r--r-- | library/HTMLPurifier/ChildDef/List.php | 86 | ||||
-rw-r--r-- | library/HTMLPurifier/ChildDef/Optional.php | 45 | ||||
-rw-r--r-- | library/HTMLPurifier/ChildDef/Required.php | 118 | ||||
-rw-r--r-- | library/HTMLPurifier/ChildDef/StrictBlockquote.php | 110 | ||||
-rw-r--r-- | library/HTMLPurifier/ChildDef/Table.php | 224 |
8 files changed, 0 insertions, 790 deletions
diff --git a/library/HTMLPurifier/ChildDef/Chameleon.php b/library/HTMLPurifier/ChildDef/Chameleon.php deleted file mode 100644 index 7439be26b..000000000 --- a/library/HTMLPurifier/ChildDef/Chameleon.php +++ /dev/null @@ -1,67 +0,0 @@ -<?php - -/** - * Definition that uses different definitions depending on context. - * - * The del and ins tags are notable because they allow different types of - * elements depending on whether or not they're in a block or inline context. - * Chameleon allows this behavior to happen by using two different - * definitions depending on context. While this somewhat generalized, - * it is specifically intended for those two tags. - */ -class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef -{ - - /** - * Instance of the definition object to use when inline. Usually stricter. - * @type HTMLPurifier_ChildDef_Optional - */ - public $inline; - - /** - * Instance of the definition object to use when block. - * @type HTMLPurifier_ChildDef_Optional - */ - public $block; - - /** - * @type string - */ - public $type = 'chameleon'; - - /** - * @param array $inline List of elements to allow when inline. - * @param array $block List of elements to allow when block. - */ - public function __construct($inline, $block) - { - $this->inline = new HTMLPurifier_ChildDef_Optional($inline); - $this->block = new HTMLPurifier_ChildDef_Optional($block); - $this->elements = $this->block->elements; - } - - /** - * @param HTMLPurifier_Node[] $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool - */ - public function validateChildren($children, $config, $context) - { - if ($context->get('IsInline') === false) { - return $this->block->validateChildren( - $children, - $config, - $context - ); - } else { - return $this->inline->validateChildren( - $children, - $config, - $context - ); - } - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Custom.php b/library/HTMLPurifier/ChildDef/Custom.php deleted file mode 100644 index 128132e96..000000000 --- a/library/HTMLPurifier/ChildDef/Custom.php +++ /dev/null @@ -1,102 +0,0 @@ -<?php - -/** - * Custom validation class, accepts DTD child definitions - * - * @warning Currently this class is an all or nothing proposition, that is, - * it will only give a bool return value. - */ -class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef -{ - /** - * @type string - */ - public $type = 'custom'; - - /** - * @type bool - */ - public $allow_empty = false; - - /** - * Allowed child pattern as defined by the DTD. - * @type string - */ - public $dtd_regex; - - /** - * PCRE regex derived from $dtd_regex. - * @type string - */ - private $_pcre_regex; - - /** - * @param $dtd_regex Allowed child pattern from the DTD - */ - public function __construct($dtd_regex) - { - $this->dtd_regex = $dtd_regex; - $this->_compileRegex(); - } - - /** - * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) - */ - protected function _compileRegex() - { - $raw = str_replace(' ', '', $this->dtd_regex); - if ($raw{0} != '(') { - $raw = "($raw)"; - } - $el = '[#a-zA-Z0-9_.-]+'; - $reg = $raw; - - // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M - // DOING! Seriously: if there's problems, please report them. - - // collect all elements into the $elements array - preg_match_all("/$el/", $reg, $matches); - foreach ($matches[0] as $match) { - $this->elements[$match] = true; - } - - // setup all elements as parentheticals with leading commas - $reg = preg_replace("/$el/", '(,\\0)', $reg); - - // remove commas when they were not solicited - $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); - - // remove all non-paranthetical commas: they are handled by first regex - $reg = preg_replace("/,\(/", '(', $reg); - - $this->_pcre_regex = $reg; - } - - /** - * @param HTMLPurifier_Node[] $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool - */ - public function validateChildren($children, $config, $context) - { - $list_of_children = ''; - $nesting = 0; // depth into the nest - foreach ($children as $node) { - if (!empty($node->is_whitespace)) { - continue; - } - $list_of_children .= $node->name . ','; - } - // add leading comma to deal with stray comma declarations - $list_of_children = ',' . rtrim($list_of_children, ','); - $okay = - preg_match( - '/^,?' . $this->_pcre_regex . '$/', - $list_of_children - ); - return (bool)$okay; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Empty.php b/library/HTMLPurifier/ChildDef/Empty.php deleted file mode 100644 index a8a6cbdd2..000000000 --- a/library/HTMLPurifier/ChildDef/Empty.php +++ /dev/null @@ -1,38 +0,0 @@ -<?php - -/** - * Definition that disallows all elements. - * @warning validateChildren() in this class is actually never called, because - * empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed - * before child definitions are parsed in earnest by - * HTMLPurifier_Strategy_FixNesting. - */ -class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef -{ - /** - * @type bool - */ - public $allow_empty = true; - - /** - * @type string - */ - public $type = 'empty'; - - public function __construct() - { - } - - /** - * @param HTMLPurifier_Node[] $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - public function validateChildren($children, $config, $context) - { - return array(); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/List.php b/library/HTMLPurifier/ChildDef/List.php deleted file mode 100644 index 891b9f6f5..000000000 --- a/library/HTMLPurifier/ChildDef/List.php +++ /dev/null @@ -1,86 +0,0 @@ -<?php - -/** - * Definition for list containers ul and ol. - * - * What does this do? The big thing is to handle ol/ul at the top - * level of list nodes, which should be handled specially by /folding/ - * them into the previous list node. We generally shouldn't ever - * see other disallowed elements, because the autoclose behavior - * in MakeWellFormed handles it. - */ -class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef -{ - /** - * @type string - */ - public $type = 'list'; - /** - * @type array - */ - // lying a little bit, so that we can handle ul and ol ourselves - // XXX: This whole business with 'wrap' is all a bit unsatisfactory - public $elements = array('li' => true, 'ul' => true, 'ol' => true); - - /** - * @param array $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - public function validateChildren($children, $config, $context) - { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($children)) { - return false; - } - - // the new set of children - $result = array(); - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - $current_li = false; - - foreach ($children as $node) { - if (!empty($node->is_whitespace)) { - $result[] = $node; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - if ($node->name === 'li') { - // good - $current_li = $node; - $result[] = $node; - } else { - // we want to tuck this into the previous li - // Invariant: we expect the node to be ol/ul - // ToDo: Make this more robust in the case of not ol/ul - // by distinguishing between existing li and li created - // to handle non-list elements; non-list elements should - // not be appended to an existing li; only li created - // for non-list. This distinction is not currently made. - if ($current_li === false) { - $current_li = new HTMLPurifier_Node_Element('li'); - $result[] = $current_li; - } - $current_li->children[] = $node; - $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo - } - } - if (empty($result)) { - return false; - } - if ($all_whitespace) { - return false; - } - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Optional.php b/library/HTMLPurifier/ChildDef/Optional.php deleted file mode 100644 index b9468063b..000000000 --- a/library/HTMLPurifier/ChildDef/Optional.php +++ /dev/null @@ -1,45 +0,0 @@ -<?php - -/** - * Definition that allows a set of elements, and allows no children. - * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required, - * really, one shouldn't inherit from the other. Only altered behavior - * is to overload a returned false with an array. Thus, it will never - * return false. - */ -class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required -{ - /** - * @type bool - */ - public $allow_empty = true; - - /** - * @type string - */ - public $type = 'optional'; - - /** - * @param array $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - public function validateChildren($children, $config, $context) - { - $result = parent::validateChildren($children, $config, $context); - // we assume that $children is not modified - if ($result === false) { - if (empty($children)) { - return true; - } elseif ($this->whitespace) { - return $children; - } else { - return array(); - } - } - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Required.php b/library/HTMLPurifier/ChildDef/Required.php deleted file mode 100644 index 0d1c8f5f3..000000000 --- a/library/HTMLPurifier/ChildDef/Required.php +++ /dev/null @@ -1,118 +0,0 @@ -<?php - -/** - * Definition that allows a set of elements, but disallows empty children. - */ -class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef -{ - /** - * Lookup table of allowed elements. - * @type array - */ - public $elements = array(); - - /** - * Whether or not the last passed node was all whitespace. - * @type bool - */ - protected $whitespace = false; - - /** - * @param array|string $elements List of allowed element names (lowercase). - */ - public function __construct($elements) - { - if (is_string($elements)) { - $elements = str_replace(' ', '', $elements); - $elements = explode('|', $elements); - } - $keys = array_keys($elements); - if ($keys == array_keys($keys)) { - $elements = array_flip($elements); - foreach ($elements as $i => $x) { - $elements[$i] = true; - if (empty($i)) { - unset($elements[$i]); - } // remove blank - } - } - $this->elements = $elements; - } - - /** - * @type bool - */ - public $allow_empty = false; - - /** - * @type string - */ - public $type = 'required'; - - /** - * @param array $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - public function validateChildren($children, $config, $context) - { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($children)) { - return false; - } - - // the new set of children - $result = array(); - - // whether or not parsed character data is allowed - // this controls whether or not we silently drop a tag - // or generate escaped HTML from it - $pcdata_allowed = isset($this->elements['#PCDATA']); - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - $stack = array_reverse($children); - while (!empty($stack)) { - $node = array_pop($stack); - if (!empty($node->is_whitespace)) { - $result[] = $node; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - if (!isset($this->elements[$node->name])) { - // special case text - // XXX One of these ought to be redundant or something - if ($pcdata_allowed && $node instanceof HTMLPurifier_Node_Text) { - $result[] = $node; - continue; - } - // spill the child contents in - // ToDo: Make configurable - if ($node instanceof HTMLPurifier_Node_Element) { - for ($i = count($node->children) - 1; $i >= 0; $i--) { - $stack[] = $node->children[$i]; - } - continue; - } - continue; - } - $result[] = $node; - } - if (empty($result)) { - return false; - } - if ($all_whitespace) { - $this->whitespace = true; - return false; - } - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/library/HTMLPurifier/ChildDef/StrictBlockquote.php deleted file mode 100644 index 3270a46e1..000000000 --- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ /dev/null @@ -1,110 +0,0 @@ -<?php - -/** - * Takes the contents of blockquote when in strict and reformats for validation. - */ -class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required -{ - /** - * @type array - */ - protected $real_elements; - - /** - * @type array - */ - protected $fake_elements; - - /** - * @type bool - */ - public $allow_empty = true; - - /** - * @type string - */ - public $type = 'strictblockquote'; - - /** - * @type bool - */ - protected $init = false; - - /** - * @param HTMLPurifier_Config $config - * @return array - * @note We don't want MakeWellFormed to auto-close inline elements since - * they might be allowed. - */ - public function getAllowedElements($config) - { - $this->init($config); - return $this->fake_elements; - } - - /** - * @param array $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - public function validateChildren($children, $config, $context) - { - $this->init($config); - - // trick the parent class into thinking it allows more - $this->elements = $this->fake_elements; - $result = parent::validateChildren($children, $config, $context); - $this->elements = $this->real_elements; - - if ($result === false) { - return array(); - } - if ($result === true) { - $result = $children; - } - - $def = $config->getHTMLDefinition(); - $block_wrap_name = $def->info_block_wrapper; - $block_wrap = false; - $ret = array(); - - foreach ($result as $node) { - if ($block_wrap === false) { - if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) || - ($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) { - $block_wrap = new HTMLPurifier_Node_Element($def->info_block_wrapper); - $ret[] = $block_wrap; - } - } else { - if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) { - $block_wrap = false; - - } - } - if ($block_wrap) { - $block_wrap->children[] = $node; - } else { - $ret[] = $node; - } - } - return $ret; - } - - /** - * @param HTMLPurifier_Config $config - */ - private function init($config) - { - if (!$this->init) { - $def = $config->getHTMLDefinition(); - // allow all inline elements - $this->real_elements = $this->elements; - $this->fake_elements = $def->info_content_sets['Flow']; - $this->fake_elements['#PCDATA'] = true; - $this->init = true; - } - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Table.php b/library/HTMLPurifier/ChildDef/Table.php deleted file mode 100644 index 3e4a0f218..000000000 --- a/library/HTMLPurifier/ChildDef/Table.php +++ /dev/null @@ -1,224 +0,0 @@ -<?php - -/** - * Definition for tables. The general idea is to extract out all of the - * essential bits, and then reconstruct it later. - * - * This is a bit confusing, because the DTDs and the W3C - * validators seem to disagree on the appropriate definition. The - * DTD claims: - * - * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+) - * - * But actually, the HTML4 spec then has this to say: - * - * The TBODY start tag is always required except when the table - * contains only one table body and no table head or foot sections. - * The TBODY end tag may always be safely omitted. - * - * So the DTD is kind of wrong. The validator is, unfortunately, kind - * of on crack. - * - * The definition changed again in XHTML1.1; and in my opinion, this - * formulation makes the most sense. - * - * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ )) - * - * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode. - * If we encounter a thead, tfoot or tbody, we are placed in the former - * mode, and we *must* wrap any stray tr segments with a tbody. But if - * we don't run into any of them, just have tr tags is OK. - */ -class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef -{ - /** - * @type bool - */ - public $allow_empty = false; - - /** - * @type string - */ - public $type = 'table'; - - /** - * @type array - */ - public $elements = array( - 'tr' => true, - 'tbody' => true, - 'thead' => true, - 'tfoot' => true, - 'caption' => true, - 'colgroup' => true, - 'col' => true - ); - - public function __construct() - { - } - - /** - * @param array $children - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - public function validateChildren($children, $config, $context) - { - if (empty($children)) { - return false; - } - - // only one of these elements is allowed in a table - $caption = false; - $thead = false; - $tfoot = false; - - // whitespace - $initial_ws = array(); - $after_caption_ws = array(); - $after_thead_ws = array(); - $after_tfoot_ws = array(); - - // as many of these as you want - $cols = array(); - $content = array(); - - $tbody_mode = false; // if true, then we need to wrap any stray - // <tr>s with a <tbody>. - - $ws_accum =& $initial_ws; - - foreach ($children as $node) { - if ($node instanceof HTMLPurifier_Node_Comment) { - $ws_accum[] = $node; - continue; - } - switch ($node->name) { - case 'tbody': - $tbody_mode = true; - // fall through - case 'tr': - $content[] = $node; - $ws_accum =& $content; - break; - case 'caption': - // there can only be one caption! - if ($caption !== false) break; - $caption = $node; - $ws_accum =& $after_caption_ws; - break; - case 'thead': - $tbody_mode = true; - // XXX This breaks rendering properties with - // Firefox, which never floats a <thead> to - // the top. Ever. (Our scheme will float the - // first <thead> to the top.) So maybe - // <thead>s that are not first should be - // turned into <tbody>? Very tricky, indeed. - if ($thead === false) { - $thead = $node; - $ws_accum =& $after_thead_ws; - } else { - // Oops, there's a second one! What - // should we do? Current behavior is to - // transmutate the first and last entries into - // tbody tags, and then put into content. - // Maybe a better idea is to *attach - // it* to the existing thead or tfoot? - // We don't do this, because Firefox - // doesn't float an extra tfoot to the - // bottom like it does for the first one. - $node->name = 'tbody'; - $content[] = $node; - $ws_accum =& $content; - } - break; - case 'tfoot': - // see above for some aveats - $tbody_mode = true; - if ($tfoot === false) { - $tfoot = $node; - $ws_accum =& $after_tfoot_ws; - } else { - $node->name = 'tbody'; - $content[] = $node; - $ws_accum =& $content; - } - break; - case 'colgroup': - case 'col': - $cols[] = $node; - $ws_accum =& $cols; - break; - case '#PCDATA': - // How is whitespace handled? We treat is as sticky to - // the *end* of the previous element. So all of the - // nonsense we have worked on is to keep things - // together. - if (!empty($node->is_whitespace)) { - $ws_accum[] = $node; - } - break; - } - } - - if (empty($content)) { - return false; - } - - $ret = $initial_ws; - if ($caption !== false) { - $ret[] = $caption; - $ret = array_merge($ret, $after_caption_ws); - } - if ($cols !== false) { - $ret = array_merge($ret, $cols); - } - if ($thead !== false) { - $ret[] = $thead; - $ret = array_merge($ret, $after_thead_ws); - } - if ($tfoot !== false) { - $ret[] = $tfoot; - $ret = array_merge($ret, $after_tfoot_ws); - } - - if ($tbody_mode) { - // we have to shuffle tr into tbody - $current_tr_tbody = null; - - foreach($content as $node) { - switch ($node->name) { - case 'tbody': - $current_tr_tbody = null; - $ret[] = $node; - break; - case 'tr': - if ($current_tr_tbody === null) { - $current_tr_tbody = new HTMLPurifier_Node_Element('tbody'); - $ret[] = $current_tr_tbody; - } - $current_tr_tbody->children[] = $node; - break; - case '#PCDATA': - assert($node->is_whitespace); - if ($current_tr_tbody === null) { - $ret[] = $node; - } else { - $current_tr_tbody->children[] = $node; - } - break; - } - } - } else { - $ret = array_merge($ret, $content); - } - - return $ret; - - } -} - -// vim: et sw=4 sts=4 |