diff options
Diffstat (limited to 'vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef')
8 files changed, 796 insertions, 0 deletions
<?php

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php
// ---------------------------------------------------------------------------

/**
 * Definition that uses different definitions depending on context.
 *
 * The del and ins tags are notable because they allow different types of
 * elements depending on whether or not they're in a block or inline context.
 * Chameleon allows this behavior to happen by using two different
 * definitions depending on context.  While this is somewhat generalized,
 * it is specifically intended for those two tags.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /**
     * Instance of the definition object to use when inline. Usually stricter.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $inline;

    /**
     * Instance of the definition object to use when block.
     * @type HTMLPurifier_ChildDef_Optional
     */
    public $block;

    /**
     * @type string
     */
    public $type = 'chameleon';

    /**
     * @param array $inline List of elements to allow when inline.
     * @param array $block List of elements to allow when block.
     */
    public function __construct($inline, $block)
    {
        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
        $this->block = new HTMLPurifier_ChildDef_Optional($block);
        // The advertised element set is the block one.
        $this->elements = $this->block->elements;
    }

    /**
     * Delegates to the block definition when the context's 'IsInline' value
     * is strictly false, and to the inline definition in every other case.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool Whatever the selected definition returns.
     */
    public function validateChildren($children, $config, $context)
    {
        if ($context->get('IsInline') === false) {
            return $this->block->validateChildren(
                $children,
                $config,
                $context
            );
        } else {
            return $this->inline->validateChildren(
                $children,
                $config,
                $context
            );
        }
    }
}

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php
// ---------------------------------------------------------------------------

/**
 * Custom validation class, accepts DTD child definitions
 *
 * @warning Currently this class is an all or nothing proposition, that is,
 *          it will only give a bool return value.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'custom';

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * Allowed child pattern as defined by the DTD.
     * @type string
     */
    public $dtd_regex;

    /**
     * PCRE regex derived from $dtd_regex.
     * @type string
     */
    private $_pcre_regex;

    /**
     * @param $dtd_regex Allowed child pattern from the DTD
     */
    public function __construct($dtd_regex)
    {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
     * and records every element name found in the pattern in $elements.
     */
    protected function _compileRegex()
    {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // Square-bracket offset: the curly-brace form ($raw{0}) is a parse
        // error on PHP 8.  The empty-string guard keeps the historical
        // result ("()") without reading an out-of-range offset.
        if ($raw === '' || $raw[0] !== '(') {
            $raw = "($raw)";
        }
        $el = '[#a-zA-Z0-9_.-]+';
        $reg = $raw;

        // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
        // DOING! Seriously: if there's problems, please report them.

        // collect all elements into the $elements array
        preg_match_all("/$el/", $reg, $matches);
        foreach ($matches[0] as $match) {
            $this->elements[$match] = true;
        }

        // setup all elements as parentheticals with leading commas
        $reg = preg_replace("/$el/", '(,\\0)', $reg);

        // remove commas when they were not solicited
        $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);

        // remove all non-parenthetical commas: they are handled by first regex
        $reg = preg_replace("/,\(/", '(', $reg);

        $this->_pcre_regex = $reg;
    }

    /**
     * Matches the comma-joined names of the non-whitespace children against
     * the compiled pattern.
     *
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True when the child sequence matches the pattern.
     */
    public function validateChildren($children, $config, $context)
    {
        $list_of_children = '';
        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                continue;
            }
            $list_of_children .= $node->name . ',';
        }
        // add leading comma to deal with stray comma declarations
        $list_of_children = ',' . rtrim($list_of_children, ',');
        $okay =
            preg_match(
                '/^,?' . $this->_pcre_regex . '$/',
                $list_of_children
            );
        return (bool)$okay;
    }
}

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php
// ---------------------------------------------------------------------------

/**
 * Definition that disallows all elements.
 * @warning validateChildren() in this class is actually never called, because
 *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
 *          before child definitions are parsed in earnest by
 *          HTMLPurifier_Strategy_FixNesting.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'empty';

    public function __construct()
    {
    }

    /**
     * @param HTMLPurifier_Node[] $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array Always an empty array: no children are kept.
     */
    public function validateChildren($children, $config, $context)
    {
        return array();
    }
}

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
// ---------------------------------------------------------------------------

/**
 * Definition for list containers ul and ol.
 *
 * What does this do?  The big thing is to handle ol/ul at the top
 * level of list nodes, which should be handled specially by /folding/
 * them into the previous list node.  We generally shouldn't ever
 * see other disallowed elements, because the autoclose behavior
 * in MakeWellFormed handles it.
 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    /**
     * @type string
     */
    public $type = 'list';

    /**
     * @type array
     */
    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Reset to false at the start of every validateChildren() call.
     * Declared explicitly so the assignment does not create a dynamic
     * property (deprecated since PHP 8.2); it is public because the
     * dynamically created property it replaces was public.
     * @type bool
     */
    public $whitespace = false;

    /**
     * Keeps li and whitespace children as-is and tucks every other child
     * into the most recent li, creating one when none exists yet.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool New list of children, or false to delete the
     *         parent node (no children, li not allowed, or only whitespace).
     */
    public function validateChildren($children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($children)) {
            return false;
        }

        // if li is not allowed, delete parent node
        if (!isset($config->getHTMLDefinition()->info['li'])) {
            trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING);
            return false;
        }

        // the new set of children
        $result = array();

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        $current_li = false;

        foreach ($children as $node) {
            if (!empty($node->is_whitespace)) {
                $result[] = $node;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            if ($node->name === 'li') {
                // good
                $current_li = $node;
                $result[] = $node;
            } else {
                // we want to tuck this into the previous li
                // Invariant: we expect the node to be ol/ul
                // ToDo: Make this more robust in the case of not ol/ul
                // by distinguishing between existing li and li created
                // to handle non-list elements; non-list elements should
                // not be appended to an existing li; only li created
                // for non-list. This distinction is not currently made.
                if ($current_li === false) {
                    $current_li = new HTMLPurifier_Node_Element('li');
                    $result[] = $current_li;
                }
                $current_li->children[] = $node;
                $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo
            }
        }
        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            return false;
        }
        return $result;
    }
}

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php
// ---------------------------------------------------------------------------

/**
 * Definition that allows a set of elements, and allows no children.
 * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
 *       really, one shouldn't inherit from the other.  Only altered behavior
 *       is to overload a returned false with an array.  Thus, it will never
 *       return false.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'optional';

    /**
     * Runs the Required validation and replaces a false result:
     * true when there were no children at all, the untouched children when
     * they were all whitespace, and an empty array otherwise.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool Never false; true only for an empty child list.
     */
    public function validateChildren($children, $config, $context)
    {
        $result = parent::validateChildren($children, $config, $context);
        // we assume that $children is not modified
        if ($result === false) {
            if (empty($children)) {
                return true;
            } elseif ($this->whitespace) {
                return $children;
            } else {
                return array();
            }
        }
        return $result;
    }
}

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php
// ---------------------------------------------------------------------------

/**
 * Definition that allows a set of elements, but disallows empty children.
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /**
     * Lookup table of allowed elements.
     * @type array
     */
    public $elements = array();

    /**
     * Whether or not the last passed node was all whitespace.
     * @type bool
     */
    protected $whitespace = false;

    /**
     * @param array|string $elements List of allowed element names (lowercase).
     *        A string is split on '|' after spaces are removed.
     */
    public function __construct($elements)
    {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        $keys = array_keys($elements);
        // A plain list (keys 0..n-1) is turned into a name => true lookup;
        // anything else is taken to be a lookup already.
        if ($keys == array_keys($keys)) {
            $elements = array_flip($elements);
            foreach ($elements as $i => $x) {
                $elements[$i] = true;
                if (empty($i)) {
                    unset($elements[$i]);
                } // remove blank
            }
        }
        $this->elements = $elements;
    }

    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'required';

    /**
     * Keeps allowed and whitespace children; for a disallowed element its
     * own children are examined in its place; disallowed text is kept only
     * when '#PCDATA' is allowed.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool New list of children, or false to delete the
     *         parent node (nothing left, or only whitespace).
     */
    public function validateChildren($children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($children)) {
            return false;
        }

        // the new set of children
        $result = array();

        // whether or not parsed character data is allowed
        // this controls whether or not we silently drop a tag
        // or generate escaped HTML from it
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // Reversed so that array_pop() yields the children in document order.
        $stack = array_reverse($children);
        while (!empty($stack)) {
            $node = array_pop($stack);
            if (!empty($node->is_whitespace)) {
                $result[] = $node;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            if (!isset($this->elements[$node->name])) {
                // special case text
                // XXX One of these ought to be redundant or something
                if ($pcdata_allowed && $node instanceof HTMLPurifier_Node_Text) {
                    $result[] = $node;
                    continue;
                }
                // spill the child contents in
                // ToDo: Make configurable
                if ($node instanceof HTMLPurifier_Node_Element) {
                    // pushed last-to-first so they pop first-to-last
                    for ($i = count($node->children) - 1; $i >= 0; $i--) {
                        $stack[] = $node->children[$i];
                    }
                    continue;
                }
                continue;
            }
            $result[] = $node;
        }
        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            $this->whitespace = true;
            return false;
        }
        return $result;
    }
}

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php
// ---------------------------------------------------------------------------

/**
 * Takes the contents of blockquote when in strict and reformats for validation.
 */
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
    /**
     * @type array
     */
    protected $real_elements;

    /**
     * @type array
     */
    protected $fake_elements;

    /**
     * @type bool
     */
    public $allow_empty = true;

    /**
     * @type string
     */
    public $type = 'strictblockquote';

    /**
     * @type bool
     */
    protected $init = false;

    /**
     * @param HTMLPurifier_Config $config
     * @return array
     * @note We don't want MakeWellFormed to auto-close inline elements since
     *       they might be allowed.
     */
    public function getAllowedElements($config)
    {
        $this->init($config);
        return $this->fake_elements;
    }

    /**
     * Validates against the wider "fake" element set, then wraps each run
     * of nodes that the real element set does not allow (plus non-whitespace
     * text) in the definition's block wrapper element.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array New list of children (empty when the parent validation
     *         returned false).
     */
    public function validateChildren($children, $config, $context)
    {
        $this->init($config);

        // trick the parent class into thinking it allows more
        $this->elements = $this->fake_elements;
        $result = parent::validateChildren($children, $config, $context);
        $this->elements = $this->real_elements;

        if ($result === false) {
            return array();
        }
        if ($result === true) {
            $result = $children;
        }

        $def = $config->getHTMLDefinition();
        $block_wrap_name = $def->info_block_wrapper;
        $block_wrap = false; // wrapper currently being filled, if any
        $ret = array();

        foreach ($result as $node) {
            if ($block_wrap === false) {
                // open a wrapper on the first node that may not stand alone
                if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) ||
                    ($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) {
                    $block_wrap = new HTMLPurifier_Node_Element($block_wrap_name);
                    $ret[] = $block_wrap;
                }
            } else {
                // a really-allowed element closes the current wrapper
                if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) {
                    $block_wrap = false;
                }
            }
            if ($block_wrap) {
                $block_wrap->children[] = $node;
            } else {
                $ret[] = $node;
            }
        }
        return $ret;
    }

    /**
     * Lazily captures the real element set and builds the fake one from the
     * definition's 'Flow' content set plus '#PCDATA'.
     *
     * @param HTMLPurifier_Config $config
     */
    private function init($config)
    {
        if (!$this->init) {
            $def = $config->getHTMLDefinition();
            // allow all inline elements
            $this->real_elements = $this->elements;
            $this->fake_elements = $def->info_content_sets['Flow'];
            $this->fake_elements['#PCDATA'] = true;
            $this->init = true;
        }
    }
}

// ---------------------------------------------------------------------------
// vendor/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
// ---------------------------------------------------------------------------

/**
 * Definition for tables.  The general idea is to extract out all of the
 * essential bits, and then reconstruct it later.
 *
 * This is a bit confusing, because the DTDs and the W3C
 * validators seem to disagree on the appropriate definition. The
 * DTD claims:
 *
 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
 *
 * But actually, the HTML4 spec then has this to say:
 *
 *      The TBODY start tag is always required except when the table
 *      contains only one table body and no table head or foot sections.
 *      The TBODY end tag may always be safely omitted.
 *
 * So the DTD is kind of wrong.  The validator is, unfortunately, kind
 * of on crack.
 *
 * The definition changed again in XHTML1.1; and in my opinion, this
 * formulation makes the most sense.
 *
 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
 *
 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
 * If we encounter a thead, tfoot or tbody, we are placed in the former
 * mode, and we *must* wrap any stray tr segments with a tbody. But if
 * we don't run into any of them, just having tr tags is OK.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'table';

    /**
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Sorts the children into caption / cols / thead / tfoot / content
     * buckets and re-emits them in that order.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool Reordered children, or false when there are no
     *         children or no tr/tbody content was found.
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        // Reference to whichever bucket should receive the whitespace and
        // comments that follow the element seen most recently.
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            // Nodes whose name matches no case below are dropped.
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                    // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption!
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever. (Our scheme will float the
                    // first <thead> to the top.)  So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! What
                        // should we do?  Current behavior is to
                        // transmutate the first and last entries into
                        // tbody tags, and then put into content.
                        // Maybe a better idea is to *attach
                        // it* to the existing thead or tfoot?
                        // We don't do this, because Firefox
                        // doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // see above for some caveats
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky to
                    // the *end* of the previous element. So all of the
                    // nonsense we have worked on is to keep things
                    // together.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }

        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array, so it is merged unconditionally
        // (a no-op when it is empty).
        $ret = array_merge($ret, $cols);
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            // As in the loop above, names without a case are dropped.
            foreach ($content as $node) {
                switch ($node->name) {
                    case 'tbody':
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        //assert($node->is_whitespace);
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;
    }
}

// vim: et sw=4 sts=4