aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/ChildDef
diff options
context:
space:
mode:
Diffstat (limited to 'library/HTMLPurifier/ChildDef')
-rw-r--r--library/HTMLPurifier/ChildDef/Chameleon.php33
-rw-r--r--library/HTMLPurifier/ChildDef/Custom.php56
-rw-r--r--library/HTMLPurifier/ChildDef/Empty.php22
-rw-r--r--library/HTMLPurifier/ChildDef/List.php86
-rw-r--r--library/HTMLPurifier/ChildDef/Optional.php31
-rw-r--r--library/HTMLPurifier/ChildDef/Required.php103
-rw-r--r--library/HTMLPurifier/ChildDef/StrictBlockquote.php96
-rw-r--r--library/HTMLPurifier/ChildDef/Table.php302
8 files changed, 494 insertions, 235 deletions
diff --git a/library/HTMLPurifier/ChildDef/Chameleon.php b/library/HTMLPurifier/ChildDef/Chameleon.php
index 15c364ee3..7439be26b 100644
--- a/library/HTMLPurifier/ChildDef/Chameleon.php
+++ b/library/HTMLPurifier/ChildDef/Chameleon.php
@@ -14,33 +14,52 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
/**
* Instance of the definition object to use when inline. Usually stricter.
+ * @type HTMLPurifier_ChildDef_Optional
*/
public $inline;
/**
* Instance of the definition object to use when block.
+ * @type HTMLPurifier_ChildDef_Optional
*/
public $block;
+ /**
+ * @type string
+ */
public $type = 'chameleon';
/**
- * @param $inline List of elements to allow when inline.
- * @param $block List of elements to allow when block.
+ * @param array $inline List of elements to allow when inline.
+ * @param array $block List of elements to allow when block.
*/
- public function __construct($inline, $block) {
+ public function __construct($inline, $block)
+ {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
- $this->block = new HTMLPurifier_ChildDef_Optional($block);
+ $this->block = new HTMLPurifier_ChildDef_Optional($block);
$this->elements = $this->block->elements;
}
- public function validateChildren($tokens_of_children, $config, $context) {
+ /**
+ * @param HTMLPurifier_Node[] $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function validateChildren($children, $config, $context)
+ {
if ($context->get('IsInline') === false) {
return $this->block->validateChildren(
- $tokens_of_children, $config, $context);
+ $children,
+ $config,
+ $context
+ );
} else {
return $this->inline->validateChildren(
- $tokens_of_children, $config, $context);
+ $children,
+ $config,
+ $context
+ );
}
}
}
diff --git a/library/HTMLPurifier/ChildDef/Custom.php b/library/HTMLPurifier/ChildDef/Custom.php
index b68047b4b..128132e96 100644
--- a/library/HTMLPurifier/ChildDef/Custom.php
+++ b/library/HTMLPurifier/ChildDef/Custom.php
@@ -8,28 +8,42 @@
*/
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
+ /**
+ * @type string
+ */
public $type = 'custom';
+
+ /**
+ * @type bool
+ */
public $allow_empty = false;
+
/**
- * Allowed child pattern as defined by the DTD
+ * Allowed child pattern as defined by the DTD.
+ * @type string
*/
public $dtd_regex;
+
/**
- * PCRE regex derived from $dtd_regex
- * @private
+ * PCRE regex derived from $dtd_regex.
+ * @type string
*/
private $_pcre_regex;
+
/**
* @param $dtd_regex Allowed child pattern from the DTD
*/
- public function __construct($dtd_regex) {
+ public function __construct($dtd_regex)
+ {
$this->dtd_regex = $dtd_regex;
$this->_compileRegex();
}
+
/**
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
*/
- protected function _compileRegex() {
+ protected function _compileRegex()
+ {
$raw = str_replace(' ', '', $this->dtd_regex);
if ($raw{0} != '(') {
$raw = "($raw)";
@@ -57,33 +71,31 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$this->_pcre_regex = $reg;
}
- public function validateChildren($tokens_of_children, $config, $context) {
+
+ /**
+ * @param HTMLPurifier_Node[] $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function validateChildren($children, $config, $context)
+ {
$list_of_children = '';
$nesting = 0; // depth into the nest
- foreach ($tokens_of_children as $token) {
- if (!empty($token->is_whitespace)) continue;
-
- $is_child = ($nesting == 0); // direct
-
- if ($token instanceof HTMLPurifier_Token_Start) {
- $nesting++;
- } elseif ($token instanceof HTMLPurifier_Token_End) {
- $nesting--;
- }
-
- if ($is_child) {
- $list_of_children .= $token->name . ',';
+ foreach ($children as $node) {
+ if (!empty($node->is_whitespace)) {
+ continue;
}
+ $list_of_children .= $node->name . ',';
}
// add leading comma to deal with stray comma declarations
$list_of_children = ',' . rtrim($list_of_children, ',');
$okay =
preg_match(
- '/^,?'.$this->_pcre_regex.'$/',
+ '/^,?' . $this->_pcre_regex . '$/',
$list_of_children
);
-
- return (bool) $okay;
+ return (bool)$okay;
}
}
diff --git a/library/HTMLPurifier/ChildDef/Empty.php b/library/HTMLPurifier/ChildDef/Empty.php
index 13171f665..a8a6cbdd2 100644
--- a/library/HTMLPurifier/ChildDef/Empty.php
+++ b/library/HTMLPurifier/ChildDef/Empty.php
@@ -9,10 +9,28 @@
*/
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
+ /**
+ * @type bool
+ */
public $allow_empty = true;
+
+ /**
+ * @type string
+ */
public $type = 'empty';
- public function __construct() {}
- public function validateChildren($tokens_of_children, $config, $context) {
+
+ public function __construct()
+ {
+ }
+
+ /**
+ * @param HTMLPurifier_Node[] $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array
+ */
+ public function validateChildren($children, $config, $context)
+ {
return array();
}
}
diff --git a/library/HTMLPurifier/ChildDef/List.php b/library/HTMLPurifier/ChildDef/List.php
new file mode 100644
index 000000000..891b9f6f5
--- /dev/null
+++ b/library/HTMLPurifier/ChildDef/List.php
@@ -0,0 +1,86 @@
+<?php
+
+/**
+ * Definition for list containers ul and ol.
+ *
+ * What does this do? The big thing is to handle ol/ul at the top
+ * level of list nodes, which should be handled specially by /folding/
+ * them into the previous list node. We generally shouldn't ever
+ * see other disallowed elements, because the autoclose behavior
+ * in MakeWellFormed handles it.
+ */
+class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
+{
+ /**
+ * @type string
+ */
+ public $type = 'list';
+ /**
+ * @type array
+ */
+ // lying a little bit, so that we can handle ul and ol ourselves
+ // XXX: This whole business with 'wrap' is all a bit unsatisfactory
+ public $elements = array('li' => true, 'ul' => true, 'ol' => true);
+
+ /**
+ * @param array $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array
+ */
+ public function validateChildren($children, $config, $context)
+ {
+ // Flag for subclasses
+ $this->whitespace = false;
+
+ // if there are no tokens, delete parent node
+ if (empty($children)) {
+ return false;
+ }
+
+ // the new set of children
+ $result = array();
+
+ // a little sanity check to make sure it's not ALL whitespace
+ $all_whitespace = true;
+
+ $current_li = false;
+
+ foreach ($children as $node) {
+ if (!empty($node->is_whitespace)) {
+ $result[] = $node;
+ continue;
+ }
+ $all_whitespace = false; // phew, we're not talking about whitespace
+
+ if ($node->name === 'li') {
+ // good
+ $current_li = $node;
+ $result[] = $node;
+ } else {
+ // we want to tuck this into the previous li
+ // Invariant: we expect the node to be ol/ul
+ // ToDo: Make this more robust in the case of not ol/ul
+ // by distinguishing between existing li and li created
+ // to handle non-list elements; non-list elements should
+ // not be appended to an existing li; only li created
+ // for non-list. This distinction is not currently made.
+ if ($current_li === false) {
+ $current_li = new HTMLPurifier_Node_Element('li');
+ $result[] = $current_li;
+ }
+ $current_li->children[] = $node;
+ $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo
+ }
+ }
+ if (empty($result)) {
+ return false;
+ }
+ if ($all_whitespace) {
+ return false;
+ }
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/ChildDef/Optional.php b/library/HTMLPurifier/ChildDef/Optional.php
index 32bcb9898..b9468063b 100644
--- a/library/HTMLPurifier/ChildDef/Optional.php
+++ b/library/HTMLPurifier/ChildDef/Optional.php
@@ -9,15 +9,34 @@
*/
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
+ /**
+ * @type bool
+ */
public $allow_empty = true;
+
+ /**
+ * @type string
+ */
public $type = 'optional';
- public function validateChildren($tokens_of_children, $config, $context) {
- $result = parent::validateChildren($tokens_of_children, $config, $context);
- // we assume that $tokens_of_children is not modified
+
+ /**
+ * @param array $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array
+ */
+ public function validateChildren($children, $config, $context)
+ {
+ $result = parent::validateChildren($children, $config, $context);
+ // we assume that $children is not modified
if ($result === false) {
- if (empty($tokens_of_children)) return true;
- elseif ($this->whitespace) return $tokens_of_children;
- else return array();
+ if (empty($children)) {
+ return true;
+ } elseif ($this->whitespace) {
+ return $children;
+ } else {
+ return array();
+ }
}
return $result;
}
diff --git a/library/HTMLPurifier/ChildDef/Required.php b/library/HTMLPurifier/ChildDef/Required.php
index 4889f249b..0d1c8f5f3 100644
--- a/library/HTMLPurifier/ChildDef/Required.php
+++ b/library/HTMLPurifier/ChildDef/Required.php
@@ -7,17 +7,21 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
/**
* Lookup table of allowed elements.
- * @public
+ * @type array
*/
public $elements = array();
+
/**
* Whether or not the last passed node was all whitespace.
+ * @type bool
*/
protected $whitespace = false;
+
/**
- * @param $elements List of allowed element names (lowercase).
+ * @param array|string $elements List of allowed element names (lowercase).
*/
- public function __construct($elements) {
+ public function __construct($elements)
+ {
if (is_string($elements)) {
$elements = str_replace(' ', '', $elements);
$elements = explode('|', $elements);
@@ -27,29 +31,43 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$elements = array_flip($elements);
foreach ($elements as $i => $x) {
$elements[$i] = true;
- if (empty($i)) unset($elements[$i]); // remove blank
+ if (empty($i)) {
+ unset($elements[$i]);
+ } // remove blank
}
}
$this->elements = $elements;
}
+
+ /**
+ * @type bool
+ */
public $allow_empty = false;
+
+ /**
+ * @type string
+ */
public $type = 'required';
- public function validateChildren($tokens_of_children, $config, $context) {
+
+ /**
+ * @param array $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array
+ */
+ public function validateChildren($children, $config, $context)
+ {
// Flag for subclasses
$this->whitespace = false;
// if there are no tokens, delete parent node
- if (empty($tokens_of_children)) return false;
+ if (empty($children)) {
+ return false;
+ }
// the new set of children
$result = array();
- // current depth into the nest
- $nesting = 0;
-
- // whether or not we're deleting a node
- $is_deleting = false;
-
// whether or not parsed character data is allowed
// this controls whether or not we silently drop a tag
// or generate escaped HTML from it
@@ -58,58 +76,41 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
- // some configuration
- $escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
-
- // generator
- $gen = new HTMLPurifier_Generator($config, $context);
-
- foreach ($tokens_of_children as $token) {
- if (!empty($token->is_whitespace)) {
- $result[] = $token;
+ $stack = array_reverse($children);
+ while (!empty($stack)) {
+ $node = array_pop($stack);
+ if (!empty($node->is_whitespace)) {
+ $result[] = $node;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
- $is_child = ($nesting == 0);
-
- if ($token instanceof HTMLPurifier_Token_Start) {
- $nesting++;
- } elseif ($token instanceof HTMLPurifier_Token_End) {
- $nesting--;
- }
-
- if ($is_child) {
- $is_deleting = false;
- if (!isset($this->elements[$token->name])) {
- $is_deleting = true;
- if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
- $result[] = $token;
- } elseif ($pcdata_allowed && $escape_invalid_children) {
- $result[] = new HTMLPurifier_Token_Text(
- $gen->generateFromToken($token)
- );
+ if (!isset($this->elements[$node->name])) {
+ // special case text
+ // XXX One of these ought to be redundant or something
+ if ($pcdata_allowed && $node instanceof HTMLPurifier_Node_Text) {
+ $result[] = $node;
+ continue;
+ }
+ // spill the child contents in
+ // ToDo: Make configurable
+ if ($node instanceof HTMLPurifier_Node_Element) {
+ for ($i = count($node->children) - 1; $i >= 0; $i--) {
+ $stack[] = $node->children[$i];
}
continue;
}
+ continue;
}
- if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
- $result[] = $token;
- } elseif ($pcdata_allowed && $escape_invalid_children) {
- $result[] =
- new HTMLPurifier_Token_Text(
- $gen->generateFromToken($token)
- );
- } else {
- // drop silently
- }
+ $result[] = $node;
+ }
+ if (empty($result)) {
+ return false;
}
- if (empty($result)) return false;
if ($all_whitespace) {
$this->whitespace = true;
return false;
}
- if ($tokens_of_children == $result) return true;
return $result;
}
}
diff --git a/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/library/HTMLPurifier/ChildDef/StrictBlockquote.php
index dfae8a6e5..3270a46e1 100644
--- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php
+++ b/library/HTMLPurifier/ChildDef/StrictBlockquote.php
@@ -5,75 +5,97 @@
*/
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
+ /**
+ * @type array
+ */
protected $real_elements;
+
+ /**
+ * @type array
+ */
protected $fake_elements;
+
+ /**
+ * @type bool
+ */
public $allow_empty = true;
+
+ /**
+ * @type string
+ */
public $type = 'strictblockquote';
+
+ /**
+ * @type bool
+ */
protected $init = false;
/**
+ * @param HTMLPurifier_Config $config
+ * @return array
* @note We don't want MakeWellFormed to auto-close inline elements since
* they might be allowed.
*/
- public function getAllowedElements($config) {
+ public function getAllowedElements($config)
+ {
$this->init($config);
return $this->fake_elements;
}
- public function validateChildren($tokens_of_children, $config, $context) {
-
+ /**
+ * @param array $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array
+ */
+ public function validateChildren($children, $config, $context)
+ {
$this->init($config);
// trick the parent class into thinking it allows more
$this->elements = $this->fake_elements;
- $result = parent::validateChildren($tokens_of_children, $config, $context);
+ $result = parent::validateChildren($children, $config, $context);
$this->elements = $this->real_elements;
- if ($result === false) return array();
- if ($result === true) $result = $tokens_of_children;
+ if ($result === false) {
+ return array();
+ }
+ if ($result === true) {
+ $result = $children;
+ }
$def = $config->getHTMLDefinition();
- $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
- $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
- $is_inline = false;
- $depth = 0;
+ $block_wrap_name = $def->info_block_wrapper;
+ $block_wrap = false;
$ret = array();
- // assuming that there are no comment tokens
- foreach ($result as $i => $token) {
- $token = $result[$i];
- // ifs are nested for readability
- if (!$is_inline) {
- if (!$depth) {
- if (
- ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) ||
- (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name]))
- ) {
- $is_inline = true;
- $ret[] = $block_wrap_start;
- }
+ foreach ($result as $node) {
+ if ($block_wrap === false) {
+ if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) ||
+ ($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) {
+ $block_wrap = new HTMLPurifier_Node_Element($def->info_block_wrapper);
+ $ret[] = $block_wrap;
}
} else {
- if (!$depth) {
- // starting tokens have been inline text / empty
- if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) {
- if (isset($this->elements[$token->name])) {
- // ended
- $ret[] = $block_wrap_end;
- $is_inline = false;
- }
- }
+ if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) {
+ $block_wrap = false;
+
}
}
- $ret[] = $token;
- if ($token instanceof HTMLPurifier_Token_Start) $depth++;
- if ($token instanceof HTMLPurifier_Token_End) $depth--;
+ if ($block_wrap) {
+ $block_wrap->children[] = $node;
+ } else {
+ $ret[] = $node;
+ }
}
- if ($is_inline) $ret[] = $block_wrap_end;
return $ret;
}
- private function init($config) {
+ /**
+ * @param HTMLPurifier_Config $config
+ */
+ private function init($config)
+ {
if (!$this->init) {
$def = $config->getHTMLDefinition();
// allow all inline elements
diff --git a/library/HTMLPurifier/ChildDef/Table.php b/library/HTMLPurifier/ChildDef/Table.php
index 34f0227dd..3e4a0f218 100644
--- a/library/HTMLPurifier/ChildDef/Table.php
+++ b/library/HTMLPurifier/ChildDef/Table.php
@@ -1,140 +1,222 @@
<?php
/**
- * Definition for tables
+ * Definition for tables. The general idea is to extract out all of the
+ * essential bits, and then reconstruct it later.
+ *
+ * This is a bit confusing, because the DTDs and the W3C
+ * validators seem to disagree on the appropriate definition. The
+ * DTD claims:
+ *
+ * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
+ *
+ * But actually, the HTML4 spec then has this to say:
+ *
+ * The TBODY start tag is always required except when the table
+ * contains only one table body and no table head or foot sections.
+ * The TBODY end tag may always be safely omitted.
+ *
+ * So the DTD is kind of wrong. The validator is, unfortunately, kind
+ * of on crack.
+ *
+ * The definition changed again in XHTML1.1; and in my opinion, this
+ * formulation makes the most sense.
+ *
+ * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
+ *
+ * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
+ * If we encounter a thead, tfoot or tbody, we are placed in the former
+ * mode, and we *must* wrap any stray tr segments with a tbody. But if
+ * we don't run into any of them, just have tr tags is OK.
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
+ /**
+ * @type bool
+ */
public $allow_empty = false;
+
+ /**
+ * @type string
+ */
public $type = 'table';
- public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
- 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
- public function __construct() {}
- public function validateChildren($tokens_of_children, $config, $context) {
- if (empty($tokens_of_children)) return false;
- // this ensures that the loop gets run one last time before closing
- // up. It's a little bit of a hack, but it works! Just make sure you
- // get rid of the token later.
- $tokens_of_children[] = false;
+ /**
+ * @type array
+ */
+ public $elements = array(
+ 'tr' => true,
+ 'tbody' => true,
+ 'thead' => true,
+ 'tfoot' => true,
+ 'caption' => true,
+ 'colgroup' => true,
+ 'col' => true
+ );
+
+ public function __construct()
+ {
+ }
+
+ /**
+ * @param array $children
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array
+ */
+ public function validateChildren($children, $config, $context)
+ {
+ if (empty($children)) {
+ return false;
+ }
// only one of these elements is allowed in a table
$caption = false;
- $thead = false;
- $tfoot = false;
+ $thead = false;
+ $tfoot = false;
+
+ // whitespace
+ $initial_ws = array();
+ $after_caption_ws = array();
+ $after_thead_ws = array();
+ $after_tfoot_ws = array();
// as many of these as you want
- $cols = array();
+ $cols = array();
$content = array();
- $nesting = 0; // current depth so we can determine nodes
- $is_collecting = false; // are we globbing together tokens to package
- // into one of the collectors?
- $collection = array(); // collected nodes
- $tag_index = 0; // the first node might be whitespace,
- // so this tells us where the start tag is
-
- foreach ($tokens_of_children as $token) {
- $is_child = ($nesting == 0);
-
- if ($token === false) {
- // terminating sequence started
- } elseif ($token instanceof HTMLPurifier_Token_Start) {
- $nesting++;
- } elseif ($token instanceof HTMLPurifier_Token_End) {
- $nesting--;
- }
+ $tbody_mode = false; // if true, then we need to wrap any stray
+ // <tr>s with a <tbody>.
- // handle node collection
- if ($is_collecting) {
- if ($is_child) {
- // okay, let's stash the tokens away
- // first token tells us the type of the collection
- switch ($collection[$tag_index]->name) {
- case 'tr':
- case 'tbody':
- $content[] = $collection;
- break;
- case 'caption':
- if ($caption !== false) break;
- $caption = $collection;
- break;
- case 'thead':
- case 'tfoot':
- // access the appropriate variable, $thead or $tfoot
- $var = $collection[$tag_index]->name;
- if ($$var === false) {
- $$var = $collection;
- } else {
- // transmutate the first and less entries into
- // tbody tags, and then put into content
- $collection[$tag_index]->name = 'tbody';
- $collection[count($collection)-1]->name = 'tbody';
- $content[] = $collection;
- }
- break;
- case 'colgroup':
- $cols[] = $collection;
- break;
- }
- $collection = array();
- $is_collecting = false;
- $tag_index = 0;
+ $ws_accum =& $initial_ws;
+
+ foreach ($children as $node) {
+ if ($node instanceof HTMLPurifier_Node_Comment) {
+ $ws_accum[] = $node;
+ continue;
+ }
+ switch ($node->name) {
+ case 'tbody':
+ $tbody_mode = true;
+ // fall through
+ case 'tr':
+ $content[] = $node;
+ $ws_accum =& $content;
+ break;
+ case 'caption':
+ // there can only be one caption!
+ if ($caption !== false) break;
+ $caption = $node;
+ $ws_accum =& $after_caption_ws;
+ break;
+ case 'thead':
+ $tbody_mode = true;
+ // XXX This breaks rendering properties with
+ // Firefox, which never floats a <thead> to
+ // the top. Ever. (Our scheme will float the
+ // first <thead> to the top.) So maybe
+ // <thead>s that are not first should be
+ // turned into <tbody>? Very tricky, indeed.
+ if ($thead === false) {
+ $thead = $node;
+ $ws_accum =& $after_thead_ws;
} else {
- // add the node to the collection
- $collection[] = $token;
+ // Oops, there's a second one! What
+ // should we do? Current behavior is to
+ // transmutate the first and last entries into
+ // tbody tags, and then put into content.
+ // Maybe a better idea is to *attach
+ // it* to the existing thead or tfoot?
+ // We don't do this, because Firefox
+ // doesn't float an extra tfoot to the
+ // bottom like it does for the first one.
+ $node->name = 'tbody';
+ $content[] = $node;
+ $ws_accum =& $content;
}
- }
-
- // terminate
- if ($token === false) break;
-
- if ($is_child) {
- // determine what we're dealing with
- if ($token->name == 'col') {
- // the only empty tag in the possie, we can handle it
- // immediately
- $cols[] = array_merge($collection, array($token));
- $collection = array();
- $tag_index = 0;
- continue;
+ break;
+ case 'tfoot':
+ // see above for some aveats
+ $tbody_mode = true;
+ if ($tfoot === false) {
+ $tfoot = $node;
+ $ws_accum =& $after_tfoot_ws;
+ } else {
+ $node->name = 'tbody';
+ $content[] = $node;
+ $ws_accum =& $content;
}
- switch($token->name) {
- case 'caption':
- case 'colgroup':
- case 'thead':
- case 'tfoot':
- case 'tbody':
- case 'tr':
- $is_collecting = true;
- $collection[] = $token;
- continue;
- default:
- if (!empty($token->is_whitespace)) {
- $collection[] = $token;
- $tag_index++;
- }
- continue;
+ break;
+ case 'colgroup':
+ case 'col':
+ $cols[] = $node;
+ $ws_accum =& $cols;
+ break;
+ case '#PCDATA':
+ // How is whitespace handled? We treat is as sticky to
+ // the *end* of the previous element. So all of the
+ // nonsense we have worked on is to keep things
+ // together.
+ if (!empty($node->is_whitespace)) {
+ $ws_accum[] = $node;
}
+ break;
}
}
- if (empty($content)) return false;
-
- $ret = array();
- if ($caption !== false) $ret = array_merge($ret, $caption);
- if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
- if ($thead !== false) $ret = array_merge($ret, $thead);
- if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
- foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
- if (!empty($collection) && $is_collecting == false){
- // grab the trailing space
- $ret = array_merge($ret, $collection);
+ if (empty($content)) {
+ return false;
+ }
+
+ $ret = $initial_ws;
+ if ($caption !== false) {
+ $ret[] = $caption;
+ $ret = array_merge($ret, $after_caption_ws);
+ }
+ if ($cols !== false) {
+ $ret = array_merge($ret, $cols);
+ }
+ if ($thead !== false) {
+ $ret[] = $thead;
+ $ret = array_merge($ret, $after_thead_ws);
+ }
+ if ($tfoot !== false) {
+ $ret[] = $tfoot;
+ $ret = array_merge($ret, $after_tfoot_ws);
}
- array_pop($tokens_of_children); // remove phantom token
+ if ($tbody_mode) {
+ // we have to shuffle tr into tbody
+ $current_tr_tbody = null;
+
+ foreach($content as $node) {
+ switch ($node->name) {
+ case 'tbody':
+ $current_tr_tbody = null;
+ $ret[] = $node;
+ break;
+ case 'tr':
+ if ($current_tr_tbody === null) {
+ $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
+ $ret[] = $current_tr_tbody;
+ }
+ $current_tr_tbody->children[] = $node;
+ break;
+ case '#PCDATA':
+ assert($node->is_whitespace);
+ if ($current_tr_tbody === null) {
+ $ret[] = $node;
+ } else {
+ $current_tr_tbody->children[] = $node;
+ }
+ break;
+ }
+ }
+ } else {
+ $ret = array_merge($ret, $content);
+ }
- return ($ret === $tokens_of_children) ? true : $ret;
+ return $ret;
}
}