aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTML5
diff options
context:
space:
mode:
authorMike Macgirvin <mike@macgirvin.com>2010-07-01 16:48:07 -0700
committerMike Macgirvin <mike@macgirvin.com>2010-07-01 16:48:07 -0700
commit6348e70daa113e8b3203de8fbc919d08c90d972e (patch)
tree1bc3dd3bc85fe6136411086785cf6753960e22f9 /library/HTML5
downloadvolse-hubzilla-6348e70daa113e8b3203de8fbc919d08c90d972e.tar.gz
volse-hubzilla-6348e70daa113e8b3203de8fbc919d08c90d972e.tar.bz2
volse-hubzilla-6348e70daa113e8b3203de8fbc919d08c90d972e.zip
Initial checkin
Diffstat (limited to 'library/HTML5')
-rw-r--r--library/HTML5/Data.php120
-rw-r--r--library/HTML5/InputStream.php284
-rw-r--r--library/HTML5/Parser.php36
-rw-r--r--library/HTML5/Tokenizer.php2307
-rw-r--r--library/HTML5/TreeBuilder.php3715
-rw-r--r--library/HTML5/named-character-references.ser1
6 files changed, 6463 insertions, 0 deletions
diff --git a/library/HTML5/Data.php b/library/HTML5/Data.php
new file mode 100644
index 000000000..fa97e3ee8
--- /dev/null
+++ b/library/HTML5/Data.php
@@ -0,0 +1,120 @@
+<?php
+
+// warning: this file is encoded in UTF-8!
+
+class HTML5_Data
+{
+
+    /**
+     * Lookup table used by getRealCodepoint(): maps the codepoint written
+     * in a malformed numeric character reference to the codepoint that
+     * should be used in its place. The trailing comment on each row names
+     * the replacement character.
+     */
+    // at some point this should be moved to a .ser file. Another
+    // possible optimization is to give UTF-8 bytes, not Unicode
+    // codepoints
+    protected static $realCodepointTable = array(
+        0x0D => 0x000A, // LINE FEED (LF)
+        0x80 => 0x20AC, // EURO SIGN ('€')
+        0x81 => 0xFFFD, // REPLACEMENT CHARACTER
+        0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK ('‚')
+        0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK ('ƒ')
+        0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK ('„')
+        0x85 => 0x2026, // HORIZONTAL ELLIPSIS ('…')
+        0x86 => 0x2020, // DAGGER ('†')
+        0x87 => 0x2021, // DOUBLE DAGGER ('‡')
+        0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ')
+        0x89 => 0x2030, // PER MILLE SIGN ('‰')
+        0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON ('Š')
+        0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹')
+        0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE ('Œ')
+        0x8D => 0xFFFD, // REPLACEMENT CHARACTER
+        0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON ('Ž')
+        0x8F => 0xFFFD, // REPLACEMENT CHARACTER
+        0x90 => 0xFFFD, // REPLACEMENT CHARACTER
+        0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK ('‘')
+        0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK ('’')
+        0x93 => 0x201C, // LEFT DOUBLE QUOTATION MARK ('“')
+        0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK ('”')
+        0x95 => 0x2022, // BULLET ('•')
+        0x96 => 0x2013, // EN DASH ('–')
+        0x97 => 0x2014, // EM DASH ('—')
+        0x98 => 0x02DC, // SMALL TILDE ('˜')
+        0x99 => 0x2122, // TRADE MARK SIGN ('™')
+        0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON ('š')
+        0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›')
+        0x9C => 0x0153, // LATIN SMALL LIGATURE OE ('œ')
+        0x9D => 0xFFFD, // REPLACEMENT CHARACTER
+        0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON ('ž')
+        0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
+    );
+
+    /**
+     * Cache for getNamedCharacterReferences(); unset until first use.
+     */
+    protected static $namedCharacterReferences;
+
+    /**
+     * Cache for getNamedCharacterReferenceMaxLength(); unset until first use.
+     */
+    protected static $namedCharacterReferenceMaxLength;
+
+    /**
+     * Returns the "real" Unicode codepoint of a malformed character
+     * reference.
+     *
+     * @param $ref Codepoint taken from the character reference
+     * @return Replacement codepoint from $realCodepointTable, or false
+     *         when the table has no entry for $ref
+     */
+    public static function getRealCodepoint($ref) {
+        return isset(self::$realCodepointTable[$ref])
+            ? self::$realCodepointTable[$ref]
+            : false;
+    }
+
+    /**
+     * Returns the named character reference table, loading it on first
+     * use from the serialized file named-character-references.ser that
+     * sits next to this file and caching it for later calls.
+     *
+     * @return The unserialized table (presumably an array keyed by
+     *         reference name -- confirm against the .ser file)
+     */
+    public static function getNamedCharacterReferences() {
+        if (self::$namedCharacterReferences) {
+            return self::$namedCharacterReferences;
+        }
+        $serialized = file_get_contents(dirname(__FILE__) . '/named-character-references.ser');
+        self::$namedCharacterReferences = unserialize($serialized);
+        return self::$namedCharacterReferences;
+    }
+
+    /**
+     * Returns the byte length of the longest key in the named character
+     * reference table. The value is computed once and then cached.
+     */
+    public static function getNamedCharacterReferenceMaxLength() {
+        if (self::$namedCharacterReferenceMaxLength) {
+            return self::$namedCharacterReferenceMaxLength;
+        }
+        $names = array_keys(self::getNamedCharacterReferences());
+        self::$namedCharacterReferenceMaxLength = max(array_map('strlen', $names));
+        return self::$namedCharacterReferenceMaxLength;
+    }
+
+
+    /**
+     * Converts a Unicode codepoint to sequence of UTF-8 bytes.
+     * @note Shamelessly stolen from HTML Purifier, which is also
+     *       shamelessly stolen from Feyd (which is in public domain).
+     *
+     * @param $code Unicode codepoint
+     * @return UTF-8 byte string for $code; the encoding of U+FFFD
+     *         REPLACEMENT CHARACTER when $code is negative, above
+     *         0x10FFFF or in the range 0xD800-0xDFFF
+     */
+    public static function utf8chr($code) {
+        $outOfRange = $code > 0x10FFFF or $code < 0x0;
+        $surrogate = ($code >= 0xD800 and $code <= 0xDFFF);
+        if ($outOfRange || $surrogate) {
+            // bits are set outside the "valid" range as defined
+            // by UNICODE 4.1.0
+            return "\xEF\xBF\xBD";
+        }
+
+        if ($code < 0x80) {
+            // regular ASCII character: a single byte
+            return chr($code);
+        }
+
+        // every multi-byte sequence ends with a continuation byte that
+        // carries the low six bits
+        $tail = chr(($code & 0x3F) | 0x80);
+
+        if ($code < 0x800) {
+            // two bytes: 110xxxxx 10xxxxxx
+            return chr((($code & 0x7FF) >> 6) | 0xC0) . $tail;
+        }
+
+        // second-to-last continuation byte, shared by the 3- and 4-byte forms
+        $middle = chr((($code & 0xFC0) >> 6) | 0x80);
+
+        if ($code < 0x10000) {
+            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+            return chr((($code >> 12) & 0x0F) | 0xE0) . $middle . $tail;
+        }
+
+        // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        return chr((($code >> 18) & 0x07) | 0xF0)
+            . chr((($code >> 12) & 0x3F) | 0x80)
+            . $middle
+            . $tail;
+    }
+
+}
diff --git a/library/HTML5/InputStream.php b/library/HTML5/InputStream.php
new file mode 100644
index 000000000..f98b42723
--- /dev/null
+++ b/library/HTML5/InputStream.php
@@ -0,0 +1,284 @@
+<?php
+
+/*
+
+Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+// Some conventions:
+// /* */ indicates verbatim text from the HTML 5 specification
+// // indicates regular comments
+
+class HTML5_InputStream {
+    /**
+     * The string data we're parsing.
+     */
+    private $data;
+
+    /**
+     * The current integer byte position we are in $data. char()
+     * increments this on every call, so it can grow past $EOF.
+     */
+    private $char;
+
+    /**
+     * Length of $data in bytes; when $char === $EOF, we are at the
+     * end-of-file.
+     */
+    private $EOF;
+
+    /**
+     * Parse errors found while preprocessing the input. Each entry is an
+     * array with a 'type' key (HTML5_Tokenizer::PARSEERROR) and a 'data'
+     * key holding the error name.
+     */
+    public $errors = array();
+
+    /**
+     * Preprocesses the input (invalid UTF-8, BOM, NULL and newline
+     * handling) and records the parse errors found on the way in $errors.
+     *
+     * @param $data Data to parse; currently assumed to be UTF-8
+     * @throws Exception when the iconv extension is not loaded
+     */
+    public function __construct($data) {
+
+        /* Given an encoding, the bytes in the input stream must be
+        converted to Unicode characters for the tokeniser, as
+        described by the rules for that encoding, except that the
+        leading U+FEFF BYTE ORDER MARK character, if any, must not
+        be stripped by the encoding layer (it is stripped by the rule below).
+
+        Bytes or sequences of bytes in the original byte stream that
+        could not be converted to Unicode characters must be converted
+        to U+FFFD REPLACEMENT CHARACTER code points. */
+
+        // XXX currently assuming input data is UTF-8; once we
+        // build encoding detection this will no longer be the case
+        //
+        // We previously had an mbstring implementation here, but that
+        // implementation is heavily non-conforming, so it's been
+        // omitted.
+        if (extension_loaded('iconv')) {
+            // non-conforming: invalid sequences are dropped (//IGNORE)
+            // rather than replaced by U+FFFD. iconv() returns false on
+            // failure and the error is suppressed here.
+            $data = @iconv('UTF-8', 'UTF-8//IGNORE', $data);
+        } else {
+            // we can make a conforming native implementation
+            // (only iconv is supported, so that is what the message asks for)
+            throw new Exception('Not implemented, please install iconv');
+        }
+
+        /* One leading U+FEFF BYTE ORDER MARK character must be
+        ignored if any are present. */
+        if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
+            $data = substr($data, 3);
+        }
+
+        /* All U+0000 NULL characters in the input must be replaced
+        by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such
+        characters is a parse error. */
+        for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) {
+            $this->errors[] = array(
+                'type' => HTML5_Tokenizer::PARSEERROR,
+                'data' => 'null-character'
+            );
+        }
+        /* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED
+        (LF) characters are treated specially. Any CR characters
+        that are followed by LF characters must be removed, and any
+        CR characters not followed by LF characters must be converted
+        to LF characters. Thus, newlines in HTML DOMs are represented
+        by LF characters, and there are never any CR characters in the
+        input to the tokenization stage. */
+        $data = str_replace(
+            array(
+                "\0",
+                "\r\n",
+                "\r"
+            ),
+            array(
+                "\xEF\xBF\xBD",
+                "\n",
+                "\n"
+            ),
+            $data
+        );
+
+        /* Any occurrences of any characters in the ranges U+0001 to
+        U+0008, U+000B, U+000E to U+001F, U+007F to U+009F,
+        U+D800 to U+DFFF , U+FDD0 to U+FDEF, and
+        characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF,
+        U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE,
+        U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF,
+        U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE,
+        U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and
+        U+10FFFF are parse errors. (These are all control characters
+        or permanently undefined Unicode characters.) */
+        // Check PCRE is loaded.
+        if (extension_loaded('pcre')) {
+            // The pattern works on raw UTF-8 bytes (no /u modifier); only
+            // the number of matches is used, one parse error per match.
+            $count = preg_match_all(
+                '/(?:
+ [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B, U+000E to U+001F and U+007F
+ |
+ \xC2[\x80-\x9F] # U+0080 to U+009F
+ |
+ \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF
+ |
+ \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
+ |
+ \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
+ |
+ [\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
+ )/x',
+                $data,
+                $matches
+            );
+            for ($i = 0; $i < $count; $i++) {
+                $this->errors[] = array(
+                    'type' => HTML5_Tokenizer::PARSEERROR,
+                    'data' => 'invalid-codepoint'
+                );
+            }
+        } else {
+            // XXX: Need non-PCRE impl, probably using substr_count
+        }
+
+        $this->data = $data;
+        $this->char = 0;
+        $this->EOF = strlen($data);
+    }
+
+    /**
+     * Returns the current line that the tokenizer is at.
+     *
+     * @return 1-based line number of the current position
+     */
+    public function getCurrentLine() {
+        // Check the string isn't empty
+        if($this->EOF) {
+            // Count the newlines in the bytes consumed so far and add one,
+            // because line numbers start at 1. $this->char is capped at
+            // $this->EOF since char() can move it past the end.
+            return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
+        } else {
+            // If the string is empty, we are on the first line (sorta).
+            return 1;
+        }
+    }
+
+    /**
+     * Returns the current column of the current line that the tokenizer is at.
+     *
+     * @return Number of characters consumed since the last "\n" (or since
+     *         the start of the data when no newline has been consumed)
+     */
+    public function getColumnOffset() {
+        $length = strlen($this->data);
+
+        // Clamp the position to the data. $this->char is 0 before anything
+        // has been consumed and can run past the end because char() always
+        // increments; either case used to give strrpos() an offset outside
+        // the string (a warning before PHP 8, a ValueError since).
+        $position = max(0, min($this->char, $length));
+
+        if ($position === 0) {
+            // Nothing consumed yet, so there is no previous newline.
+            $lastLine = false;
+        } else {
+            // strrpos is weird, and the offset needs to be negative for what
+            // we want (i.e., the last \n before $position). With a negative
+            // offset the search runs right to left and includes the byte at
+            // index $position - 1, the last byte that has been consumed.
+            $lastLine = strrpos($this->data, "\n", $position - 1 - $length);
+        }
+
+        // Take everything consumed on the current line.
+        if($lastLine !== false) {
+            $findLengthOf = substr($this->data, $lastLine + 1, $position - 1 - $lastLine);
+        } else {
+            $findLengthOf = substr($this->data, 0, $position);
+        }
+
+        // Get the length (in characters, not bytes) of the string we need.
+        if(extension_loaded('iconv')) {
+            return iconv_strlen($findLengthOf, 'utf-8');
+        } elseif(extension_loaded('mbstring')) {
+            return mb_strlen($findLengthOf, 'utf-8');
+        } elseif(extension_loaded('xml')) {
+            return strlen(utf8_decode($findLengthOf));
+        } else {
+            // Count the bytes that start a character: ASCII bytes plus
+            // multi-byte lead bytes 0xC2 to 0xF4.
+            $count = count_chars($findLengthOf);
+            // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
+            // 0x33 = 0xF4 - 0xC2 + 1 (one added to get inclusive range)
+            return array_sum(array_slice($count, 0, 0x80)) +
+                array_sum(array_slice($count, 0xC2, 0x33));
+        }
+    }
+
+    /**
+     * Consumes and returns the next byte.
+     * @note This performs bounds checking
+     * @note The position is advanced on every call, including calls made
+     *       at or after the end of the data; unget() relies on this.
+     *
+     * @return The consumed byte, or false at end-of-file
+     */
+    public function char() {
+        return ($this->char++ < $this->EOF)
+            ? $this->data[$this->char - 1]
+            : false;
+    }
+
+    /**
+     * Get all characters until EOF.
+     * @note This performs bounds checking
+     *
+     * @return The rest of the data, or false when already at end-of-file
+     */
+    public function remainingChars() {
+        if($this->char < $this->EOF) {
+            $data = substr($this->data, $this->char);
+            $this->char = $this->EOF;
+            return $data;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Matches as far as possible until we reach a certain set of bytes
+     * and returns the matched substring.
+     * @param $bytes Bytes to match.
+     * @param $max Maximum number of bytes to examine (passed to strcspn
+     *             as its length); null means no limit.
+     * @return The matched (possibly empty) string, or false at end-of-file
+     */
+    public function charsUntil($bytes, $max = null) {
+        if ($this->char < $this->EOF) {
+            if ($max === 0 || $max) {
+                $len = strcspn($this->data, $bytes, $this->char, $max);
+            } else {
+                $len = strcspn($this->data, $bytes, $this->char);
+            }
+            $string = (string) substr($this->data, $this->char, $len);
+            $this->char += $len;
+            return $string;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Matches as far as possible with a certain set of bytes
+     * and returns the matched substring.
+     * @param $bytes Bytes to match.
+     * @param $max Maximum number of bytes to examine (passed to strspn
+     *             as its length); null means no limit.
+     * @return The matched (possibly empty) string, or false at end-of-file
+     */
+    public function charsWhile($bytes, $max = null) {
+        if ($this->char < $this->EOF) {
+            if ($max === 0 || $max) {
+                $len = strspn($this->data, $bytes, $this->char, $max);
+            } else {
+                $len = strspn($this->data, $bytes, $this->char);
+            }
+            $string = (string) substr($this->data, $this->char, $len);
+            $this->char += $len;
+            return $string;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Unconsume one character.
+     *
+     * Does nothing once the position is past the end of the data, i.e.
+     * after char() has returned false, so the next char() call reports
+     * end-of-file again.
+     */
+    public function unget() {
+        if ($this->char <= $this->EOF) {
+            $this->char--;
+        }
+    }
+}
diff --git a/library/HTML5/Parser.php b/library/HTML5/Parser.php
new file mode 100644
index 000000000..5f9ca560e
--- /dev/null
+++ b/library/HTML5/Parser.php
@@ -0,0 +1,36 @@
+<?php
+
+require_once dirname(__FILE__) . '/Data.php';
+require_once dirname(__FILE__) . '/InputStream.php';
+require_once dirname(__FILE__) . '/TreeBuilder.php';
+require_once dirname(__FILE__) . '/Tokenizer.php';
+
+/**
+ * Outwards facing interface for HTML5.
+ */
+class HTML5_Parser
+{
+    /**
+     * Parses a full HTML document by running an HTML5_Tokenizer over it.
+     * @param $text HTML text to parse
+     * @param $builder Custom builder implementation, handed to the tokenizer
+     * @return Result of HTML5_Tokenizer::save() (described by the original
+     *         documentation as the parsed HTML as DOMDocument)
+     */
+    public static function parse($text, $builder = null) {
+        $html5 = new HTML5_Tokenizer($text, $builder);
+        $html5->parse();
+
+        return $html5->save();
+    }
+
+    /**
+     * Parses an HTML fragment by running an HTML5_Tokenizer in fragment mode.
+     * @param $text HTML text to parse
+     * @param $context String name of context element to pretend parsing is in.
+     * @param $builder Custom builder implementation, handed to the tokenizer
+     * @return Result of HTML5_Tokenizer::save() (described by the original
+     *         documentation as the parsed HTML as DOMDocument)
+     */
+    public static function parseFragment($text, $context = null, $builder = null) {
+        $html5 = new HTML5_Tokenizer($text, $builder);
+        $html5->parseFragment($context);
+
+        return $html5->save();
+    }
+}
diff --git a/library/HTML5/Tokenizer.php b/library/HTML5/Tokenizer.php
new file mode 100644
index 000000000..06c73065f
--- /dev/null
+++ b/library/HTML5/Tokenizer.php
@@ -0,0 +1,2307 @@
+<?php
+
+/*
+
+Copyright 2007 Jeroen van der Meer <http://jero.net/>
+Copyright 2008 Edward Z. Yang <http://htmlpurifier.org/>
+Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+// Some conventions:
+// /* */ indicates verbatim text from the HTML 5 specification
+// // indicates regular comments
+
+// all flags are in hyphenated form
+
+class HTML5_Tokenizer {
+ /**
+ * Points to an InputStream object.
+ */
+ protected $stream;
+
+ /**
+ * Tree builder that the tokenizer emits token to.
+ */
+ private $tree;
+
+ /**
+ * Current content model we are parsing as.
+ */
+ protected $content_model;
+
+ /**
+ * Current token that is being built, but not yet emitted. Also
+ * is the last token emitted, if applicable.
+ */
+ protected $token;
+
+ // These are constants describing the content model
+ const PCDATA = 0;
+ const RCDATA = 1;
+ const CDATA = 2;
+ const PLAINTEXT = 3;
+
+ // These are constants describing tokens
+ // XXX should probably be moved somewhere else, probably the
+ // HTML5 class.
+ const DOCTYPE = 0;
+ const STARTTAG = 1;
+ const ENDTAG = 2;
+ const COMMENT = 3;
+ const CHARACTER = 4;
+ const SPACECHARACTER = 5;
+ const EOF = 6;
+ const PARSEERROR = 7;
+
+ // These are constants representing bunches of characters.
+ const ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
+ const UPPER_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+ const LOWER_ALPHA = 'abcdefghijklmnopqrstuvwxyz';
+ const DIGIT = '0123456789';
+ const HEX = '0123456789ABCDEFabcdef';
+ const WHITESPACE = "\t\n\x0c ";
+
+ /**
+ * @param $data Data to parse
+ */
+    public function __construct($data, $builder = null) {
+        // Wrap the raw input; HTML5_InputStream preprocesses it.
+        $this->stream = new HTML5_InputStream($data);
+        // Emit tokens to the caller's builder when one is supplied and fall
+        // back to the default tree builder otherwise. $this->tree must be
+        // set in both cases because parse() and parseFragment() call
+        // methods on it.
+        $this->tree = $builder ? $builder : new HTML5_TreeBuilder;
+        $this->content_model = self::PCDATA;
+    }
+
+    /**
+     * Parses the input as a fragment: lets the tree builder set up the
+     * given context, adopts the content model the builder asks for (if
+     * any), then runs the normal parse loop.
+     *
+     * @param $context Passed through to the tree builder's setupContext()
+     */
+    public function parseFragment($context = null) {
+        $builder = $this->tree;
+        $builder->setupContext($context);
+
+        // Take over the builder's requested content model and clear it on
+        // the builder once it has been adopted.
+        $requested = $builder->content_model;
+        if ($requested) {
+            $this->content_model = $requested;
+            $builder->content_model = null;
+        }
+
+        $this->parse();
+    }
+
+ // XXX maybe convert this into an iterator? regardless, this function
+ // and the save function should go into a Parser facade of some sort
+ /**
+ * Performs the actual parsing of the document.
+ */
+ public function parse() {
+ // Current state
+ $state = 'data';
+ // This is used to avoid having to have look-behind in the data state.
+ $lastFourChars = '';
+ /**
+ * Escape flag as specified by the HTML5 specification: "used to
+ * control the behavior of the tokeniser. It is either true or
+ * false, and initially must be set to the false state."
+ */
+ $escape = false;
+ //echo "\n\n";
+ while($state !== null) {
+
+ /*echo $state . ' ';
+ switch ($this->content_model) {
+ case self::PCDATA: echo 'PCDATA'; break;
+ case self::RCDATA: echo 'RCDATA'; break;
+ case self::CDATA: echo 'CDATA'; break;
+ case self::PLAINTEXT: echo 'PLAINTEXT'; break;
+ }
+ if ($escape) echo " escape";
+ echo "\n";*/
+
+ switch($state) {
+ case 'data':
+
+ /* Consume the next input character */
+ $char = $this->stream->char();
+ $lastFourChars .= $char;
+ if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
+
+ // see below for meaning
+ $hyp_cond =
+ !$escape &&
+ (
+ $this->content_model === self::RCDATA ||
+ $this->content_model === self::CDATA
+ );
+ $amp_cond =
+ !$escape &&
+ (
+ $this->content_model === self::PCDATA ||
+ $this->content_model === self::RCDATA
+ );
+ $lt_cond =
+ $this->content_model === self::PCDATA ||
+ (
+ (
+ $this->content_model === self::RCDATA ||
+ $this->content_model === self::CDATA
+ ) &&
+ !$escape
+ );
+ $gt_cond =
+ $escape &&
+ (
+ $this->content_model === self::RCDATA ||
+ $this->content_model === self::CDATA
+ );
+
+ if($char === '&' && $amp_cond) {
+ /* U+0026 AMPERSAND (&)
+ When the content model flag is set to one of the PCDATA or RCDATA
+ states and the escape flag is false: switch to the
+ character reference data state. Otherwise: treat it as per
+ the "anything else" entry below. */
+ $state = 'characterReferenceData';
+
+ } elseif(
+ $char === '-' &&
+ $hyp_cond &&
+ $lastFourChars === '<!--'
+ ) {
+ /*
+ U+002D HYPHEN-MINUS (-)
+ If the content model flag is set to either the RCDATA state or
+ the CDATA state, and the escape flag is false, and there are at
+ least three characters before this one in the input stream, and the
+ last four characters in the input stream, including this one, are
+ U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
+ and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */
+ $escape = true;
+
+ /* In any case, emit the input character as a character token. Stay
+ in the data state. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => '-'
+ ));
+ // We do the "any case" part as part of "anything else".
+
+ /* U+003C LESS-THAN SIGN (<) */
+ } elseif($char === '<' && $lt_cond) {
+ /* When the content model flag is set to the PCDATA state: switch
+ to the tag open state.
+
+ When the content model flag is set to either the RCDATA state or
+ the CDATA state and the escape flag is false: switch to the tag
+ open state.
+
+ Otherwise: treat it as per the "anything else" entry below. */
+ $state = 'tagOpen';
+
+ /* U+003E GREATER-THAN SIGN (>) */
+ } elseif(
+ $char === '>' &&
+ $gt_cond &&
+ substr($lastFourChars, 1) === '-->'
+ ) {
+ /* If the content model flag is set to either the RCDATA state or
+ the CDATA state, and the escape flag is true, and the last three
+ characters in the input stream including this one are U+002D
+ HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),
+ set the escape flag to false. */
+ $escape = false;
+
+ /* In any case, emit the input character as a character token.
+ Stay in the data state. */
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => '>'
+ ));
+ // We do the "any case" part as part of "anything else".
+
+ } elseif($char === false) {
+ /* EOF
+ Emit an end-of-file token. */
+ $state = null;
+ $this->tree->emitToken(array(
+ 'type' => self::EOF
+ ));
+
+ } elseif($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ // Directly after emitting a token you switch back to the "data
+ // state". At that point spaceCharacters are important so they are
+ // emitted separately.
+ $chars = $this->stream->charsWhile(self::WHITESPACE);
+ $this->emitToken(array(
+ 'type' => self::SPACECHARACTER,
+ 'data' => $char . $chars
+ ));
+ $lastFourChars .= $chars;
+ if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
+
+ } else {
+ /* Anything else
+ THIS IS AN OPTIMIZATION: Get as many character that
+ otherwise would also be treated as a character token and emit it
+ as a single character token. Stay in the data state. */
+
+ $mask = '';
+ if ($hyp_cond) $mask .= '-';
+ if ($amp_cond) $mask .= '&';
+ if ($lt_cond) $mask .= '<';
+ if ($gt_cond) $mask .= '>';
+
+ if ($mask === '') {
+ $chars = $this->stream->remainingChars();
+ } else {
+ $chars = $this->stream->charsUntil($mask);
+ }
+
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => $char . $chars
+ ));
+
+ $lastFourChars .= $chars;
+ if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4);
+
+ $state = 'data';
+ }
+ break;
+
+ case 'characterReferenceData':
+ /* (This cannot happen if the content model flag
+ is set to the CDATA state.) */
+
+ /* Attempt to consume a character reference, with no
+ additional allowed character. */
+ $entity = $this->consumeCharacterReference();
+
+ /* If nothing is returned, emit a U+0026 AMPERSAND
+ character token. Otherwise, emit the character token that
+ was returned. */
+ // This is all done when consuming the character reference.
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => $entity
+ ));
+
+ /* Finally, switch to the data state. */
+ $state = 'data';
+ break;
+
+ case 'tagOpen':
+ $char = $this->stream->char();
+
+ switch($this->content_model) {
+ case self::RCDATA:
+ case self::CDATA:
+ /* Consume the next input character. If it is a
+ U+002F SOLIDUS (/) character, switch to the close
+ tag open state. Otherwise, emit a U+003C LESS-THAN
+ SIGN character token and reconsume the current input
+ character in the data state. */
+ // We consumed above.
+
+ if($char === '/') {
+ $state = 'closeTagOpen';
+
+ } else {
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => '<'
+ ));
+
+ $this->stream->unget();
+
+ $state = 'data';
+ }
+ break;
+
+ case self::PCDATA:
+ /* If the content model flag is set to the PCDATA state
+ Consume the next input character: */
+ // We consumed above.
+
+ if($char === '!') {
+ /* U+0021 EXCLAMATION MARK (!)
+ Switch to the markup declaration open state. */
+ $state = 'markupDeclarationOpen';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Switch to the close tag open state. */
+ $state = 'closeTagOpen';
+
+ } elseif('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+ Create a new start tag token, set its tag name to the lowercase
+ version of the input character (add 0x0020 to the character's code
+ point), then switch to the tag name state. (Don't emit the token
+ yet; further details will be filled in before it is emitted.) */
+ $this->token = array(
+ 'name' => strtolower($char),
+ 'type' => self::STARTTAG,
+ 'attr' => array()
+ );
+
+ $state = 'tagName';
+
+ } elseif('a' <= $char && $char <= 'z') {
+ /* U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+ Create a new start tag token, set its tag name to the input
+ character, then switch to the tag name state. (Don't emit
+ the token yet; further details will be filled in before it
+ is emitted.) */
+ $this->token = array(
+ 'name' => $char,
+ 'type' => self::STARTTAG,
+ 'attr' => array()
+ );
+
+ $state = 'tagName';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit a U+003C LESS-THAN SIGN character token and a
+ U+003E GREATER-THAN SIGN character token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-tag-name-but-got-right-bracket'
+ ));
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => '<>'
+ ));
+
+ $state = 'data';
+
+ } elseif($char === '?') {
+ /* U+003F QUESTION MARK (?)
+ Parse error. Switch to the bogus comment state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-tag-name-but-got-question-mark'
+ ));
+ $this->token = array(
+ 'data' => '?',
+ 'type' => self::COMMENT
+ );
+ $state = 'bogusComment';
+
+ } else {
+ /* Anything else
+ Parse error. Emit a U+003C LESS-THAN SIGN character token and
+ reconsume the current input character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-tag-name'
+ ));
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => '<'
+ ));
+
+ $state = 'data';
+ $this->stream->unget();
+ }
+ break;
+ }
+ break;
+
+ case 'closeTagOpen':
+ if (
+ $this->content_model === self::RCDATA ||
+ $this->content_model === self::CDATA
+ ) {
+ /* If the content model flag is set to the RCDATA or CDATA
+ states... */
+ $name = strtolower($this->stream->charsWhile(self::ALPHA));
+ $following = $this->stream->char();
+ $this->stream->unget();
+ if (
+ !$this->token ||
+ $this->token['name'] !== $name ||
+ $this->token['name'] === $name && !in_array($following, array("\x09", "\x0A", "\x0C", "\x20", "\x3E", "\x2F", false))
+ ) {
+ /* if no start tag token has ever been emitted by this instance
+ of the tokenizer (fragment case), or, if the next few
+ characters do not match the tag name of the last start tag
+ token emitted (compared in an ASCII case-insensitive manner),
+ or if they do but they are not immediately followed by one of
+ the following characters:
+
+ * U+0009 CHARACTER TABULATION
+ * U+000A LINE FEED (LF)
+ * U+000C FORM FEED (FF)
+ * U+0020 SPACE
+ * U+003E GREATER-THAN SIGN (>)
+ * U+002F SOLIDUS (/)
+ * EOF
+
+ ...then emit a U+003C LESS-THAN SIGN character token, a
+ U+002F SOLIDUS character token, and switch to the data
+ state to process the next input character. */
+ // XXX: Probably ought to replace in_array with $following === x ||...
+
+ // We also need to emit $name now we've consumed that, as we
+ // know it'll just be emitted as a character token.
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => '</' . $name
+ ));
+
+ $state = 'data';
+ } else {
+ // This matches what would happen if we actually did the
+ // otherwise below (but we can't because we've consumed too
+ // much).
+
+ // Start the end tag token with the name we already have.
+ $this->token = array(
+ 'name' => $name,
+ 'type' => self::ENDTAG
+ );
+
+ // Change to tag name state.
+ $state = 'tagName';
+ }
+ } elseif ($this->content_model === self::PCDATA) {
+ /* Otherwise, if the content model flag is set to the PCDATA
+ state [...]: */
+ $char = $this->stream->char();
+
+ if ('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
+ Create a new end tag token, set its tag name to the lowercase version
+ of the input character (add 0x0020 to the character's code point), then
+ switch to the tag name state. (Don't emit the token yet; further details
+ will be filled in before it is emitted.) */
+ $this->token = array(
+ 'name' => strtolower($char),
+ 'type' => self::ENDTAG
+ );
+
+ $state = 'tagName';
+
+ } elseif ('a' <= $char && $char <= 'z') {
+ /* U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z
+ Create a new end tag token, set its tag name to the
+ input character, then switch to the tag name state.
+ (Don't emit the token yet; further details will be
+ filled in before it is emitted.) */
+ $this->token = array(
+ 'name' => $char,
+ 'type' => self::ENDTAG
+ );
+
+ $state = 'tagName';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-closing-tag-but-got-right-bracket'
+ ));
+ $state = 'data';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
+ SOLIDUS character token. Reconsume the EOF character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-closing-tag-but-got-eof'
+ ));
+ $this->emitToken(array(
+ 'type' => self::CHARACTER,
+ 'data' => '</'
+ ));
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Parse error. Switch to the bogus comment state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-closing-tag-but-got-char'
+ ));
+ $this->token = array(
+ 'data' => $char,
+ 'type' => self::COMMENT
+ );
+ $state = 'bogusComment';
+ }
+ }
+ break;
+
+ case 'tagName':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state. */
+ $state = 'beforeAttributeName';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state. */
+ $state = 'selfClosingStartTag';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Append the lowercase version of the current input
+ character (add 0x0020 to the character's code point) to
+ the current tag token's tag name. Stay in the tag name state. */
+ $chars = $this->stream->charsWhile(self::UPPER_ALPHA);
+
+ $this->token['name'] .= strtolower($char . $chars);
+ $state = 'tagName';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-tag-name'
+ ));
+ $this->emitToken($this->token);
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current tag token's tag name.
+ Stay in the tag name state. */
+ $chars = $this->stream->charsUntil("\t\n\x0C />" . self::UPPER_ALPHA);
+
+ $this->token['name'] .= $char . $chars;
+ $state = 'tagName';
+ }
+ break;
+
+ case 'beforeAttributeName':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ // this conditional is optimized, check bottom
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before attribute name state. */
+ $state = 'beforeAttributeName';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state. */
+ $state = 'selfClosingStartTag';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Start a new attribute in the current tag token. Set that
+ attribute's name to the lowercase version of the current
+ input character (add 0x0020 to the character's code
+ point), and its value to the empty string. Switch to the
+ attribute name state.*/
+ $this->token['attr'][] = array(
+ 'name' => strtolower($char),
+ 'value' => ''
+ );
+
+ $state = 'attributeName';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-attribute-name-but-got-eof'
+ ));
+ $this->emitToken($this->token);
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ U+003D EQUALS SIGN (=)
+ Parse error. Treat it as per the "anything else" entry
+ below. */
+ if($char === '"' || $char === "'" || $char === '=') {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'invalid-character-in-attribute-name'
+ ));
+ }
+
+ /* Anything else
+ Start a new attribute in the current tag token. Set that attribute's
+ name to the current input character, and its value to the empty string.
+ Switch to the attribute name state. */
+ $this->token['attr'][] = array(
+ 'name' => $char,
+ 'value' => ''
+ );
+
+ $state = 'attributeName';
+ }
+ break;
+
+ case 'attributeName':
+ // Consume the next input character:
+ $char = $this->stream->char();
+
+ // this conditional is optimized, check bottom
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the after attribute name state. */
+ $state = 'afterAttributeName';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state. */
+ $state = 'selfClosingStartTag';
+
+ } elseif($char === '=') {
+ /* U+003D EQUALS SIGN (=)
+ Switch to the before attribute value state. */
+ $state = 'beforeAttributeValue';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Append the lowercase version of the current input
+ character (add 0x0020 to the character's code point) to
+ the current attribute's name. Stay in the attribute name
+ state. */
+ $chars = $this->stream->charsWhile(self::UPPER_ALPHA);
+
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['name'] .= strtolower($char . $chars);
+
+ $state = 'attributeName';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-attribute-name'
+ ));
+ $this->emitToken($this->token);
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ Parse error. Treat it as per the "anything else"
+ entry below. */
+ if($char === '"' || $char === "'") {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'invalid-character-in-attribute-name'
+ ));
+ }
+
+ /* Anything else
+ Append the current input character to the current attribute's name.
+ Stay in the attribute name state. */
+ $chars = $this->stream->charsUntil("\t\n\x0C /=>\"'" . self::UPPER_ALPHA);
+
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['name'] .= $char . $chars;
+
+ $state = 'attributeName';
+ }
+
+ /* When the user agent leaves the attribute name state
+ (and before emitting the tag token, if appropriate), the
+ complete attribute's name must be compared to the other
+ attributes on the same token; if there is already an
+ attribute on the token with the exact same name, then this
+ is a parse error and the new attribute must be dropped, along
+ with the value that gets associated with it (if any). */
+ // this might be implemented in the emitToken method
+ break;
+
+ case 'afterAttributeName':
+ // Consume the next input character:
+ $char = $this->stream->char();
+
+ // this is an optimized conditional, check the bottom
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after attribute name state. */
+ $state = 'afterAttributeName';
+
+ } elseif($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state. */
+ $state = 'selfClosingStartTag';
+
+ } elseif($char === '=') {
+ /* U+003D EQUALS SIGN (=)
+ Switch to the before attribute value state. */
+ $state = 'beforeAttributeValue';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Start a new attribute in the current tag token. Set that
+ attribute's name to the lowercase version of the current
+ input character (add 0x0020 to the character's code
+ point), and its value to the empty string. Switch to the
+ attribute name state. */
+ $this->token['attr'][] = array(
+ 'name' => strtolower($char),
+ 'value' => ''
+ );
+
+ $state = 'attributeName';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-end-of-tag-but-got-eof'
+ ));
+ $this->emitToken($this->token);
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ Parse error. Treat it as per the "anything else"
+ entry below. */
+ if($char === '"' || $char === "'") {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'invalid-character-after-attribute-name'
+ ));
+ }
+
+ /* Anything else
+ Start a new attribute in the current tag token. Set that attribute's
+ name to the current input character, and its value to the empty string.
+ Switch to the attribute name state. */
+ $this->token['attr'][] = array(
+ 'name' => $char,
+ 'value' => ''
+ );
+
+ $state = 'attributeName';
+ }
+ break;
+
+ case 'beforeAttributeValue':
+ // Consume the next input character:
+ $char = $this->stream->char();
+
+ // this is an optimized conditional
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before attribute value state. */
+ $state = 'beforeAttributeValue';
+
+ } elseif($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Switch to the attribute value (double-quoted) state. */
+ $state = 'attributeValueDoubleQuoted';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the attribute value (unquoted) state and reconsume
+ this input character. */
+ $this->stream->unget();
+ $state = 'attributeValueUnquoted';
+
+ } elseif($char === '\'') {
+ /* U+0027 APOSTROPHE (')
+ Switch to the attribute value (single-quoted) state. */
+ $state = 'attributeValueSingleQuoted';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit the current tag token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-attribute-value-but-got-right-bracket'
+ ));
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume
+ the character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-attribute-value-but-got-eof'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* U+003D EQUALS SIGN (=)
+ Parse error. Treat it as per the "anything else" entry below. */
+ if($char === '=') {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'equals-in-unquoted-attribute-value'
+ ));
+ }
+
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Switch to the attribute value (unquoted) state. */
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char;
+
+ $state = 'attributeValueUnquoted';
+ }
+ break;
+
+ case 'attributeValueDoubleQuoted':
+ // Consume the next input character:
+ $char = $this->stream->char();
+
+ if($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Switch to the after attribute value (quoted) state. */
+ $state = 'afterAttributeValueQuoted';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the character reference in attribute value
+ state, with the additional allowed character
+ being U+0022 QUOTATION MARK ("). */
+ $this->characterReferenceInAttributeValue('"');
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the character
+ in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-attribute-value-double-quote'
+ ));
+ $this->emitToken($this->token);
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Stay in the attribute value (double-quoted) state. */
+ $chars = $this->stream->charsUntil('"&');
+
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char . $chars;
+
+ $state = 'attributeValueDoubleQuoted';
+ }
+ break;
+
+ case 'attributeValueSingleQuoted':
+ // Consume the next input character:
+ $char = $this->stream->char();
+
+ if($char === "'") {
+ /* U+0027 APOSTROPHE (')
+ Switch to the after attribute value (quoted) state. */
+ $state = 'afterAttributeValueQuoted';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the character reference in attribute value
+ state, with the additional allowed character
+ being U+0027 APOSTROPHE ('). */
+ $this->characterReferenceInAttributeValue("'");
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the character
+ in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-attribute-value-single-quote'
+ ));
+ $this->emitToken($this->token);
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Stay in the attribute value (single-quoted) state. */
+ $chars = $this->stream->charsUntil("'&");
+
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char . $chars;
+
+ $state = 'attributeValueSingleQuoted';
+ }
+ break;
+
+ case 'attributeValueUnquoted':
+ // Consume the next input character:
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state. */
+ $state = 'beforeAttributeName';
+
+ } elseif($char === '&') {
+ /* U+0026 AMPERSAND (&)
+ Switch to the character reference in attribute value
+ state (no additional allowed character is passed here). */
+ $this->characterReferenceInAttributeValue();
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume
+ the character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-attribute-value-no-quotes'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* U+0022 QUOTATION MARK (")
+ U+0027 APOSTROPHE (')
+ U+003D EQUALS SIGN (=)
+ Parse error. Treat it as per the "anything else"
+ entry below. */
+ if($char === '"' || $char === "'" || $char === '=') {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-character-in-unquoted-attribute-value'
+ ));
+ }
+
+ /* Anything else
+ Append the current input character to the current attribute's value.
+ Stay in the attribute value (unquoted) state. */
+ $chars = $this->stream->charsUntil("\t\n\x0c &>\"'=");
+
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char . $chars;
+
+ $state = 'attributeValueUnquoted';
+ }
+ break;
+
+ case 'afterAttributeValueQuoted':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before attribute name state. */
+ $state = 'beforeAttributeName';
+
+ } elseif ($char === '/') {
+ /* U+002F SOLIDUS (/)
+ Switch to the self-closing start tag state. */
+ $state = 'selfClosingStartTag';
+
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current tag token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-EOF-after-attribute-value'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Parse error. Reconsume the character in the before attribute
+ name state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-character-after-attribute-value'
+ ));
+ $this->stream->unget();
+ $state = 'beforeAttributeName';
+ }
+ break;
+
+ case 'selfClosingStartTag':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Set the self-closing flag of the current tag token.
+ Emit the current tag token. Switch to the data state. */
+ // not sure if this is the name we want
+ $this->token['self-closing'] = true;
+ /* When an end tag token is emitted with its self-closing flag set,
+ that is a parse error. */
+ if ($this->token['type'] === self::ENDTAG) {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'self-closing-end-tag'
+ ));
+ }
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Emit the current tag token. Reconsume the
+ EOF character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-eof-after-self-closing'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Parse error. Reconsume the character in the before attribute name state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-character-after-self-closing'
+ ));
+ $this->stream->unget();
+ $state = 'beforeAttributeName';
+ }
+ break;
+
+ case 'bogusComment':
+ /* (This can only happen if the content model flag is set to the PCDATA state.) */
+ /* Consume every character up to the first U+003E GREATER-THAN SIGN
+ character (>) or the end of the file (EOF), whichever comes first. Emit
+ a comment token whose data is the concatenation of all the characters
+ starting from and including the character that caused the state machine
+ to switch into the bogus comment state, up to and including the last
+ consumed character before the U+003E character, if any, or up to the
+ end of the file otherwise. (If the comment was started by the end of
+ the file (EOF), the token is empty.) */
+ $this->token['data'] .= (string) $this->stream->charsUntil('>');
+ $this->stream->char();
+
+ $this->emitToken($this->token);
+
+ /* Switch to the data state. */
+ $state = 'data';
+ break;
+
+ case 'markupDeclarationOpen':
+ // Consume for below
+ $hyphens = $this->stream->charsWhile('-', 2);
+ if ($hyphens === '-') {
+ $this->stream->unget();
+ }
+ if ($hyphens !== '--') {
+ $alpha = $this->stream->charsWhile(self::ALPHA, 7);
+ }
+
+ /* If the next two characters are both U+002D HYPHEN-MINUS (-)
+ characters, consume those two characters, create a comment token whose
+ data is the empty string, and switch to the comment state. */
+ if($hyphens === '--') {
+ $state = 'commentStart';
+ $this->token = array(
+ 'data' => '',
+ 'type' => self::COMMENT
+ );
+
+ /* Otherwise if the next seven characters are a case-insensitive match
+ for the word "DOCTYPE", then consume those characters and switch to the
+ DOCTYPE state. */
+ } elseif(strtoupper($alpha) === 'DOCTYPE') {
+ $state = 'doctype';
+
+ // XXX not implemented
+ /* Otherwise, if the insertion mode is "in foreign content"
+ and the current node is not an element in the HTML namespace
+ and the next seven characters are an ASCII case-sensitive
+ match for the string "[CDATA[" (the five uppercase letters
+ "CDATA" with a U+005B LEFT SQUARE BRACKET character before
+ and after), then consume those characters and switch to the
+ CDATA section state (which is unrelated to the content model
+ flag's CDATA state). */
+
+ /* Otherwise, it is a parse error. Switch to the bogus comment state.
+ The next character that is consumed, if any, is the first character
+ that will be in the comment. */
+ } else {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-dashes-or-doctype'
+ ));
+ $this->token = array(
+ 'data' => (string) $alpha,
+ 'type' => self::COMMENT
+ );
+ $state = 'bogusComment';
+ }
+ break;
+
+ case 'commentStart':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if ($char === '-') {
+ /* U+002D HYPHEN-MINUS (-)
+ Switch to the comment start dash state. */
+ $state = 'commentStartDash';
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit the comment token. Switch to the
+ data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'incorrect-comment'
+ ));
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Emit the comment token. Reconsume the
+ EOF character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-comment'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Anything else
+ Append the input character to the comment token's
+ data. Switch to the comment state. */
+ $this->token['data'] .= $char;
+ $state = 'comment';
+ }
+ break;
+
+ case 'commentStartDash':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+ if ($char === '-') {
+ /* U+002D HYPHEN-MINUS (-)
+ Switch to the comment end state */
+ $state = 'commentEnd';
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Emit the comment token. Switch to the
+ data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'incorrect-comment'
+ ));
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* Parse error. Emit the comment token. Reconsume the
+ EOF character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-comment'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ $this->token['data'] .= '-' . $char;
+ $state = 'comment';
+ }
+ break;
+
+ case 'comment':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === '-') {
+ /* U+002D HYPHEN-MINUS (-)
+ Switch to the comment end dash state */
+ $state = 'commentEndDash';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-comment'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Append the input character to the comment token's data. Stay in
+ the comment state. */
+ $chars = $this->stream->charsUntil('-');
+
+ $this->token['data'] .= $char . $chars;
+ }
+ break;
+
+ case 'commentEndDash':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === '-') {
+ /* U+002D HYPHEN-MINUS (-)
+ Switch to the comment end state */
+ $state = 'commentEnd';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the comment token. Reconsume the EOF character
+ in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-comment-end-dash'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Append a U+002D HYPHEN-MINUS (-) character and the input
+ character to the comment token's data. Switch to the comment state. */
+ $this->token['data'] .= '-'.$char;
+ $state = 'comment';
+ }
+ break;
+
+ case 'commentEnd':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the comment token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif($char === '-') {
+ /* U+002D HYPHEN-MINUS (-)
+ Parse error. Append a U+002D HYPHEN-MINUS (-) character
+ to the comment token's data. Stay in the comment end
+ state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-dash-after-double-dash-in-comment'
+ ));
+ $this->token['data'] .= '-';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Emit the comment token. Reconsume the
+ EOF character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-comment-double-dash'
+ ));
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Parse error. Append two U+002D HYPHEN-MINUS (-)
+ characters and the input character to the comment token's
+ data. Switch to the comment state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-char-in-comment'
+ ));
+ $this->token['data'] .= '--'.$char;
+ $state = 'comment';
+ }
+ break;
+
+ case 'doctype':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the before DOCTYPE name state. */
+ $state = 'beforeDoctypeName';
+
+ } else {
+ /* Anything else
+ Parse error. Reconsume the current character in the
+ before DOCTYPE name state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'need-space-after-doctype'
+ ));
+ $this->stream->unget();
+ $state = 'beforeDoctypeName';
+ }
+ break;
+
+ case 'beforeDoctypeName':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before DOCTYPE name state. */
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Create a new DOCTYPE token. Set its
+ force-quirks flag to on. Emit the token. Switch to the
+ data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-doctype-name-but-got-right-bracket'
+ ));
+ $this->emitToken(array(
+ 'name' => '',
+ 'type' => self::DOCTYPE,
+ 'force-quirks' => true,
+ 'error' => true
+ ));
+
+ $state = 'data';
+
+ } elseif('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Create a new DOCTYPE token. Set the token's name to the
+ lowercase version of the input character (add 0x0020 to
+ the character's code point). Switch to the DOCTYPE name
+ state. */
+ $this->token = array(
+ 'name' => strtolower($char),
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ );
+
+ $state = 'doctypeName';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Create a new DOCTYPE token. Set its
+ force-quirks flag to on. Emit the token. Reconsume the
+ EOF character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-doctype-name-but-got-eof'
+ ));
+ $this->emitToken(array(
+ 'name' => '',
+ 'type' => self::DOCTYPE,
+ 'force-quirks' => true,
+ 'error' => true
+ ));
+
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Create a new DOCTYPE token. Set the token's name to the
+ current input character. Switch to the DOCTYPE name state. */
+ $this->token = array(
+ 'name' => $char,
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ );
+
+ $state = 'doctypeName';
+ }
+ break;
+
+ case 'doctypeName':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Switch to the after DOCTYPE name state. */
+ $state = 'afterDoctypeName';
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif('A' <= $char && $char <= 'Z') {
+ /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
+ Append the lowercase version of the input character
+ (add 0x0020 to the character's code point) to the current
+ DOCTYPE token's name. Stay in the DOCTYPE name state. */
+ $this->token['name'] .= strtolower($char);
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype-name'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Append the current input character to the current
+ DOCTYPE token's name. Stay in the DOCTYPE name state. */
+ $this->token['name'] .= $char;
+ }
+
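+ // XXX this is probably some sort of quirks mode designation,
+ // check tree-builder to be sure. In general the 'error' flag
+ // needs to be brought in line with the spec, which probably
+ // means removing it in the end.
+ // NOTE(review): uppercase letters are lowercased before being
+ // appended to the name, so this comparison against 'HTML' looks
+ // like it can never be true -- confirm whether 'html' was meant.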
+ $this->token['error'] = ($this->token['name'] === 'HTML')
+ ? false
+ : true;
+ break;
+
+ case 'afterDoctypeName':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after DOCTYPE name state. */
+
+ } elseif($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif($char === false) {
+ /* EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else */
+
+ $nextSix = strtoupper($char . $this->stream->charsWhile(self::ALPHA, 5));
+ if ($nextSix === 'PUBLIC') {
+ /* If the next six characters are an ASCII
+ case-insensitive match for the word "PUBLIC", then
+ consume those characters and switch to the before
+ DOCTYPE public identifier state. */
+ $state = 'beforeDoctypePublicIdentifier';
+
+ } elseif ($nextSix === 'SYSTEM') {
+ /* Otherwise, if the next six characters are an ASCII
+ case-insensitive match for the word "SYSTEM", then
+ consume those characters and switch to the before
+ DOCTYPE system identifier state. */
+ $state = 'beforeDoctypeSystemIdentifier';
+
+ } else {
+ /* Otherwise, this is a parse error. Set the DOCTYPE
+ token's force-quirks flag to on. Switch to the bogus
+ DOCTYPE state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-space-or-right-bracket-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->token['error'] = true;
+ $state = 'bogusDoctype';
+ }
+ }
+ break;
+
+ case 'beforeDoctypePublicIdentifier':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before DOCTYPE public identifier state. */
+ } elseif ($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Set the DOCTYPE token's public identifier to the empty
+ string (not missing), then switch to the DOCTYPE public
+ identifier (double-quoted) state. */
+ $this->token['public'] = '';
+ $state = 'doctypePublicIdentifierDoubleQuoted';
+ } elseif ($char === "'") {
+ /* U+0027 APOSTROPHE (')
+ Set the DOCTYPE token's public identifier to the empty
+ string (not missing), then switch to the DOCTYPE public
+ identifier (single-quoted) state. */
+ $this->token['public'] = '';
+ $state = 'doctypePublicIdentifierSingleQuoted';
+ } elseif ($char === '>') {
+ /* Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-end-of-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* Parse error. Set the DOCTYPE token's force-quirks
+ flag to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Switch to the bogus DOCTYPE state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-char-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $state = 'bogusDoctype';
+ }
+ break;
+
+ case 'doctypePublicIdentifierDoubleQuoted':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if ($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Switch to the after DOCTYPE public identifier state. */
+ $state = 'afterDoctypePublicIdentifier';
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-end-of-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Anything else
+ Append the current input character to the current
+ DOCTYPE token's public identifier. Stay in the DOCTYPE
+ public identifier (double-quoted) state. */
+ $this->token['public'] .= $char;
+ }
+ break;
+
+ case 'doctypePublicIdentifierSingleQuoted':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if ($char === "'") {
+ /* U+0027 APOSTROPHE (')
+ Switch to the after DOCTYPE public identifier state. */
+ $state = 'afterDoctypePublicIdentifier';
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-end-of-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Anything else
+ Append the current input character to the current
+ DOCTYPE token's public identifier. Stay in the DOCTYPE
+ public identifier (single-quoted) state. */
+ $this->token['public'] .= $char;
+ }
+ break;
+
+ case 'afterDoctypePublicIdentifier':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after DOCTYPE public identifier state. */
+ } elseif ($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Set the DOCTYPE token's system identifier to the
+ empty string (not missing), then switch to the DOCTYPE
+ system identifier (double-quoted) state. */
+ $this->token['system'] = '';
+ $state = 'doctypeSystemIdentifierDoubleQuoted';
+ } elseif ($char === "'") {
+ /* U+0027 APOSTROPHE (')
+ Set the DOCTYPE token's system identifier to the
+ empty string (not missing), then switch to the DOCTYPE
+ system identifier (single-quoted) state. */
+ $this->token['system'] = '';
+ $state = 'doctypeSystemIdentifierSingleQuoted';
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* Parse error. Set the DOCTYPE token's force-quirks
+ flag to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Anything else
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Switch to the bogus DOCTYPE state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-char-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $state = 'bogusDoctype';
+ }
+ break;
+
+ case 'beforeDoctypeSystemIdentifier':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the before DOCTYPE system identifier state. */
+ } elseif ($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Set the DOCTYPE token's system identifier to the empty
+ string (not missing), then switch to the DOCTYPE system
+ identifier (double-quoted) state. */
+ $this->token['system'] = '';
+ $state = 'doctypeSystemIdentifierDoubleQuoted';
+ } elseif ($char === "'") {
+ /* U+0027 APOSTROPHE (')
+ Set the DOCTYPE token's system identifier to the empty
+ string (not missing), then switch to the DOCTYPE system
+ identifier (single-quoted) state. */
+ $this->token['system'] = '';
+ $state = 'doctypeSystemIdentifierSingleQuoted';
+ } elseif ($char === '>') {
+ /* Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-char-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* Parse error. Set the DOCTYPE token's force-quirks
+ flag to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Switch to the bogus DOCTYPE state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-char-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $state = 'bogusDoctype';
+ }
+ break;
+
+ case 'doctypeSystemIdentifierDoubleQuoted':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if ($char === '"') {
+ /* U+0022 QUOTATION MARK (")
+ Switch to the after DOCTYPE system identifier state. */
+ $state = 'afterDoctypeSystemIdentifier';
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-end-of-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Anything else
+ Append the current input character to the current
+ DOCTYPE token's system identifier. Stay in the DOCTYPE
+ system identifier (double-quoted) state. */
+ $this->token['system'] .= $char;
+ }
+ break;
+
+ case 'doctypeSystemIdentifierSingleQuoted':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if ($char === "'") {
+ /* U+0027 APOSTROPHE (')
+ Switch to the after DOCTYPE system identifier state. */
+ $state = 'afterDoctypeSystemIdentifier';
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Switch to the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-end-of-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* EOF
+ Parse error. Set the DOCTYPE token's force-quirks flag
+ to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Anything else
+ Append the current input character to the current
+ DOCTYPE token's system identifier. Stay in the DOCTYPE
+ system identifier (single-quoted) state. */
+ $this->token['system'] .= $char;
+ }
+ break;
+
+ case 'afterDoctypeSystemIdentifier':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ Stay in the after DOCTYPE system identifier state. */
+ } elseif ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the current DOCTYPE token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+ } elseif ($char === false) {
+ /* Parse error. Set the DOCTYPE token's force-quirks
+ flag to on. Emit that DOCTYPE token. Reconsume the EOF
+ character in the data state. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'eof-in-doctype'
+ ));
+ $this->token['force-quirks'] = true;
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+ } else {
+ /* Anything else
+ Parse error. Switch to the bogus DOCTYPE state.
+ (This does not set the DOCTYPE token's force-quirks
+ flag to on.) */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'unexpected-char-in-doctype'
+ ));
+ $state = 'bogusDoctype';
+ }
+ break;
+
+ case 'bogusDoctype':
+ /* Consume the next input character: */
+ $char = $this->stream->char();
+
+ if ($char === '>') {
+ /* U+003E GREATER-THAN SIGN (>)
+ Emit the DOCTYPE token. Switch to the data state. */
+ $this->emitToken($this->token);
+ $state = 'data';
+
+ } elseif($char === false) {
+ /* EOF
+ Emit the DOCTYPE token. Reconsume the EOF character in
+ the data state. */
+ $this->emitToken($this->token);
+ $this->stream->unget();
+ $state = 'data';
+
+ } else {
+ /* Anything else
+ Stay in the bogus DOCTYPE state. */
+ }
+ break;
+
+ // case 'cdataSection':
+
+ }
+ }
+ }
+
+ /**
+ * Asks the tree builder to save its result and hands that value back
+ * unchanged (per the original note: a serialized representation of
+ * the tree).
+ */
+ public function save() {
+ $saved = $this->tree->save();
+ return $saved;
+ }
+
+ /**
+ * Accessor for the input stream object held by this tokenizer.
+ */
+ public function stream() {
+ $inputStream = $this->stream;
+ return $inputStream;
+ }
+
+ /**
+ * Consumes a character reference; called after a U+0026 AMPERSAND has
+ * already been read from the stream.
+ *
+ * Unlike the spec, this never returns "nothing": when the input is not
+ * a character reference the text that was read is returned verbatim
+ * (always starting with '&') so the caller can append it as-is.
+ *
+ * @param string|false $allowed The additional allowed character, or
+ * false when there is none.
+ * @param bool $inattr True when the reference is being consumed as
+ * part of an attribute value.
+ * @return string Either the decoded character(s), possibly followed by
+ * trailing characters that were read but are not part
+ * of the reference, or the literal '&...' text.
+ */
+ private function consumeCharacterReference($allowed = false, $inattr = false) {
+ // This goes quite far against spec, and is far closer to the Python
+ // impl., mainly because we don't do the large unconsuming the spec
+ // requires.
+
+ // All consumed characters.
+ $chars = $this->stream->char();
+
+ /* This section defines how to consume a character
+ reference. This definition is used when parsing character
+ references in text and in attributes.
+
+ The behavior depends on the identity of the next character
+ (the one immediately after the U+0026 AMPERSAND character): */
+
+ // The EOF test must come first: char() yields false at end of input,
+ // and indexing false with [0] raises a notice/warning on modern PHP.
+ if (
+ $chars === false ||
+ $chars[0] === "\x09" ||
+ $chars[0] === "\x0A" ||
+ $chars[0] === "\x0C" ||
+ $chars[0] === "\x20" ||
+ $chars[0] === '<' ||
+ $chars[0] === '&' ||
+ $chars[0] === $allowed
+ ) {
+ /* U+0009 CHARACTER TABULATION
+ U+000A LINE FEED (LF)
+ U+000C FORM FEED (FF)
+ U+0020 SPACE
+ U+003C LESS-THAN SIGN
+ U+0026 AMPERSAND
+ EOF
+ The additional allowed character, if there is one
+ Not a character reference. No characters are consumed,
+ and nothing is returned. (This is not an error, either.) */
+ // We already consumed, so unconsume.
+ $this->stream->unget();
+ return '&';
+ } elseif ($chars[0] === '#') {
+ /* Consume the U+0023 NUMBER SIGN. */
+ // Um, yeah, we already did that.
+ /* The behavior further depends on the character after
+ the U+0023 NUMBER SIGN: */
+ $chars .= $this->stream->char();
+ if (isset($chars[1]) && ($chars[1] === 'x' || $chars[1] === 'X')) {
+ /* U+0078 LATIN SMALL LETTER X
+ U+0058 LATIN CAPITAL LETTER X */
+ /* Consume the X. */
+ // Um, yeah, we already did that.
+ /* Follow the steps below, but using the range of
+ characters U+0030 DIGIT ZERO through to U+0039 DIGIT
+ NINE, U+0061 LATIN SMALL LETTER A through to U+0066
+ LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER
+ A, through to U+0046 LATIN CAPITAL LETTER F (in other
+ words, 0123456789, ABCDEF, abcdef). */
+ $char_class = self::HEX;
+ /* When it comes to interpreting the
+ number, interpret it as a hexadecimal number. */
+ $hex = true;
+ } else {
+ /* Anything else */
+ // Unconsume because we shouldn't have consumed this.
+ $chars = $chars[0];
+ $this->stream->unget();
+ /* Follow the steps below, but using the range of
+ characters U+0030 DIGIT ZERO through to U+0039 DIGIT
+ NINE (i.e. just 0123456789). */
+ $char_class = self::DIGIT;
+ /* When it comes to interpreting the number,
+ interpret it as a decimal number. */
+ $hex = false;
+ }
+
+ /* Consume as many characters as match the range of characters given above. */
+ $consumed = $this->stream->charsWhile($char_class);
+ if ($consumed === '' || $consumed === false) {
+ /* If no characters match the range, then don't consume
+ any characters (and unconsume the U+0023 NUMBER SIGN
+ character and, if appropriate, the X character). This
+ is a parse error; nothing is returned. */
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-numeric-entity'
+ ));
+ return '&' . $chars;
+ } else {
+ /* Otherwise, if the next character is a U+003B SEMICOLON,
+ consume that too. If it isn't, there is a parse error. */
+ if ($this->stream->char() !== ';') {
+ $this->stream->unget();
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'numeric-entity-without-semicolon'
+ ));
+ }
+
+ /* If one or more characters match the range, then take
+ them all and interpret the string of characters as a number
+ (either hexadecimal or decimal as appropriate). */
+ $codepoint = $hex ? hexdec($consumed) : (int) $consumed;
+
+ /* If that number is one of the numbers in the first column
+ of the following table, then this is a parse error. Find the
+ row with that number in the first column, and return a
+ character token for the Unicode character given in the
+ second column of that row. */
+ $new_codepoint = HTML5_Data::getRealCodepoint($codepoint);
+ if ($new_codepoint) {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'illegal-windows-1252-entity'
+ ));
+ $codepoint = $new_codepoint;
+ } else {
+ /* Otherwise, if the number is in the range 0x0000 to 0x0008,
+ U+000B, U+000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF ,
+ 0xFDD0 to 0xFDEF, or is one of 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF,
+ 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
+ 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF,
+ 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE,
+ 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
+ 0x10FFFE, or 0x10FFFF, or is higher than 0x10FFFF, then this
+ is a parse error; return a character token for the U+FFFD
+ REPLACEMENT CHARACTER character instead. */
+ // && has higher precedence than ||
+ // The upper-bound test precedes the bitwise test so that a
+ // huge value (hexdec() returns a float for long inputs) is
+ // rejected before it is ever used as a bitwise operand.
+ if (
+ $codepoint >= 0x0000 && $codepoint <= 0x0008 ||
+ $codepoint === 0x000B ||
+ $codepoint >= 0x000E && $codepoint <= 0x001F ||
+ $codepoint >= 0x007F && $codepoint <= 0x009F ||
+ $codepoint >= 0xD800 && $codepoint <= 0xDFFF ||
+ $codepoint >= 0xFDD0 && $codepoint <= 0xFDEF ||
+ $codepoint > 0x10FFFF ||
+ ($codepoint & 0xFFFE) === 0xFFFE
+ ) {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'illegal-codepoint-for-numeric-entity'
+ ));
+ $codepoint = 0xFFFD;
+ }
+ }
+
+ /* Otherwise, return a character token for the Unicode
+ character whose code point is that number. */
+ return HTML5_Data::utf8chr($codepoint);
+ }
+
+ } else {
+ /* Anything else */
+
+ /* Consume the maximum number of characters possible,
+ with the consumed characters matching one of the
+ identifiers in the first column of the named character
+ references table (in a case-sensitive manner). */
+
+ // we will implement this by matching the longest
+ // alphanumeric + semicolon string, and then working
+ // our way backwards
+ $chars .= $this->stream->charsWhile(self::DIGIT . self::ALPHA . ';', HTML5_Data::getNamedCharacterReferenceMaxLength() - 1);
+ $len = strlen($chars);
+
+ $refs = HTML5_Data::getNamedCharacterReferences();
+ $codepoint = false;
+ // Try the longest prefix first; on a match $c is left holding the
+ // length of the matched identifier and is reused below.
+ for($c = $len; $c > 0; $c--) {
+ $id = substr($chars, 0, $c);
+ if(isset($refs[$id])) {
+ $codepoint = $refs[$id];
+ break;
+ }
+ }
+
+ /* If no match can be made, then this is a parse error.
+ No characters are consumed, and nothing is returned. */
+ if (!$codepoint) {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'expected-named-entity'
+ ));
+ return '&' . $chars;
+ }
+
+ /* If the last character matched is not a U+003B SEMICOLON
+ (;), there is a parse error. */
+ $semicolon = true;
+ if (substr($id, -1) !== ';') {
+ $this->emitToken(array(
+ 'type' => self::PARSEERROR,
+ 'data' => 'named-entity-without-semicolon'
+ ));
+ $semicolon = false;
+ }
+
+
+ /* If the character reference is being consumed as part of
+ an attribute, and the last character matched is not a
+ U+003B SEMICOLON (;), and the next character is in the
+ range U+0030 DIGIT ZERO to U+0039 DIGIT NINE, U+0041
+ LATIN CAPITAL LETTER A to U+005A LATIN CAPITAL LETTER Z,
+ or U+0061 LATIN SMALL LETTER A to U+007A LATIN SMALL LETTER Z,
+ then, for historical reasons, all the characters that were
+ matched after the U+0026 AMPERSAND (&) must be unconsumed,
+ and nothing is returned. */
+ if (
+ $inattr && !$semicolon &&
+ strspn(substr($chars, $c, 1), self::ALPHA . self::DIGIT)
+ ) {
+ return '&' . $chars;
+ }
+
+ /* Otherwise, return a character token for the character
+ corresponding to the character reference name (as given
+ by the second column of the named character references table). */
+ // Characters read past the matched name are handed back as text.
+ return HTML5_Data::utf8chr($codepoint) . substr($chars, $c);
+ }
+ }
+
+ /**
+ * Consumes a character reference inside an attribute value and appends
+ * the outcome to the value of the last attribute of the current token.
+ *
+ * @param string|false $allowed The additional allowed character, passed
+ * through to consumeCharacterReference().
+ */
+ private function characterReferenceInAttributeValue($allowed = false) {
+ /* Attempt to consume a character reference. */
+ $entity = $this->consumeCharacterReference($allowed, true);
+
+ /* If nothing is returned, append a U+0026 AMPERSAND
+ character to the current attribute's value.
+
+ Otherwise, append the returned character token to the
+ current attribute's value. */
+ // Test for "nothing" explicitly instead of using !$entity: a
+ // reference such as "&#48;" decodes to the string "0", which PHP
+ // treats as falsy and which would otherwise be replaced by "&".
+ $char = ($entity === false || $entity === null || $entity === '')
+ ? '&'
+ : $entity;
+
+ $last = count($this->token['attr']) - 1;
+ $this->token['attr'][$last]['value'] .= $char;
+
+ /* Finally, switch back to the attribute value state that you
+ were in when were switched into this state. */
+ }
+
+ /**
+ * Hands a token to the tree builder.
+ *
+ * When $checkStream is true, any errors queued on the input stream are
+ * emitted first (each through this same method, with the check turned
+ * off). After the tree builder has seen the token, the tokenizer's
+ * content model is refreshed: an integer left in the tree builder's
+ * content_model is taken over and cleared there; otherwise an end tag
+ * switches the content model to PCDATA.
+ */
+ protected function emitToken($token, $checkStream = true) {
+ if ($checkStream) {
+ // Drain the stream's error queue, oldest entry first.
+ while ($this->stream->errors) {
+ $streamError = array_shift($this->stream->errors);
+ $this->emitToken($streamError, false);
+ }
+ }
+
+ // the current structure of attributes is not a terribly good one
+ $this->tree->emitToken($token);
+
+ if (is_int($this->tree->content_model)) {
+ // The tree builder requested a content model; adopt it and reset
+ // the request.
+ $this->content_model = $this->tree->content_model;
+ $this->tree->content_model = null;
+ return;
+ }
+
+ if ($token['type'] === self::ENDTAG) {
+ $this->content_model = self::PCDATA;
+ }
+ }
+}
+
diff --git a/library/HTML5/TreeBuilder.php b/library/HTML5/TreeBuilder.php
new file mode 100644
index 000000000..03e2ee77f
--- /dev/null
+++ b/library/HTML5/TreeBuilder.php
@@ -0,0 +1,3715 @@
+<?php
+
+/*
+
+Copyright 2007 Jeroen van der Meer <http://jero.net/>
+Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+// Tags for FIX ME!!!: (in order of priority)
+// XXX - should be fixed NAO!
+// XERROR - with regards to parse errors
+// XSCRIPT - with regards to scripting mode
+// XENCODING - with regards to encoding (for reparsing tests)
+
+class HTML5_TreeBuilder {
+ // Stack of open elements; elements are pushed as they are inserted.
+ public $stack = array();
+ // Content model requested from the tokenizer. The tokenizer's
+ // emitToken() adopts it when it is an integer and then resets it to
+ // null.
+ public $content_model;
+
+ // Current insertion mode: one of the "Tree construction modes"
+ // constants below.
+ private $mode;
+ // NOTE(review): presumably the mode to return to after a temporary
+ // mode switch -- confirm against the rest of emitToken().
+ private $original_mode;
+ private $secondary_mode;
+ // The DOMDocument being built (created in the constructor).
+ private $dom;
+ // Whether or not normal insertion of nodes should actually foster
+ // parent (used in one case in spec)
+ private $foster_parent = false;
+ // NOTE(review): looks like the list of active formatting elements
+ // (MARKER entries are placed in it) -- confirm.
+ private $a_formatting = array();
+
+ private $head_pointer = null;
+ private $form_pointer = null;
+
+ private $flag_frameset_ok = true;
+ private $flag_force_quirks = false;
+ // Reset to false at the start of every emitToken() call and set to
+ // true by branches that ignore the token.
+ private $ignored = false;
+ // One of NO_QUIRKS, QUIRKS_MODE or LIMITED_QUIRKS_MODE; null until the
+ // INITIAL mode has decided.
+ private $quirks_mode = null;
+ // this gets to 2 when we want to ignore the next lf character, and
+ // is decremented at the beginning of each processed token (this way,
+ // code can check for (bool)$ignore_lf_token, but it phases out
+ // appropriately)
+ private $ignore_lf_token = 0;
+ private $fragment = false;
+ private $root;
+
+ // Tag-name lists; NOTE(review): presumably these back the SCOPING,
+ // FORMATTING and SPECIAL element categories -- confirm where they
+ // are consulted.
+ private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
+ private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
+ private $special = array('address','area','article','aside','base','basefont','bgsound',
+ 'blockquote','body','br','center','col','colgroup','command','dd','details','dialog','dir','div','dl',
+ 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
+ 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
+ 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
+ 'p','param','plaintext','pre','script','select','spacer','style',
+ 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
+
+ // Tree construction modes
+ // Values stored in $mode and dispatched on by emitToken(); the
+ // constructor starts in INITIAL.
+ const INITIAL = 0;
+ const BEFORE_HTML = 1;
+ const BEFORE_HEAD = 2;
+ const IN_HEAD = 3;
+ const IN_HEAD_NOSCRIPT = 4;
+ const AFTER_HEAD = 5;
+ const IN_BODY = 6;
+ const IN_CDATA_RCDATA = 7;
+ const IN_TABLE = 8;
+ const IN_CAPTION = 9;
+ const IN_COLUMN_GROUP = 10;
+ const IN_TABLE_BODY = 11;
+ const IN_ROW = 12;
+ const IN_CELL = 13;
+ const IN_SELECT = 14;
+ const IN_SELECT_IN_TABLE= 15;
+ const IN_FOREIGN_CONTENT= 16;
+ const AFTER_BODY = 17;
+ const IN_FRAMESET = 18;
+ const AFTER_FRAMESET = 19;
+ const AFTER_AFTER_BODY = 20;
+ const AFTER_AFTER_FRAMESET = 21;
+
+ /**
+ * Converts a magic number to a readable name. Use for debugging.
+ *
+ * The reverse lookup table is built once from this class's constants
+ * and cached in a static variable.
+ *
+ * @param int|string $number Constant value to look up.
+ * @return string|null Name of the constant holding that value, or null
+ * when no constant has it.
+ */
+ private function strConst($number) {
+ static $lookup;
+ if (!$lookup) {
+ $r = new ReflectionClass('HTML5_TreeBuilder');
+ $lookup = array();
+ // Built by hand instead of array_flip(): NS_HTML is null, and
+ // array_flip() emits a warning for every value that is neither
+ // an integer nor a string.
+ foreach ($r->getConstants() as $name => $value) {
+ if (is_int($value) || is_string($value)) {
+ $lookup[$value] = $name;
+ }
+ }
+ }
+ // Unknown values yield null without an undefined-index notice.
+ return isset($lookup[$number]) ? $lookup[$number] : null;
+ }
+
+ // The different types of elements.
+ // NOTE(review): presumably returned by an element-classification
+ // helper defined later in the class -- confirm.
+ const SPECIAL = 100;
+ const SCOPING = 101;
+ const FORMATTING = 102;
+ const PHRASING = 103;
+
+ // Quirks modes in $quirks_mode
+ // (assigned while a DOCTYPE token is handled in the INITIAL mode)
+ const NO_QUIRKS = 200;
+ const QUIRKS_MODE = 201;
+ const LIMITED_QUIRKS_MODE = 202;
+
+ // Marker to be placed in $a_formatting
+ const MARKER = 300;
+
+ // Namespaces for foreign content
+ const NS_HTML = null; // to prevent DOM from requiring NS on everything
+ const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
+ const NS_SVG = 'http://www.w3.org/2000/svg';
+ const NS_XLINK = 'http://www.w3.org/1999/xlink';
+ const NS_XML = 'http://www.w3.org/XML/1998/namespace';
+ const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
+
+ /**
+ * Starts the builder in the INITIAL insertion mode with a new
+ * DOMDocument whose encoding is set to 'UTF-8', with whitespace
+ * preservation and entity substitution switched on and strict error
+ * checking switched off.
+ */
+ public function __construct() {
+ $this->mode = self::INITIAL;
+
+ $document = new DOMDocument();
+ $document->encoding = 'UTF-8';
+ $document->preserveWhiteSpace = true;
+ $document->substituteEntities = true;
+ $document->strictErrorChecking = false;
+
+ $this->dom = $document;
+ }
+
+ // Process tag tokens
+ public function emitToken($token, $mode = null) {
+ // XXX: ignore parse errors... why are we emitting them, again?
+ if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
+ if ($mode === null) $mode = $this->mode;
+
+ /*
+ $backtrace = debug_backtrace();
+ if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
+ echo $this->strConst($mode);
+ if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
+ echo "\n ";
+ token_dump($token);
+ $this->printStack();
+ $this->printActiveFormattingElements();
+ if ($this->foster_parent) echo " -> this is a foster parent mode\n";
+ */
+
+ if ($this->ignore_lf_token) $this->ignore_lf_token--;
+ $this->ignored = false;
+ // indenting is a little wonky, this can be changed later on
+ switch ($mode) {
+
+ case self::INITIAL:
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
+ if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Ignore the token. */
+ $this->ignored = true;
+ } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ if (
+ $token['name'] !== 'html' || !empty($token['public']) ||
+ !empty($token['system']) || $token !== 'about:legacy-compat'
+ ) {
+ /* If the DOCTYPE token's name is not a case-sensitive match
+ * for the string "html", or if the token's public identifier
+ * is not missing, or if the token's system identifier is
+ * neither missing nor a case-sensitive match for the string
+ * "about:legacy-compat", then there is a parse error (this
+ * is the DOCTYPE parse error). */
+ // DOCTYPE parse error
+ }
+ /* Append a DocumentType node to the Document node, with the name
+ * attribute set to the name given in the DOCTYPE token, or the
+ * empty string if the name was missing; the publicId attribute
+ * set to the public identifier given in the DOCTYPE token, or
+ * the empty string if the public identifier was missing; the
+ * systemId attribute set to the system identifier given in the
+ * DOCTYPE token, or the empty string if the system identifier
+ * was missing; and the other attributes specific to
+ * DocumentType objects set to null and empty lists as
+ * appropriate. Associate the DocumentType node with the
+ * Document object so that it is returned as the value of the
+ * doctype attribute of the Document object. */
+ if (!isset($token['public'])) $token['public'] = null;
+ if (!isset($token['system'])) $token['system'] = null;
+ // Yes this is hacky. I'm kind of annoyed that I can't appendChild
+ // a doctype to DOMDocument. Maybe I haven't chanted the right
+ // syllables.
+ $impl = new DOMImplementation();
+ // This call can fail for particularly pathological cases (namely,
+ // the qualifiedName parameter ($token['name']) could be missing.
+ if ($token['name']) {
+ $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
+ $this->dom->appendChild($doctype);
+ } else {
+ // It looks like libxml's not actually *able* to express this case.
+ // So... don't.
+ $this->dom->emptyDoctype = true;
+ }
+ $public = is_null($token['public']) ? false : strtolower($token['public']);
+ $system = is_null($token['system']) ? false : strtolower($token['system']);
+ $publicStartsWithForQuirks = array(
+ "+//silmaril//dtd html pro v0r11 19970101//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//",
+ "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//",
+ "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//",
+ "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//",
+ "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//",
+ "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//",
+ );
+ $publicSetToForQuirks = array(
+ "-//w3o//dtd w3 html strict 3.0//",
+ "-/w3c/dtd html 4.0 transitional/en",
+ "html",
+ );
+ $publicStartsWithAndSystemForQuirks = array(
+ "-//w3c//dtd html 4.01 frameset//",
+ "-//w3c//dtd html 4.01 transitional//",
+ );
+ $publicStartsWithForLimitedQuirks = array(
+ "-//w3c//dtd xhtml 1.0 frameset//",
+ "-//w3c//dtd xhtml 1.0 transitional//",
+ );
+ $publicStartsWithAndSystemForLimitedQuirks = array(
+ "-//w3c//dtd html 4.01 frameset//",
+ "-//w3c//dtd html 4.01 transitional//",
+ );
+ // first, do easy checks
+ if (
+ !empty($token['force-quirks']) ||
+ strtolower($token['name']) !== 'html'
+ ) {
+ $this->quirks_mode = self::QUIRKS_MODE;
+ } else {
+ do {
+ if ($system) {
+ foreach ($publicStartsWithAndSystemForQuirks as $x) {
+ if (strncmp($public, $x, strlen($x)) === 0) {
+ $this->quirks_mode = self::QUIRKS_MODE;
+ break;
+ }
+ }
+ if (!is_null($this->quirks_mode)) break;
+ foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
+ if (strncmp($public, $x, strlen($x)) === 0) {
+ $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
+ break;
+ }
+ }
+ if (!is_null($this->quirks_mode)) break;
+ }
+ foreach ($publicSetToForQuirks as $x) {
+ if ($public === $x) {
+ $this->quirks_mode = self::QUIRKS_MODE;
+ break;
+ }
+ }
+ if (!is_null($this->quirks_mode)) break;
+ foreach ($publicStartsWithForLimitedQuirks as $x) {
+ if (strncmp($public, $x, strlen($x)) === 0) {
+ $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
+ }
+ }
+ if (!is_null($this->quirks_mode)) break;
+ if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
+ $this->quirks_mode = self::QUIRKS_MODE;
+ break;
+ }
+ foreach ($publicStartsWithForQuirks as $x) {
+ if (strncmp($public, $x, strlen($x)) === 0) {
+ $this->quirks_mode = self::QUIRKS_MODE;
+ break;
+ }
+ }
+ if (is_null($this->quirks_mode)) {
+ $this->quirks_mode = self::NO_QUIRKS;
+ }
+ } while (false);
+ }
+ $this->mode = self::BEFORE_HTML;
+ } else {
+ // parse error
+ /* Switch the insertion mode to "before html", then reprocess the
+ * current token. */
+ $this->mode = self::BEFORE_HTML;
+ $this->quirks_mode = self::QUIRKS_MODE;
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::BEFORE_HTML:
+
+ /* A DOCTYPE token */
+ if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // Parse error. Ignore the token.
+ $this->ignored = true;
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ $this->dom->appendChild($comment);
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Ignore the token. */
+ $this->ignored = true;
+
+ /* A start tag whose tag name is "html" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
+ /* Create an element for the token in the HTML namespace. Append it
+ * to the Document object. Put this element in the stack of open
+ * elements. */
+ $html = $this->insertElement($token, false);
+ $this->dom->appendChild($html);
+ $this->stack[] = $html;
+
+ $this->mode = self::BEFORE_HEAD;
+
+ } else {
+ /* Create an html element. Append it to the Document object. Put
+ * this element in the stack of open elements. */
+ $html = $this->dom->createElementNS(self::NS_HTML, 'html');
+ $this->dom->appendChild($html);
+ $this->stack[] = $html;
+
+ /* Switch the insertion mode to "before head", then reprocess the
+ * current token. */
+ $this->mode = self::BEFORE_HEAD;
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::BEFORE_HEAD:
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Ignore the token. */
+ $this->ignored = true;
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data attribute
+ set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ /* A DOCTYPE token */
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ /* Parse error. Ignore the token */
+ $this->ignored = true;
+ // parse error
+
+ /* A start tag token with the tag name "html" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
+ /* Process the token using the rules for the "in body"
+ * insertion mode. */
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* A start tag token with the tag name "head" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
+ /* Insert an HTML element for the token. */
+ $element = $this->insertElement($token);
+
+ /* Set the head element pointer to this new element node. */
+ $this->head_pointer = $element;
+
+ /* Change the insertion mode to "in head". */
+ $this->mode = self::IN_HEAD;
+
+ /* An end tag whose tag name is one of: "head", "body", "html", "br" */
+ } elseif(
+ $token['type'] === HTML5_Tokenizer::ENDTAG && (
+ $token['name'] === 'head' || $token['name'] === 'body' ||
+ $token['name'] === 'html' || $token['name'] === 'br'
+ )) {
+ /* Act as if a start tag token with the tag name "head" and no
+ * attributes had been seen, then reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => 'head',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+ $this->emitToken($token);
+
+ /* Any other end tag */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+
+ } else {
+ /* Act as if a start tag token with the tag name "head" and no
+ * attributes had been seen, then reprocess the current token.
+ * Note: This will result in an empty head element being
+ * generated, with the current token being reprocessed in the
+ * "after head" insertion mode. */
+ $this->emitToken(array(
+ 'name' => 'head',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::IN_HEAD:
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE. */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Insert the character into the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data attribute
+ set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ /* A DOCTYPE token */
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+ // parse error
+
+ /* A start tag whose tag name is "html" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'html') {
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* A start tag whose tag name is one of: "base", "command", "link" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ ($token['name'] === 'base' || $token['name'] === 'command' ||
+ $token['name'] === 'link')) {
+ /* Insert an HTML element for the token. Immediately pop the
+ * current node off the stack of open elements. */
+ $this->insertElement($token);
+ array_pop($this->stack);
+
+ // YYY: Acknowledge the token's self-closing flag, if it is set.
+
+ /* A start tag whose tag name is "meta" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
+ /* Insert an HTML element for the token. Immediately pop the
+ * current node off the stack of open elements. */
+ $this->insertElement($token);
+ array_pop($this->stack);
+
+ // XERROR: Acknowledge the token's self-closing flag, if it is set.
+
+ // XENCODING: If the element has a charset attribute, and its value is a
+ // supported encoding, and the confidence is currently tentative,
+ // then change the encoding to the encoding given by the value of
+ // the charset attribute.
+ //
+ // Otherwise, if the element has a content attribute, and applying
+ // the algorithm for extracting an encoding from a Content-Type to
+ // its value returns a supported encoding encoding, and the
+ // confidence is currently tentative, then change the encoding to
+ // the encoding encoding.
+
+ /* A start tag with the tag name "title" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
+ $this->insertRCDATAElement($token);
+
+ /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
+ * A start tag whose tag name is one of: "noframes", "style" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
+ // XSCRIPT: Scripting flag not respected
+ $this->insertCDATAElement($token);
+
+ // XSCRIPT: Scripting flag disable not implemented
+
+ /* A start tag with the tag name "script" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
+ /* 1. Create an element for the token in the HTML namespace. */
+ $node = $this->insertElement($token, false);
+
+ /* 2. Mark the element as being "parser-inserted" */
+ // Uhhh... XSCRIPT
+
+ /* 3. If the parser was originally created for the HTML
+ * fragment parsing algorithm, then mark the script element as
+ * "already executed". (fragment case) */
+ // ditto... XSCRIPT
+
+ /* 4. Append the new element to the current node and push it onto
+ * the stack of open elements. */
+ end($this->stack)->appendChild($node);
+ $this->stack[] = $node;
+ // I guess we could squash these together
+
+ /* 6. Let the original insertion mode be the current insertion mode. */
+ $this->original_mode = $this->mode;
+ /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
+ $this->mode = self::IN_CDATA_RCDATA;
+ /* 5. Switch the tokeniser's content model flag to the CDATA state. */
+ $this->content_model = HTML5_Tokenizer::CDATA;
+
+ /* An end tag with the tag name "head" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
+ /* Pop the current node (which will be the head element) off the stack of open elements. */
+ array_pop($this->stack);
+
+ /* Change the insertion mode to "after head". */
+ $this->mode = self::AFTER_HEAD;
+
+ // Slight logic inversion here to minimize duplication
+ /* A start tag with the tag name "head". */
+ /* An end tag whose tag name is not one of: "body", "html", "br" */
+ } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
+ ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
+ $token['name'] !== 'body' && $token['name'] !== 'br')) {
+ // Parse error. Ignore the token.
+ $this->ignored = true;
+
+ /* Anything else */
+ } else {
+ /* Act as if an end tag token with the tag name "head" had been
+ * seen, and reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => 'head',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ /* Then, reprocess the current token. */
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::IN_HEAD_NOSCRIPT:
+ if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+ } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
+ $this->processWithRulesFor($token, self::IN_BODY);
+ } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
+ /* Pop the current node (which will be a noscript element) from the
+ * stack of open elements; the new current node will be a head
+ * element. */
+ array_pop($this->stack);
+ $this->mode = self::IN_HEAD;
+ } elseif (
+ ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
+ ($token['type'] === HTML5_Tokenizer::COMMENT) ||
+ ($token['type'] === HTML5_Tokenizer::STARTTAG && (
+ $token['name'] === 'link' || $token['name'] === 'meta' ||
+ $token['name'] === 'noframes' || $token['name'] === 'style'))) {
+ $this->processWithRulesFor($token, self::IN_HEAD);
+ // inverted logic
+ } elseif (
+ ($token['type'] === HTML5_Tokenizer::STARTTAG && (
+ $token['name'] === 'head' || $token['name'] === 'noscript')) ||
+ ($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] !== 'br')) {
+ // parse error
+ } else {
+ // parse error
+ $this->emitToken(array(
+ 'type' => HTML5_Tokenizer::ENDTAG,
+ 'name' => 'noscript',
+ ));
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::AFTER_HEAD:
+ /* Handle the token as follows: */
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data attribute
+ set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+
+ } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* A start tag token with the tag name "body" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
+ $this->insertElement($token);
+
+ /* Set the frameset-ok flag to "not ok". */
+ $this->flag_frameset_ok = false;
+
+ /* Change the insertion mode to "in body". */
+ $this->mode = self::IN_BODY;
+
+ /* A start tag token with the tag name "frameset" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
+ /* Insert a frameset element for the token. */
+ $this->insertElement($token);
+
+ /* Change the insertion mode to "in frameset". */
+ $this->mode = self::IN_FRAMESET;
+
+ /* A start tag token whose tag name is one of: "base", "link", "meta",
+ "noframes", "script", "style", "title" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
+ array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
+ // parse error
+ /* Push the node pointed to by the head element pointer onto the
+ * stack of open elements. */
+ $this->stack[] = $this->head_pointer;
+ $this->processWithRulesFor($token, self::IN_HEAD);
+ array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
+
+ // inversion of specification
+ } elseif(
+ ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
+ ($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] !== 'body' && $token['name'] !== 'html' &&
+ $token['name'] !== 'br')) {
+ // parse error
+
+ /* Anything else */
+ } else {
+ $this->emitToken(array(
+ 'name' => 'body',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+ $this->flag_frameset_ok = true;
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::IN_BODY:
+ /* Handle the token as follows: */
+
+ switch($token['type']) {
+ /* A character token */
+ case HTML5_Tokenizer::CHARACTER:
+ case HTML5_Tokenizer::SPACECHARACTER:
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Append the token's character to the current node. */
+ $this->insertText($token['data']);
+
+ /* If the token is not one of U+0009 CHARACTER TABULATION,
+ * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
+ * SPACE, then set the frameset-ok flag to "not ok". */
+ // i.e., if any of the characters is not whitespace
+ if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
+ $this->flag_frameset_ok = false;
+ }
+ break;
+
+ /* A comment token */
+ case HTML5_Tokenizer::COMMENT:
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+ break;
+
+ case HTML5_Tokenizer::DOCTYPE:
+ // parse error
+ break;
+
+ case HTML5_Tokenizer::STARTTAG:
+ switch($token['name']) {
+ case 'html':
+ // parse error
+ /* For each attribute on the token, check to see if the
+ * attribute is already present on the top element of the
+ * stack of open elements. If it is not, add the attribute
+ * and its corresponding value to that element. */
+ foreach($token['attr'] as $attr) {
+ if(!$this->stack[0]->hasAttribute($attr['name'])) {
+ $this->stack[0]->setAttribute($attr['name'], $attr['value']);
+ }
+ }
+ break;
+
+ case 'base': case 'command': case 'link': case 'meta': case 'noframes':
+ case 'script': case 'style': case 'title':
+ /* Process the token as if the insertion mode had been "in
+ head". */
+ $this->processWithRulesFor($token, self::IN_HEAD);
+ break;
+
+ /* A start tag token with the tag name "body" */
+ case 'body':
+ /* Parse error. If the second element on the stack of open
+ elements is not a body element, or, if the stack of open
+ elements has only one node on it, then ignore the token.
+ (fragment case) */
+ if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
+ $this->ignored = true;
+ // Ignore
+
+ /* Otherwise, for each attribute on the token, check to see
+ if the attribute is already present on the body element (the
+ second element) on the stack of open elements. If it is not,
+ add the attribute and its corresponding value to that
+ element. */
+ } else {
+ foreach($token['attr'] as $attr) {
+ if(!$this->stack[1]->hasAttribute($attr['name'])) {
+ $this->stack[1]->setAttribute($attr['name'], $attr['value']);
+ }
+ }
+ }
+ break;
+
+ case 'frameset':
+ // parse error
+ /* If the second element on the stack of open elements is
+ * not a body element, or, if the stack of open elements
+ * has only one node on it, then ignore the token.
+ * (fragment case) */
+ if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
+ $this->ignored = true;
+ // Ignore
+ } elseif (!$this->flag_frameset_ok) {
+ $this->ignored = true;
+ // Ignore
+ } else {
+ /* 1. Remove the second element on the stack of open
+ * elements from its parent node, if it has one. */
+ if($this->stack[1]->parentNode) {
+ $this->stack[1]->parentNode->removeChild($this->stack[1]);
+ }
+
+ /* 2. Pop all the nodes from the bottom of the stack of
+ * open elements, from the current node up to the root
+ * html element. */
+ array_splice($this->stack, 1);
+
+ $this->insertElement($token);
+ $this->mode = self::IN_FRAMESET;
+ }
+ break;
+
+ // in spec, there is a diversion here
+
+ case 'address': case 'article': case 'aside': case 'blockquote':
+ case 'center': case 'datagrid': case 'details': case 'dialog': case 'dir':
+ case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
+ case 'header': case 'hgroup': case 'menu': case 'nav':
+ case 'ol': case 'p': case 'section': case 'ul':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+ break;
+
+ /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
+ "h5", "h6" */
+ case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* If the current node is an element whose tag name is one
+ * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
+ * parse error; pop the current node off the stack of open
+ * elements. */
+ $peek = array_pop($this->stack);
+ if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
+ // parse error
+ } else {
+ $this->stack[] = $peek;
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+ break;
+
+ case 'pre': case 'listing':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+ $this->insertElement($token);
+ /* If the next token is a U+000A LINE FEED (LF) character
+ * token, then ignore that token and move on to the next
+ * one. (Newlines at the start of pre blocks are ignored as
+ * an authoring convenience.) */
+ $this->ignore_lf_token = 2;
+ $this->flag_frameset_ok = false;
+ break;
+
+ /* A start tag whose tag name is "form" */
+ case 'form':
+ /* If the form element pointer is not null, ignore the
+ token with a parse error. */
+ if($this->form_pointer !== null) {
+ $this->ignored = true;
+ // Ignore.
+
+ /* Otherwise: */
+ } else {
+ /* If the stack of open elements has a p element in
+ scope, then act as if an end tag with the tag name p
+ had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token, and set the
+ form element pointer to point to the element created. */
+ $element = $this->insertElement($token);
+ $this->form_pointer = $element;
+ }
+ break;
+
+ // condensed specification
+ case 'li': case 'dd': case 'dt':
+ /* 1. Set the frameset-ok flag to "not ok". */
+ $this->flag_frameset_ok = false;
+
+ $stack_length = count($this->stack) - 1;
+ for($n = $stack_length; 0 <= $n; $n--) {
+ /* 2. Initialise node to be the current node (the
+ bottommost node of the stack). */
+ $stop = false;
+ $node = $this->stack[$n];
+ $cat = $this->getElementCategory($node);
+
+ // for case 'li':
+ /* 3. If node is an li element, then act as if an end
+ * tag with the tag name "li" had been seen, then jump
+ * to the last step. */
+ // for case 'dd': case 'dt':
+ /* If node is a dd or dt element, then act as if an end
+ * tag with the same tag name as node had been seen, then
+ * jump to the last step. */
+ if(($token['name'] === 'li' && $node->tagName === 'li') ||
+ ($token['name'] !== 'li' && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { // limited conditional
+ $this->emitToken(array(
+ 'type' => HTML5_Tokenizer::ENDTAG,
+ 'name' => $node->tagName,
+ ));
+ break;
+ }
+
+ /* 4. If node is not in the formatting category, and is
+ not in the phrasing category, and is not an address,
+ div or p element, then stop this algorithm. */
+ if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
+ $node->tagName !== 'address' && $node->tagName !== 'div' &&
+ $node->tagName !== 'p') {
+ break;
+ }
+
+ /* 5. Otherwise, set node to the previous entry in the
+ * stack of open elements and return to step 2. */
+ }
+
+ /* 6. This is the last step. */
+
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Finally, insert an HTML element with the same tag
+ name as the token's. */
+ $this->insertElement($token);
+ break;
+
+ /* A start tag token whose tag name is "plaintext" */
+ case 'plaintext':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ $this->content_model = HTML5_Tokenizer::PLAINTEXT;
+ break;
+
+ // more diversions
+
+ /* A start tag whose tag name is "a" */
+ case 'a':
+ /* If the list of active formatting elements contains
+ an element whose tag name is "a" between the end of the
+ list and the last marker on the list (or the start of
+ the list if there is no marker on the list), then this
+ is a parse error; act as if an end tag with the tag name
+ "a" had been seen, then remove that element from the list
+ of active formatting elements and the stack of open
+ elements if the end tag didn't already remove it (it
+ might not have if the element is not in table scope). */
+ $leng = count($this->a_formatting);
+
+ for($n = $leng - 1; $n >= 0; $n--) {
+ if($this->a_formatting[$n] === self::MARKER) {
+ break;
+
+ } elseif($this->a_formatting[$n]->tagName === 'a') {
+ $a = $this->a_formatting[$n];
+ $this->emitToken(array(
+ 'name' => 'a',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ if (in_array($a, $this->a_formatting)) {
+ $a_i = array_search($a, $this->a_formatting, true);
+ if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
+ }
+ if (in_array($a, $this->stack)) {
+ $a_i = array_search($a, $this->stack, true);
+ if ($a_i !== false) array_splice($this->stack, $a_i, 1);
+ }
+ break;
+ }
+ }
+
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $el = $this->insertElement($token);
+
+ /* Add that element to the list of active formatting
+ elements. */
+ $this->a_formatting[] = $el;
+ break;
+
+ case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
+ case 's': case 'small': case 'strike':
+ case 'strong': case 'tt': case 'u':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $el = $this->insertElement($token);
+
+ /* Add that element to the list of active formatting
+ elements. */
+ $this->a_formatting[] = $el;
+ break;
+
+ case 'nobr':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* If the stack of open elements has a nobr element in
+ * scope, then this is a parse error; act as if an end tag
+ * with the tag name "nobr" had been seen, then once again
+ * reconstruct the active formatting elements, if any. */
+ if ($this->elementInScope('nobr')) {
+ $this->emitToken(array(
+ 'name' => 'nobr',
+ 'type' => HTML5_Tokenizer::ENDTAG,
+ ));
+ $this->reconstructActiveFormattingElements();
+ }
+
+ /* Insert an HTML element for the token. */
+ $el = $this->insertElement($token);
+
+ /* Add that element to the list of active formatting
+ elements. */
+ $this->a_formatting[] = $el;
+ break;
+
+ // another diversion
+
+ /* A start tag token whose tag name is "button" */
+ case 'button':
+ /* If the stack of open elements has a button element in scope,
+ then this is a parse error; act as if an end tag with the tag
+ name "button" had been seen, then reprocess the token. (We don't
+ do that. Unnecessary.) (I hope you're right! -- ezyang) */
+ if($this->elementInScope('button')) {
+ $this->emitToken(array(
+ 'name' => 'button',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+
+ $this->flag_frameset_ok = false;
+ break;
+
+ case 'applet': case 'marquee': case 'object':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+
+ $this->flag_frameset_ok = false;
+ break;
+
+ // spec diversion
+
+ /* A start tag whose tag name is "table" */
+ case 'table':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if($this->quirks_mode !== self::QUIRKS_MODE &&
+ $this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ $this->flag_frameset_ok = false;
+
+ /* Change the insertion mode to "in table". */
+ $this->mode = self::IN_TABLE;
+ break;
+
+ /* A start tag whose tag name is one of: "area", "basefont",
+ "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
+ case 'area': case 'basefont': case 'bgsound': case 'br':
+ case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
+ case 'wbr':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+
+ // YYY: Acknowledge the token's self-closing flag, if it is set.
+
+ $this->flag_frameset_ok = false;
+ break;
+
+ case 'param': case 'source':
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+
+ // YYY: Acknowledge the token's self-closing flag, if it is set.
+ break;
+
+ /* A start tag whose tag name is "hr" */
+ case 'hr':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if($this->elementInScope('p')) {
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+
+ // YYY: Acknowledge the token's self-closing flag, if it is set.
+
+ $this->flag_frameset_ok = false;
+ break;
+
+ /* A start tag whose tag name is "image" */
+ case 'image':
+ /* Parse error. Change the token's tag name to "img" and
+ reprocess it. (Don't ask.) */
+ $token['name'] = 'img';
+ $this->emitToken($token);
+ break;
+
+ /* A start tag whose tag name is "isindex" */
+ case 'isindex':
+ /* Parse error. */
+
+ /* If the form element pointer is not null,
+ then ignore the token. */
+ if($this->form_pointer === null) {
+ /* Act as if a start tag token with the tag name "form" had
+ been seen. */
+ /* If the token has an attribute called "action", set
+ * the action attribute on the resulting form
+ * element to the value of the "action" attribute of
+ * the token. */
+ $attr = array();
+ $action = $this->getAttr($token, 'action');
+ if ($action !== false) {
+ $attr[] = array('name' => 'action', 'value' => $action);
+ }
+ $this->emitToken(array(
+ 'name' => 'form',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => $attr
+ ));
+
+ /* Act as if a start tag token with the tag name "hr" had
+ been seen. */
+ $this->emitToken(array(
+ 'name' => 'hr',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+
+ /* Act as if a start tag token with the tag name "p" had
+ been seen. */
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+
+ /* Act as if a start tag token with the tag name "label"
+ had been seen. */
+ $this->emitToken(array(
+ 'name' => 'label',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+
+ /* Act as if a stream of character tokens had been seen. */
+ $prompt = $this->getAttr($token, 'prompt');
+ if ($prompt === false) {
+ $prompt = 'This is a searchable index. '.
+ 'Insert your search keywords here: ';
+ }
+ $this->emitToken(array(
+ 'data' => $prompt,
+ 'type' => HTML5_Tokenizer::CHARACTER,
+ ));
+
+ /* Act as if a start tag token with the tag name "input"
+ had been seen, with all the attributes from the "isindex"
+ token, except with the "name" attribute set to the value
+ "isindex" (ignoring any explicit "name" attribute). */
+ $attr = array();
+ foreach ($token['attr'] as $keypair) {
+ if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
+ $keypair['name'] === 'prompt') continue;
+ $attr[] = $keypair;
+ }
+ $attr[] = array('name' => 'name', 'value' => 'isindex');
+
+ $this->emitToken(array(
+ 'name' => 'input',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => $attr
+ ));
+
+ /* Act as if an end tag token with the tag name "label"
+ had been seen. */
+ $this->emitToken(array(
+ 'name' => 'label',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ /* Act as if an end tag token with the tag name "p" had
+ been seen. */
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ /* Act as if a start tag token with the tag name "hr" had
+ been seen. */
+ $this->emitToken(array(
+ 'name' => 'hr',
+ 'type' => HTML5_Tokenizer::STARTTAG
+ ));
+
+ /* Act as if an end tag token with the tag name "form" had
+ been seen. */
+ $this->emitToken(array(
+ 'name' => 'form',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ } else {
+ $this->ignored = true;
+ }
+ break;
+
+ /* A start tag whose tag name is "textarea" */
+ case 'textarea':
+ $this->insertElement($token);
+
+ /* If the next token is a U+000A LINE FEED (LF)
+ * character token, then ignore that token and move on to
+ * the next one. (Newlines at the start of textarea
+ * elements are ignored as an authoring convenience.)
+ * need flag, see also <pre> */
+ $this->ignore_lf_token = 2;
+
+ $this->original_mode = $this->mode;
+ $this->flag_frameset_ok = false;
+ $this->mode = self::IN_CDATA_RCDATA;
+
+ /* Switch the tokeniser's content model flag to the
+ RCDATA state. */
+ $this->content_model = HTML5_Tokenizer::RCDATA;
+ break;
+
+ /* A start tag token whose tag name is "xmp" */
+ case 'xmp':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ $this->flag_frameset_ok = false;
+
+ $this->insertCDATAElement($token);
+ break;
+
+ case 'iframe':
+ $this->flag_frameset_ok = false;
+ $this->insertCDATAElement($token);
+ break;
+
+ case 'noembed': case 'noscript':
+ // XSCRIPT: should check scripting flag
+ $this->insertCDATAElement($token);
+ break;
+
+ /* A start tag whose tag name is "select" */
+ case 'select':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ $this->flag_frameset_ok = false;
+
+ /* If the insertion mode is one of "in table", "in caption",
+ * "in column group", "in table body", "in row", or "in
+ * cell", then switch the insertion mode to "in select in
+ * table". Otherwise, switch the insertion mode to "in
+ * select". */
+ if (
+ $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
+ $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
+ $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
+ ) {
+ $this->mode = self::IN_SELECT_IN_TABLE;
+ } else {
+ $this->mode = self::IN_SELECT;
+ }
+ break;
+
+ case 'option': case 'optgroup':
+ if ($this->elementInScope('option')) {
+ $this->emitToken(array(
+ 'name' => 'option',
+ 'type' => HTML5_Tokenizer::ENDTAG,
+ ));
+ }
+ $this->reconstructActiveFormattingElements();
+ $this->insertElement($token);
+ break;
+
+ case 'rp': case 'rt':
+ /* If the stack of open elements has a ruby element in scope, then generate
+ * implied end tags. If the current node is not then a ruby element, this is
+ * a parse error; pop all the nodes from the current node up to the node
+ * immediately before the bottommost ruby element on the stack of open elements.
+ */
+ if ($this->elementInScope('ruby')) {
+ $this->generateImpliedEndTags();
+ }
+ $peek = false;
+ do {
+ if ($peek) {
+ // parse error
+ }
+ $peek = array_pop($this->stack);
+ } while ($peek->tagName !== 'ruby');
+ $this->stack[] = $peek; // we popped one too many
+ $this->insertElement($token);
+ break;
+
+ // spec diversion
+
+ case 'math':
+ $this->reconstructActiveFormattingElements();
+ $token = $this->adjustMathMLAttributes($token);
+ $token = $this->adjustForeignAttributes($token);
+ $this->insertForeignElement($token, self::NS_MATHML);
+ if (isset($token['self-closing'])) {
+ // XERROR: acknowledge the token's self-closing flag
+ array_pop($this->stack);
+ }
+ if ($this->mode !== self::IN_FOREIGN_CONTENT) {
+ $this->secondary_mode = $this->mode;
+ $this->mode = self::IN_FOREIGN_CONTENT;
+ }
+ break;
+
+ case 'svg':
+ $this->reconstructActiveFormattingElements();
+ $token = $this->adjustSVGAttributes($token);
+ $token = $this->adjustForeignAttributes($token);
+ $this->insertForeignElement($token, self::NS_SVG);
+ if (isset($token['self-closing'])) {
+ // XERROR: acknowledge the token's self-closing flag
+ array_pop($this->stack);
+ }
+ if ($this->mode !== self::IN_FOREIGN_CONTENT) {
+ $this->secondary_mode = $this->mode;
+ $this->mode = self::IN_FOREIGN_CONTENT;
+ }
+ break;
+
+ case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
+ case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
+ // parse error
+ break;
+
+ /* A start tag token not covered by the previous entries */
+ default:
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ $this->insertElement($token);
+ /* This element will be a phrasing element. */
+ break;
+ }
+ break;
+
+ case HTML5_Tokenizer::ENDTAG:
+ switch($token['name']) {
+ /* An end tag with the tag name "body" */
+ case 'body':
+ /* If the second element in the stack of open elements is
+ not a body element, this is a parse error. Ignore the token.
+ (innerHTML case) */
+ if(count($this->stack) < 2 || $this->stack[1]->tagName !== 'body') {
+ $this->ignored = true;
+
+ /* Otherwise, if there is a node in the stack of open
+ * elements that is not either a dd element, a dt
+ * element, an li element, an optgroup element, an
+ * option element, a p element, an rp element, an rt
+ * element, a tbody element, a td element, a tfoot
+ * element, a th element, a thead element, a tr element,
+ * the body element, or the html element, then this is a
+ * parse error. */
+ } else {
+ // XERROR: implement this check for parse error
+ }
+
+ /* Change the insertion mode to "after body". */
+ $this->mode = self::AFTER_BODY;
+ break;
+
+ /* An end tag with the tag name "html" */
+ case 'html':
+ /* Act as if an end tag with tag name "body" had been seen,
+ then, if that token wasn't ignored, reprocess the current
+ token. */
+ $this->emitToken(array(
+ 'name' => 'body',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ if (!$this->ignored) $this->emitToken($token);
+ break;
+
+ case 'address': case 'article': case 'aside': case 'blockquote':
+ case 'center': case 'datagrid': case 'details': case 'dir':
+ case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
+ case 'header': case 'hgroup': case 'listing': case 'menu':
+ case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
+ /* If the stack of open elements has an element in scope
+ with the same tag name as that of the token, then generate
+ implied end tags. */
+ if($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not an element with
+ the same tag name as that of the token, then this
+ is a parse error. */
+ // XERROR: implement parse error logic
+
+ /* If the stack of open elements has an element in
+ scope with the same tag name as that of the token,
+ then pop elements from this stack until an element
+ with that tag name has been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== $token['name']);
+ } else {
+ // parse error
+ }
+ break;
+
+ /* An end tag whose tag name is "form" */
+ case 'form':
+ /* Let node be the element that the form element pointer is set to. */
+ $node = $this->form_pointer;
+ /* Set the form element pointer to null. */
+ $this->form_pointer = null;
+ /* If node is null or the stack of open elements does not
+ * have node in scope, then this is a parse error; ignore the token. */
+ if ($node === null || !in_array($node, $this->stack)) {
+ // parse error
+ $this->ignored = true;
+ } else {
+ /* 1. Generate implied end tags. */
+ $this->generateImpliedEndTags();
+ /* 2. If the current node is not node, then this is a parse error. */
+ if (end($this->stack) !== $node) {
+ // parse error
+ }
+ /* 3. Remove node from the stack of open elements. */
+ array_splice($this->stack, array_search($node, $this->stack, true), 1);
+ }
+
+ break;
+
+ /* An end tag whose tag name is "p" */
+ case 'p':
+ /* If the stack of open elements has a p element in scope,
+ then generate implied end tags, except for p elements. */
+ if($this->elementInScope('p')) {
+ /* Generate implied end tags, except for elements with
+ * the same tag name as the token. */
+ $this->generateImpliedEndTags(array('p'));
+
+ /* If the current node is not a p element, then this is
+ a parse error. */
+ // XERROR: implement
+
+ /* Pop elements from the stack of open elements until
+ * an element with the same tag name as the token has
+ * been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== 'p');
+
+ } else {
+ // parse error
+ $this->emitToken(array(
+ 'name' => 'p',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ ));
+ $this->emitToken($token);
+ }
+ break;
+
+ /* An end tag whose tag name is "dd", "dt", or "li" */
+ case 'dd': case 'dt': case 'li':
+ if($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags(array($token['name']));
+
+ /* If the current node is not an element with the same
+ tag name as the token, then this is a parse error. */
+ // XERROR: implement parse error
+
+ /* Pop elements from the stack of open elements until
+ * an element with the same tag name as the token has
+ * been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== $token['name']);
+
+ } else {
+ // parse error
+ }
+ break;
+
+ /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
+ "h5", "h6" */
+ case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
+ $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
+
+ /* If the stack of open elements has in scope an element whose
+ tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+ generate implied end tags. */
+ if($this->elementInScope($elements)) {
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not an element with the same
+ tag name as that of the token, then this is a parse error. */
+ // XERROR: implement parse error
+
+ /* If the stack of open elements has in scope an element
+ whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+ "h6", then pop elements from the stack until an element
+ with one of those tag names has been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while (!in_array($node->tagName, $elements));
+ } else {
+ // parse error
+ }
+ break;
+
+ /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
+ "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+ case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
+ case 'i': case 'nobr': case 's': case 'small': case 'strike':
+ case 'strong': case 'tt': case 'u':
+ // XERROR: generally speaking this needs parse error logic
+ /* 1. Let the formatting element be the last element in
+ the list of active formatting elements that:
+ * is between the end of the list and the last scope
+ marker in the list, if any, or the start of the list
+ otherwise, and
+ * has the same tag name as the token.
+ */
+ while(true) {
+ for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
+ if($this->a_formatting[$a] === self::MARKER) {
+ break;
+
+ } elseif($this->a_formatting[$a]->tagName === $token['name']) {
+ $formatting_element = $this->a_formatting[$a];
+ $in_stack = in_array($formatting_element, $this->stack, true);
+ $fe_af_pos = $a;
+ break;
+ }
+ }
+
+ /* If there is no such node, or, if that node is
+ also in the stack of open elements but the element
+ is not in scope, then this is a parse error. Abort
+ these steps. The token is ignored. */
+ if(!isset($formatting_element) || ($in_stack &&
+ !$this->elementInScope($token['name']))) {
+ $this->ignored = true;
+ break;
+
+ /* Otherwise, if there is such a node, but that node
+ is not in the stack of open elements, then this is a
+ parse error; remove the element from the list, and
+ abort these steps. */
+ } elseif(isset($formatting_element) && !$in_stack) {
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+ break;
+ }
+
+ /* Otherwise, there is a formatting element and that
+ * element is in the stack and is in scope. If the
+ * element is not the current node, this is a parse
+ * error. In any case, proceed with the algorithm as
+ * written in the following steps. */
+ // XERROR: implement me
+
+ /* 2. Let the furthest block be the topmost node in the
+ stack of open elements that is lower in the stack
+ than the formatting element, and is not an element in
+ the phrasing or formatting categories. There might
+ not be one. */
+ $fe_s_pos = array_search($formatting_element, $this->stack, true);
+ $length = count($this->stack);
+
+ for($s = $fe_s_pos + 1; $s < $length; $s++) {
+ $category = $this->getElementCategory($this->stack[$s]);
+
+ if($category !== self::PHRASING && $category !== self::FORMATTING) {
+ $furthest_block = $this->stack[$s];
+ break;
+ }
+ }
+
+ /* 3. If there is no furthest block, then the UA must
+ skip the subsequent steps and instead just pop all
+ the nodes from the bottom of the stack of open
+ elements, from the current node up to the formatting
+ element, and remove the formatting element from the
+ list of active formatting elements. */
+ if(!isset($furthest_block)) {
+ for($n = $length - 1; $n >= $fe_s_pos; $n--) {
+ array_pop($this->stack);
+ }
+
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+ break;
+ }
+
+ /* 4. Let the common ancestor be the element
+ immediately above the formatting element in the stack
+ of open elements. */
+ $common_ancestor = $this->stack[$fe_s_pos - 1];
+
+ /* 5. Let a bookmark note the position of the
+ formatting element in the list of active formatting
+ elements relative to the elements on either side
+ of it in the list. */
+ $bookmark = $fe_af_pos;
+
+ /* 6. Let node and last node be the furthest block.
+ Follow these steps: */
+ $node = $furthest_block;
+ $last_node = $furthest_block;
+
+ while(true) {
+ for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
+ /* 6.1 Let node be the element immediately
+ prior to node in the stack of open elements. */
+ $node = $this->stack[$n];
+
+ /* 6.2 If node is not in the list of active
+ formatting elements, then remove node from
+ the stack of open elements and then go back
+ to step 1. */
+ if(!in_array($node, $this->a_formatting, true)) {
+ array_splice($this->stack, $n, 1);
+
+ } else {
+ break;
+ }
+ }
+
+ /* 6.3 Otherwise, if node is the formatting
+ element, then go to the next step in the overall
+ algorithm. */
+ if($node === $formatting_element) {
+ break;
+
+ /* 6.4 Otherwise, if last node is the furthest
+ block, then move the aforementioned bookmark to
+ be immediately after the node in the list of
+ active formatting elements. */
+ } elseif($last_node === $furthest_block) {
+ $bookmark = array_search($node, $this->a_formatting, true) + 1;
+ }
+
+ /* 6.5 Create an element for the token for which
+ * the element node was created, replace the entry
+ * for node in the list of active formatting
+ * elements with an entry for the new element,
+ * replace the entry for node in the stack of open
+ * elements with an entry for the new element, and
+ * let node be the new element. */
+ // we don't know what the token is anymore
+ $clone = $node->cloneNode();
+ $a_pos = array_search($node, $this->a_formatting, true);
+ $s_pos = array_search($node, $this->stack, true);
+ $this->a_formatting[$a_pos] = $clone;
+ $this->stack[$s_pos] = $clone;
+ $node = $clone;
+
+ /* 6.6 Insert last node into node, first removing
+ it from its previous parent node if any. */
+ if($last_node->parentNode !== null) {
+ $last_node->parentNode->removeChild($last_node);
+ }
+
+ $node->appendChild($last_node);
+
+ /* 6.7 Let last node be node. */
+ $last_node = $node;
+
+ /* 6.8 Return to step 1 of this inner set of steps. */
+ }
+
+ /* 7. If the common ancestor node is a table, tbody,
+ * tfoot, thead, or tr element, then, foster parent
+ * whatever last node ended up being in the previous
+ * step, first removing it from its previous parent
+ * node if any. */
+ if ($last_node->parentNode) { // common step
+ $last_node->parentNode->removeChild($last_node);
+ }
+ if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ $this->fosterParent($last_node);
+ /* Otherwise, append whatever last node ended up being
+ * in the previous step to the common ancestor node,
+ * first removing it from its previous parent node if
+ * any. */
+ } else {
+ $common_ancestor->appendChild($last_node);
+ }
+
+ /* 8. Create an element for the token for which the
+ * formatting element was created. */
+ $clone = $formatting_element->cloneNode();
+
+ /* 9. Take all of the child nodes of the furthest
+ block and append them to the element created in the
+ last step. */
+ while($furthest_block->hasChildNodes()) {
+ $child = $furthest_block->firstChild;
+ $furthest_block->removeChild($child);
+ $clone->appendChild($child);
+ }
+
+ /* 10. Append that clone to the furthest block. */
+ $furthest_block->appendChild($clone);
+
+ /* 11. Remove the formatting element from the list
+ of active formatting elements, and insert the new element
+ into the list of active formatting elements at the
+ position of the aforementioned bookmark. */
+ $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
+ array_splice($this->a_formatting, $fe_af_pos, 1);
+
+ $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
+ $af_part2 = array_slice($this->a_formatting, $bookmark);
+ $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
+
+ /* 12. Remove the formatting element from the stack
+ of open elements, and insert the new element into the stack
+ of open elements immediately below the position of the
+ furthest block in that stack. */
+ $fe_s_pos = array_search($formatting_element, $this->stack, true);
+ array_splice($this->stack, $fe_s_pos, 1);
+
+ $fb_s_pos = array_search($furthest_block, $this->stack, true);
+ $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
+ $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
+ $this->stack = array_merge($s_part1, array($clone), $s_part2);
+
+ /* 13. Jump back to step 1 in this series of steps. */
+ unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
+ }
+ break;
+
+ case 'applet': case 'button': case 'marquee': case 'object':
+ /* If the stack of open elements has an element in scope whose
+ tag name matches the tag name of the token, then generate implied
+ end tags. */
+ if($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not an element with the same
+ tag name as the token, then this is a parse error. */
+ // XERROR: implement logic
+
+ /* Pop elements from the stack of open elements until
+ * an element with the same tag name as the token has
+ * been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== $token['name']);
+
+ /* Clear the list of active formatting elements up to the
+ * last marker. */
+ $keys = array_keys($this->a_formatting, self::MARKER, true);
+ $marker = end($keys);
+
+ for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
+ array_pop($this->a_formatting);
+ }
+ } else {
+ // parse error
+ }
+ break;
+
+ case 'br':
+ // Parse error
+ $this->emitToken(array(
+ 'name' => 'br',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ ));
+ break;
+
+ /* An end tag token not covered by the previous entries */
+ default:
+ for($n = count($this->stack) - 1; $n >= 0; $n--) {
+ /* Initialise node to be the current node (the bottommost
+ node of the stack). */
+ $node = $this->stack[$n];
+
+ /* If node has the same tag name as the end tag token,
+ then: */
+ if($token['name'] === $node->tagName) {
+ /* Generate implied end tags. */
+ $this->generateImpliedEndTags();
+
+ /* If the tag name of the end tag token does not
+ match the tag name of the current node, this is a
+ parse error. */
+ // XERROR: implement this
+
+ /* Pop all the nodes from the current node up to
+ node, including node, then stop these steps. */
+ // XSKETCHY
+ do {
+ $pop = array_pop($this->stack);
+ } while ($pop !== $node);
+ break;
+
+ } else {
+ $category = $this->getElementCategory($node);
+
+ if($category !== self::FORMATTING && $category !== self::PHRASING) {
+ /* Otherwise, if node is in neither the formatting
+ category nor the phrasing category, then this is a
+ parse error. Stop this algorithm. The end tag token
+ is ignored. */
+ $this->ignored = true;
+ break;
+ // parse error
+ }
+ }
+ /* Set node to the previous entry in the stack of open elements. Loop. */
+ }
+ break;
+ }
+ break;
+ }
+ break;
+
+ case self::IN_CDATA_RCDATA:
+ if (
+ $token['type'] === HTML5_Tokenizer::CHARACTER ||
+ $token['type'] === HTML5_Tokenizer::SPACECHARACTER
+ ) {
+ $this->insertText($token['data']);
+ } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
+ // parse error
+ /* If the current node is a script element, mark the script
+ * element as "already executed". */
+ // probably not necessary
+ array_pop($this->stack);
+ $this->mode = $this->original_mode;
+ $this->emitToken($token);
+ } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
+ array_pop($this->stack);
+ $this->mode = $this->original_mode;
+ // we're ignoring all of the execution stuff
+ } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
+ array_pop($this->stack);
+ $this->mode = $this->original_mode;
+ }
+ break;
+
+ case self::IN_TABLE:
+ $clear = array('html', 'table');
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER &&
+ /* If the current table is tainted, then act as described in
+ * the "anything else" entry below. */
+ // Note: hsivonen has a test that fails due to this line
+ // because he wants to convince Hixie not to do taint
+ !$this->currentTableIsTainted()) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+
+ /* A start tag whose tag name is "caption" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'caption') {
+ /* Clear the stack back to a table context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+
+ /* Insert an HTML element for the token, then switch the
+ insertion mode to "in caption". */
+ $this->insertElement($token);
+ $this->mode = self::IN_CAPTION;
+
+ /* A start tag whose tag name is "colgroup" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'colgroup') {
+ /* Clear the stack back to a table context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert an HTML element for the token, then switch the
+ insertion mode to "in column group". */
+ $this->insertElement($token);
+ $this->mode = self::IN_COLUMN_GROUP;
+
+ /* A start tag whose tag name is "col" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'col') {
+ $this->emitToken(array(
+ 'name' => 'colgroup',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+
+ $this->emitToken($token);
+
+ /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
+ array('tbody', 'tfoot', 'thead'))) {
+ /* Clear the stack back to a table context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert an HTML element for the token, then switch the insertion
+ mode to "in table body". */
+ $this->insertElement($token);
+ $this->mode = self::IN_TABLE_BODY;
+
+ /* A start tag whose tag name is one of: "td", "th", "tr" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ in_array($token['name'], array('td', 'th', 'tr'))) {
+ /* Act as if a start tag token with the tag name "tbody" had been
+ seen, then reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => 'tbody',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+
+ $this->emitToken($token);
+
+ /* A start tag whose tag name is "table" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'table') {
+ /* Parse error. Act as if an end tag token with the tag name "table"
+ had been seen, then, if that token wasn't ignored, reprocess the
+ current token. */
+ $this->emitToken(array(
+ 'name' => 'table',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ if (!$this->ignored) $this->emitToken($token);
+
+ /* An end tag whose tag name is "table" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'table') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (fragment case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ $this->ignored = true;
+
+ /* Otherwise: */
+ } else {
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== 'table');
+
+ /* Reset the insertion mode appropriately. */
+ $this->resetInsertionMode();
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
+ 'tfoot', 'th', 'thead', 'tr'))) {
+ // Parse error. Ignore the token.
+
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ ($token['name'] === 'style' || $token['name'] === 'script')) {
+ $this->processWithRulesFor($token, self::IN_HEAD);
+
+ } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
+ // assignment is intentional
+ /* If the token does not have an attribute with the name "type", or
+ * if it does, but that attribute's value is not an ASCII
+ * case-insensitive match for the string "hidden", then: act as
+ * described in the "anything else" entry below. */
+ ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
+ // I.e., if it's an input with the type attribute == 'hidden'
+ /* Otherwise */
+ // parse error
+ $this->insertElement($token);
+ array_pop($this->stack);
+ } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
+ /* If the current node is not the root html element, then this is a parse error. */
+ if (end($this->stack)->tagName !== 'html') {
+ // Note: It can only be the current node in the fragment case.
+ // parse error
+ }
+ /* Stop parsing. */
+ /* Anything else */
+ } else {
+ /* Parse error. Process the token as if the insertion mode was "in
+ body", with the following exception: */
+
+ $old = $this->foster_parent;
+ $this->foster_parent = true;
+ $this->processWithRulesFor($token, self::IN_BODY);
+ $this->foster_parent = $old;
+ }
+ break;
+
+ case self::IN_CAPTION:
+ /* An end tag whose tag name is "caption" */
+ if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (fragment case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ $this->ignored = true;
+ // Ignore
+
+ /* Otherwise: */
+ } else {
+ /* Generate implied end tags. */
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not a caption element, then this
+ is a parse error. */
+ // XERROR: implement
+
+ /* Pop elements from this stack until a caption element has
+ been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== 'caption');
+
+ /* Clear the list of active formatting elements up to the last
+ marker. */
+ $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+ /* Switch the insertion mode to "in table". */
+ $this->mode = self::IN_TABLE;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
+ name is "table" */
+ } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+ 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'table')) {
+ /* Parse error. Act as if an end tag with the tag name "caption"
+ had been seen, then, if that token wasn't ignored, reprocess the
+ current token. */
+ $this->emitToken(array(
+ 'name' => 'caption',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ if (!$this->ignored) $this->emitToken($token);
+
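+ /* An end tag whose tag name is one of: "body", "col", "colgroup",
+ "html", "tbody", "td", "tfoot", "th", "thead", "tr"
+ NOTE(review): the array checked below omits "td"; confirm whether
+ that omission is intentional. */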
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
+ array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
+ 'thead', 'tr'))) {
+ // Parse error. Ignore the token.
+ $this->ignored = true;
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in body". */
+ $this->processWithRulesFor($token, self::IN_BODY);
+ }
+ break;
+
+ case self::IN_COLUMN_GROUP:
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data
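+ attribute set to the data given in the comment token.
+ NOTE(review): this branch calls insertToken(), whereas the
+ "in table" branch calls insertComment() for the same step;
+ verify that insertToken() exists and is the intended call. */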
+ $this->insertToken($token['data']);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* A start tag whose tag name is "col" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
+ /* Insert a col element for the token. Immediately pop the current
+ node off the stack of open elements. */
+ $this->insertElement($token);
+ array_pop($this->stack);
+ // XERROR: Acknowledge the token's self-closing flag, if it is set.
+
+ /* An end tag whose tag name is "colgroup" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'colgroup') {
+ /* If the current node is the root html element, then this is a
+ parse error, ignore the token. (fragment case) */
+ if(end($this->stack)->tagName === 'html') {
+ $this->ignored = true;
+
+ /* Otherwise, pop the current node (which will be a colgroup
+ element) from the stack of open elements. Switch the insertion
+ mode to "in table". */
+ } else {
+ array_pop($this->stack);
+ $this->mode = self::IN_TABLE;
+ }
+
+ /* An end tag whose tag name is "col" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+
+ /* An end-of-file token */
+ /* If the current node is the root html element */
+ } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
+ /* Stop parsing */
+
+ /* Anything else */
+ } else {
+ /* Act as if an end tag with the tag name "colgroup" had been seen,
+ and then, if that token wasn't ignored, reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => 'colgroup',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ if (!$this->ignored) $this->emitToken($token);
+ }
+ break;
+
+ case self::IN_TABLE_BODY:
+ $clear = array('tbody', 'tfoot', 'thead', 'html');
+
+ /* A start tag whose tag name is "tr" */
+ if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
+ /* Clear the stack back to a table body context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert a tr element for the token, then switch the insertion
+ mode to "in row". */
+ $this->insertElement($token);
+ $this->mode = self::IN_ROW;
+
+ /* A start tag whose tag name is one of: "th", "td" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ ($token['name'] === 'th' || $token['name'] === 'td')) {
+ /* Parse error. Act as if a start tag with the tag name "tr" had
+ been seen, then reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => 'tr',
+ 'type' => HTML5_Tokenizer::STARTTAG,
+ 'attr' => array()
+ ));
+
+ $this->emitToken($token);
+
+ /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Parse error
+ $this->ignored = true;
+
+ /* Otherwise: */
+ } else {
+ /* Clear the stack back to a table body context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Pop the current node from the stack of open elements. Switch
+ the insertion mode to "in table". */
+ array_pop($this->stack);
+ $this->mode = self::IN_TABLE;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
+ } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
+ ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
+ /* If the stack of open elements does not have a tbody, thead, or
+ tfoot element in table scope, this is a parse error. Ignore the
+ token. (fragment case) */
+ if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
+ // parse error
+ $this->ignored = true;
+
+ /* Otherwise: */
+ } else {
+ /* Clear the stack back to a table body context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Act as if an end tag with the same tag name as the current
+ node ("tbody", "tfoot", or "thead") had been seen, then
+ reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => end($this->stack)->tagName,
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ $this->emitToken($token);
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th", "tr" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in table". */
+ $this->processWithRulesFor($token, self::IN_TABLE);
+ }
+ break;
+
+ case self::IN_ROW:
+ $clear = array('tr', 'html');
+
+ /* A start tag whose tag name is one of: "th", "td" */
+ if($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ ($token['name'] === 'th' || $token['name'] === 'td')) {
+ /* Clear the stack back to a table row context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Insert an HTML element for the token, then switch the insertion
+ mode to "in cell". */
+ $this->insertElement($token);
+ $this->mode = self::IN_CELL;
+
+ /* Insert a marker at the end of the list of active formatting
+ elements. */
+ $this->a_formatting[] = self::MARKER;
+
+ /* An end tag whose tag name is "tr" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (fragment case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ // Ignore.
+ $this->ignored = true;
+
+ /* Otherwise: */
+ } else {
+ /* Clear the stack back to a table row context. */
+ $this->clearStackToTableContext($clear);
+
+ /* Pop the current node (which will be a tr element) from the
+ stack of open elements. Switch the insertion mode to "in table
+ body". */
+ array_pop($this->stack);
+ $this->mode = self::IN_TABLE_BODY;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
+ } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
+ ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
+ /* Act as if an end tag with the tag name "tr" had been seen, then,
+ if that token wasn't ignored, reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => 'tr',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ if (!$this->ignored) $this->emitToken($token);
+
+ /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. */
+ if(!$this->elementInScope($token['name'], true)) {
+ $this->ignored = true;
+
+ /* Otherwise: */
+ } else {
+ /* Otherwise, act as if an end tag with the tag name "tr" had
+ been seen, then reprocess the current token. */
+ $this->emitToken(array(
+ 'name' => 'tr',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ $this->emitToken($token);
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in table". */
+ $this->processWithRulesFor($token, self::IN_TABLE);
+ }
+ break;
+
+ case self::IN_CELL:
+ /* An end tag whose tag name is one of: "td", "th" */
+ if($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ ($token['name'] === 'td' || $token['name'] === 'th')) {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as that of the token, then this is a
+ parse error and the token must be ignored. */
+ if(!$this->elementInScope($token['name'], true)) {
+ $this->ignored = true;
+
+ /* Otherwise: */
+ } else {
+ /* Generate implied end tags, except for elements with the same
+ tag name as the token. */
+ $this->generateImpliedEndTags(array($token['name']));
+
+ /* Now, if the current node is not an element with the same tag
+ name as the token, then this is a parse error. */
+ // XERROR: Implement parse error code
+
+ /* Pop elements from this stack until an element with the same
+ tag name as the token has been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== $token['name']);
+
+ /* Clear the list of active formatting elements up to the last
+ marker. */
+ $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+ /* Switch the insertion mode to "in row". (The current node
+ will be a tr element at this point.) */
+ $this->mode = self::IN_ROW;
+ }
+
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+ 'thead', 'tr'))) {
+ /* If the stack of open elements does not have a td or th element
+ in table scope, then this is a parse error; ignore the token.
+ (fragment case) */
+ if(!$this->elementInScope(array('td', 'th'), true)) {
+ // parse error
+ $this->ignored = true;
+
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
+ } else {
+ $this->closeCell();
+ $this->emitToken($token);
+ }
+
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html'))) {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+
+ /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
+ "thead", "tr" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
+ array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ /* If the stack of open elements does not have a td or th element
+ in table scope, then this is a parse error; ignore the token.
+ (innerHTML case) */
+ if(!$this->elementInScope(array('td', 'th'), true)) {
+ // Parse error
+ $this->ignored = true;
+
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
+ } else {
+ $this->closeCell();
+ $this->emitToken($token);
+ }
+
+ /* Anything else */
+ } else {
+ /* Process the token as if the insertion mode was "in body". */
+ $this->processWithRulesFor($token, self::IN_BODY);
+ }
+ break;
+
+ case self::IN_SELECT:
+ /* Handle the token as follows: */
+
+ /* A character token */
+ if(
+ $token['type'] === HTML5_Tokenizer::CHARACTER ||
+ $token['type'] === HTML5_Tokenizer::SPACECHARACTER
+ ) {
+ /* Append the token's character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
+ $this->processWithRulesFor($token, self::INBODY);
+
+ /* A start tag token whose tag name is "option" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'option') {
+ /* If the current node is an option element, act as if an end tag
+ with the tag name "option" had been seen. */
+ if(end($this->stack)->tagName === 'option') {
+ $this->emitToken(array(
+ 'name' => 'option',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* A start tag token whose tag name is "optgroup" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'optgroup') {
+ /* If the current node is an option element, act as if an end tag
+ with the tag name "option" had been seen. */
+ if(end($this->stack)->tagName === 'option') {
+ $this->emitToken(array(
+ 'name' => 'option',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* If the current node is an optgroup element, act as if an end tag
+ with the tag name "optgroup" had been seen. */
+ if(end($this->stack)->tagName === 'optgroup') {
+ $this->emitToken(array(
+ 'name' => 'optgroup',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* An end tag token whose tag name is "optgroup" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'optgroup') {
+ /* First, if the current node is an option element, and the node
+ immediately before it in the stack of open elements is an optgroup
+ element, then act as if an end tag with the tag name "option" had
+ been seen. */
+ $elements_in_stack = count($this->stack);
+
+ if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
+ $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
+ $this->emitToken(array(
+ 'name' => 'option',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ }
+
+ /* If the current node is an optgroup element, then pop that node
+ from the stack of open elements. Otherwise, this is a parse error,
+ ignore the token. */
+ if(end($this->stack)->tagName === 'optgroup') {
+ array_pop($this->stack);
+ } else {
+ // parse error
+ $this->ignored = true;
+ }
+
+ /* An end tag token whose tag name is "option" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'option') {
+ /* If the current node is an option element, then pop that node
+ from the stack of open elements. Otherwise, this is a parse error,
+ ignore the token. */
+ if(end($this->stack)->tagName === 'option') {
+ array_pop($this->stack);
+ } else {
+ // parse error
+ $this->ignored = true;
+ }
+
+ /* An end tag whose tag name is "select" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'select') {
+ /* If the stack of open elements does not have an element in table
+ scope with the same tag name as the token, this is a parse error.
+ Ignore the token. (fragment case) */
+ if(!$this->elementInScope($token['name'], true)) {
+ $this->ignored = true;
+ // parse error
+
+ /* Otherwise: */
+ } else {
+ /* Pop elements from the stack of open elements until a select
+ element has been popped from the stack. */
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->tagName !== 'select');
+
+ /* Reset the insertion mode appropriately. */
+ $this->resetInsertionMode();
+ }
+
+ /* A start tag whose tag name is "select" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
+ /* Parse error. Act as if the token had been an end tag with the
+ tag name "select" instead. */
+ $this->emitToken(array(
+ 'name' => 'select',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ ($token['name'] === 'input' || $token['name'] === 'textarea')) {
+ // parse error
+ $this->emitToken(array(
+ 'name' => 'select',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+ $this->emitToken($token);
+
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
+ $this->processWithRulesFor($token, self::IN_HEAD);
+
+ } elseif($token['type'] === HTML5_Tokenizer::EOF) {
+ // XERROR: If the current node is not the root html element, then this is a parse error.
+ /* Stop parsing */
+
+ /* Anything else */
+ } else {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+ }
+ break;
+
+ case self::IN_SELECT_IN_TABLE:
+
+ if($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ in_array($token['name'], array('caption', 'table', 'tbody',
+ 'tfoot', 'thead', 'tr', 'td', 'th'))) {
+ // parse error
+ $this->emitToken(array(
+ 'name' => 'select',
+ 'type' => HTML5_Tokenizer::ENDTAG,
+ ));
+ $this->emitToken($token);
+
+ /* An end tag whose tag name is one of: "caption", "table", "tbody",
+ "tfoot", "thead", "tr", "td", "th" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
+ /* Parse error. */
+ // parse error
+
+ /* If the stack of open elements has an element in table scope with
+ the same tag name as that of the token, then act as if an end tag
+ with the tag name "select" had been seen, and reprocess the token.
+ Otherwise, ignore the token. */
+ if($this->elementInScope($token['name'], true)) {
+ $this->emitToken(array(
+ 'name' => 'select',
+ 'type' => HTML5_Tokenizer::ENDTAG
+ ));
+
+ $this->emitToken($token);
+ } else {
+ $this->ignored = true;
+ }
+ } else {
+ $this->processWithRulesFor($token, self::IN_SELECT);
+ }
+ break;
+
+ case self::IN_FOREIGN_CONTENT:
+ if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
+ $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ $this->insertText($token['data']);
+ } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
+ $this->insertComment($token['data']);
+ } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // XERROR: parse error
+ } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
+ end($this->stack)->namespaceURI === self::NS_SVG) {
+ array_pop($this->stack);
+ // a bunch of script running mumbo jumbo
+ } elseif (
+ ($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ ((
+ $token['name'] !== 'mglyph' &&
+ $token['name'] !== 'malignmark' &&
+ end($this->stack)->namespaceURI === self::NS_MATHML &&
+ in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
+ ) ||
+ (
+ $token['name'] === 'svg' &&
+ end($this->stack)->namespaceURI === self::NS_MATHML &&
+ end($this->stack)->tagName === 'annotation-xml'
+ ) ||
+ (
+ end($this->stack)->namespaceURI === self::NS_SVG &&
+ in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
+ ) ||
+ (
+ // XSKETCHY
+ end($this->stack)->namespaceURI === self::NS_HTML
+ ))
+ ) || $token['type'] === HTML5_Tokenizer::ENDTAG
+ ) {
+ $this->processWithRulesFor($token, $this->secondary_mode);
+ /* If, after doing so, the insertion mode is still "in foreign
+ * content", but there is no element in scope that has a namespace
+ * other than the HTML namespace, switch the insertion mode to the
+ * secondary insertion mode. */
+ if ($this->mode === self::IN_FOREIGN_CONTENT) {
+ $found = false;
+ // this basically duplicates elementInScope()
+ for ($i = count($this->stack) - 1; $i >= 0; $i--) {
+ $node = $this->stack[$i];
+ if ($node->namespaceURI !== self::NS_HTML) {
+ $found = true;
+ break;
+ } elseif (in_array($node->tagName, array('table', 'html',
+ 'applet', 'caption', 'td', 'th', 'button', 'marquee',
+ 'object')) || ($node->tagName === 'foreignObject' &&
+ $node->namespaceURI === self::NS_SVG)) {
+ break;
+ }
+ }
+ if (!$found) {
+ $this->mode = $this->secondary_mode;
+ }
+ }
+ } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
+ $token['type'] === HTML5_Tokenizer::STARTTAG &&
+ (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
+ "center", "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2",
+ "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
+ "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
+ "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
+ "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
+ $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
+ // XERROR: parse error
+ do {
+ $node = array_pop($this->stack);
+ } while ($node->namespaceURI !== self::NS_HTML);
+ $this->stack[] = $node;
+ $this->mode = $this->secondary_mode;
+ $this->emitToken($token);
+ } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
+ static $svg_lookup = array(
+ 'altglyph' => 'altGlyph',
+ 'altglyphdef' => 'altGlyphDef',
+ 'altglyphitem' => 'altGlyphItem',
+ 'animatecolor' => 'animateColor',
+ 'animatemotion' => 'animateMotion',
+ 'animatetransform' => 'animateTransform',
+ 'clippath' => 'clipPath',
+ 'feblend' => 'feBlend',
+ 'fecolormatrix' => 'feColorMatrix',
+ 'fecomponenttransfer' => 'feComponentTransfer',
+ 'fecomposite' => 'feComposite',
+ 'feconvolvematrix' => 'feConvolveMatrix',
+ 'fediffuselighting' => 'feDiffuseLighting',
+ 'fedisplacementmap' => 'feDisplacementMap',
+ 'fedistantlight' => 'feDistantLight',
+ 'feflood' => 'feFlood',
+ 'fefunca' => 'feFuncA',
+ 'fefuncb' => 'feFuncB',
+ 'fefuncg' => 'feFuncG',
+ 'fefuncr' => 'feFuncR',
+ 'fegaussianblur' => 'feGaussianBlur',
+ 'feimage' => 'feImage',
+ 'femerge' => 'feMerge',
+ 'femergenode' => 'feMergeNode',
+ 'femorphology' => 'feMorphology',
+ 'feoffset' => 'feOffset',
+ 'fepointlight' => 'fePointLight',
+ 'fespecularlighting' => 'feSpecularLighting',
+ 'fespotlight' => 'feSpotLight',
+ 'fetile' => 'feTile',
+ 'feturbulence' => 'feTurbulence',
+ 'foreignobject' => 'foreignObject',
+ 'glyphref' => 'glyphRef',
+ 'lineargradient' => 'linearGradient',
+ 'radialgradient' => 'radialGradient',
+ 'textpath' => 'textPath',
+ );
+ $current = end($this->stack);
+ if ($current->namespaceURI === self::NS_MATHML) {
+ $token = $this->adjustMathMLAttributes($token);
+ }
+ if ($current->namespaceURI === self::NS_SVG &&
+ isset($svg_lookup[$token['name']])) {
+ $token['name'] = $svg_lookup[$token['name']];
+ }
+ if ($current->namespaceURI === self::NS_SVG) {
+ $token = $this->adjustSVGAttributes($token);
+ }
+ $token = $this->adjustForeignAttributes($token);
+ $this->insertForeignElement($token, $current->namespaceURI);
+ if (isset($token['self-closing'])) {
+ array_pop($this->stack);
+ // XERROR: acknowledge self-closing flag
+ }
+ }
+ break;
+
+ case self::AFTER_BODY:
+ /* Handle the token as follows: */
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Process the token as it would be processed if the insertion mode
+ was "in body". */
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the first element in the stack of open
+ elements (the html element), with the data attribute set to the
+ data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ $this->stack[0]->appendChild($comment);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* An end tag with the tag name "html" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
+ /* If the parser was originally created as part of the HTML
+ * fragment parsing algorithm, this is a parse error; ignore
+ * the token. (fragment case) */
+ $this->ignored = true;
+ // XERROR: implement this
+
+ $this->mode = self::AFTER_AFTER_BODY;
+
+ } elseif($token['type'] === HTML5_Tokenizer::EOF) {
+ /* Stop parsing */
+
+ /* Anything else */
+ } else {
+ /* Parse error. Set the insertion mode to "in body" and reprocess
+ the token. */
+ $this->mode = self::IN_BODY;
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::IN_FRAMESET:
+ /* Handle the token as follows: */
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+
+ /* A start tag with the tag name "frameset" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'frameset') {
+ $this->insertElement($token);
+
+ /* An end tag with the tag name "frameset" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'frameset') {
+ /* If the current node is the root html element, then this is a
+ parse error; ignore the token. (fragment case) */
+ if(end($this->stack)->tagName === 'html') {
+ $this->ignored = true;
+ // Parse error
+
+ } else {
+ /* Otherwise, pop the current node from the stack of open
+ elements. */
+ array_pop($this->stack);
+
+ /* If the parser was not originally created as part of the HTML
+ * fragment parsing algorithm (fragment case), and the current
+ * node is no longer a frameset element, then switch the
+ * insertion mode to "after frameset". */
+ $this->mode = self::AFTER_FRAMESET;
+ }
+
+ /* A start tag with the tag name "frame" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'frame') {
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+
+ // XERROR: Acknowledge the token's self-closing flag, if it is set.
+
+ /* A start tag with the tag name "noframes" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'noframes') {
+ /* Process the token using the rules for the "in head" insertion mode. */
+ $this->processwithRulesFor($token, self::IN_HEAD);
+
+ } elseif($token['type'] === HTML5_Tokenizer::EOF) {
+ // XERROR: If the current node is not the root html element, then this is a parse error.
+ /* Stop parsing */
+ /* Anything else */
+ } else {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+ }
+ break;
+
+ case self::AFTER_FRAMESET:
+ /* Handle the token as follows: */
+
+ /* A character token that is one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
+ if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
+ /* Append the character to the current node. */
+ $this->insertText($token['data']);
+
+ /* A comment token */
+ } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the current node with the data
+ attribute set to the data given in the comment token. */
+ $this->insertComment($token['data']);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
+ // parse error
+
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* An end tag with the tag name "html" */
+ } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
+ $token['name'] === 'html') {
+ $this->mode = self::AFTER_AFTER_FRAMESET;
+
+ /* A start tag with the tag name "noframes" */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
+ $token['name'] === 'noframes') {
+ $this->processWithRulesFor($token, self::IN_HEAD);
+
+ } elseif($token['type'] === HTML5_Tokenizer::EOF) {
+ /* Stop parsing */
+
+ /* Anything else */
+ } else {
+ /* Parse error. Ignore the token. */
+ $this->ignored = true;
+ }
+ break;
+
+ case self::AFTER_AFTER_BODY:
+ /* A comment token */
+ if($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ $this->dom->appendChild($comment);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
+ $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
+ ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* An end-of-file token */
+ } elseif($token['type'] === HTML5_Tokenizer::EOF) {
+ /* OMG DONE!! */
+ } else {
+ // parse error
+ $this->mode = self::IN_BODY;
+ $this->emitToken($token);
+ }
+ break;
+
+ case self::AFTER_AFTER_FRAMESET:
+ /* A comment token */
+ if($token['type'] === HTML5_Tokenizer::COMMENT) {
+ /* Append a Comment node to the Document object with the data
+ attribute set to the data given in the comment token. */
+ $comment = $this->dom->createComment($token['data']);
+ $this->dom->appendChild($comment);
+
+ } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
+ $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
+ ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
+ $this->processWithRulesFor($token, self::IN_BODY);
+
+ /* An end-of-file token */
+ } elseif($token['type'] === HTML5_Tokenizer::EOF) {
+ /* OMG DONE!! */
+ } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
+ $this->processWithRulesFor($token, self::IN_HEAD);
+ } else {
+ // parse error
+ }
+ break;
+ }
+ // end funky indenting
+ }
+
+    /**
+     * Creates an element in the HTML namespace for a start tag token and,
+     * unless told otherwise, inserts it and makes it the current node.
+     *
+     * @param array $token  Start tag token. Reads $token['name'] and, when
+     *                      present, $token['attr'] (a list of arrays with
+     *                      'name' and 'value' keys).
+     * @param bool  $append When true the element is attached through
+     *                      appendToRealParent() and pushed onto the stack
+     *                      of open elements; when false it is only created.
+     * @return mixed The element returned by createElementNS().
+     */
+    private function insertElement($token, $append = true) {
+        $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
+
+        if (!empty($token['attr'])) {
+            foreach ($token['attr'] as $attr) {
+                /* The first occurrence of an attribute name wins; later
+                duplicates on the same tag are dropped. */
+                if ($el->hasAttribute($attr['name'])) {
+                    continue;
+                }
+
+                try {
+                    $el->setAttribute($attr['name'], $attr['value']);
+                } catch (DOMException $e) {
+                    /* The DOM extension rejects attribute names that are
+                    not valid XML names. The input is arbitrary markup, so
+                    drop the offending attribute instead of letting the
+                    exception abort the whole parse. */
+                }
+            }
+        }
+
+        if ($append) {
+            $this->appendToRealParent($el);
+            $this->stack[] = $el;
+        }
+
+        return $el;
+    }
+
+    /**
+     * Appends character data as a text node at the current insertion point.
+     *
+     * Empty input is ignored. While $this->ignore_lf_token is truthy, a
+     * single leading line feed is stripped from the data first; if nothing
+     * remains afterwards, no node is created.
+     *
+     * @param string $data Character data taken from the token.
+     * @return void
+     */
+    private function insertText($data) {
+        if ($data === '') return;
+
+        if ($this->ignore_lf_token && $data[0] === "\n") {
+            $data = substr($data, 1);
+            /* substr() reports "nothing left" as false on PHP < 8.0 but
+            as an empty string on PHP >= 8.0. Accept both so that a lone
+            "\n" never produces an empty text node. */
+            if ($data === false || $data === '') return;
+        }
+
+        $text = $this->dom->createTextNode($data);
+        $this->appendToRealParent($text);
+    }
+
+    /**
+     * Builds a comment node from the given data and attaches it through
+     * appendToRealParent().
+     *
+     * @param string $data Comment text taken from the comment token.
+     * @return void
+     */
+    private function insertComment($data) {
+        $this->appendToRealParent(
+            $this->dom->createComment($data)
+        );
+    }
+
+    /**
+     * Attaches a node either to the current node or, in the foster
+     * parenting case, to the foster parent.
+     *
+     * @param mixed $node Node to insert (element, text or comment).
+     * @return void
+     */
+    private function appendToRealParent($node) {
+        /* Foster parenting only applies while the foster_parent flag is
+        set AND the current node is a table, tbody, tfoot, thead, or tr
+        element; such insertions are redirected to the foster parent. */
+        $table_like = array('table', 'tbody', 'tfoot', 'thead', 'tr');
+
+        if ($this->foster_parent &&
+            in_array(end($this->stack)->tagName, $table_like)) {
+            $this->fosterParent($node);
+            return;
+        }
+
+        /* Ordinary case: the node becomes the last child of the current
+        node. */
+        end($this->stack)->appendChild($node);
+    }
+
+    /**
+     * Tells whether the stack of open elements has an element with the
+     * given tag name "in scope" or, with $table set, "in table scope".
+     *
+     * @param string|array $el    Tag name to look for, or a list of tag
+     *                            names (true as soon as any one matches).
+     * @param bool         $table When true only "table" and "html" end the
+     *                            search (table scope); when false the
+     *                            additional scope boundaries apply too.
+     * @return bool True if a matching element is found before a scope
+     *              boundary; false otherwise, including when the stack is
+     *              exhausted without meeting a boundary.
+     */
+    private function elementInScope($el, $table = false) {
+        if (is_array($el)) {
+            foreach ($el as $element) {
+                if ($this->elementInScope($element, $table)) {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /* Walk from the current node (the bottommost node of the stack)
+        up towards the root. */
+        for ($i = count($this->stack) - 1; $i >= 0; $i--) {
+            $node = $this->stack[$i];
+
+            /* If node is the target node, terminate in a match state. */
+            if ($node->tagName === $el) {
+                return true;
+            }
+
+            /* Boundaries shared by "in scope" and "in table scope". */
+            if ($node->tagName === 'table' || $node->tagName === 'html') {
+                return false;
+            }
+
+            /* Boundaries that only apply to plain "in scope". */
+            if (!$table &&
+                (in_array($node->tagName, array('applet', 'caption', 'td',
+                    'th', 'button', 'marquee', 'object'), true) ||
+                ($node->tagName === 'foreignObject' &&
+                    $node->namespaceURI === self::NS_SVG))) {
+                return false;
+            }
+        }
+
+        /* The stack ran out without a match or a boundary (for example an
+        empty stack). Report failure explicitly instead of falling off the
+        end of the function with an implicit null. */
+        return false;
+    }
+
+    /**
+     * Reconstructs the active formatting elements: every entry after the
+     * last marker / last still-open element in the list of active
+     * formatting elements is cloned, inserted at the current insertion
+     * point, pushed onto the stack of open elements, and swapped into the
+     * list in place of the entry it was cloned from.
+     *
+     * Assumes $this->a_formatting is indexed contiguously from 0, since
+     * entries are addressed by position.
+     *
+     * @return false|null False when there is nothing to reconstruct;
+     *                    otherwise no value is returned.
+     */
+    private function reconstructActiveFormattingElements() {
+        /* 1. If there are no entries in the list of active formatting
+        elements, then there is nothing to reconstruct; stop this
+        algorithm. */
+        $formatting_elements = count($this->a_formatting);
+
+        if ($formatting_elements === 0) {
+            return false;
+        }
+
+        /* 3. Let entry be the last (most recently added) element in the
+        list of active formatting elements. */
+        $entry = end($this->a_formatting);
+
+        /* 2. If the last (most recently added) entry in the list of active
+        formatting elements is a marker, or if it is an element that is in
+        the stack of open elements, then there is nothing to reconstruct;
+        stop this algorithm. */
+        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+            return false;
+        }
+
+        /* Rewind. $step_seven records whether the first pass of the loop
+        further down must advance (step 7) before cloning. It stays false
+        only when the rewind reaches the very first entry without meeting
+        a marker or an open element. */
+        $a = $formatting_elements - 1;
+        $step_seven = false;
+
+        /* 4. If there are no entries before entry in the list of active
+        formatting elements, then jump to step 8. */
+        while ($a > 0) {
+            /* 5. Let entry be the entry one earlier than entry in the list
+            of active formatting elements. */
+            $a--;
+            $entry = $this->a_formatting[$a];
+
+            /* 6. If entry is neither a marker nor an element that is also
+            in the stack of open elements, go to step 4. Otherwise the
+            rewind stops here: this entry must NOT be cloned, so the
+            creation loop has to start by advancing past it. */
+            if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+                $step_seven = true;
+                break;
+            }
+        }
+
+        while (true) {
+            /* 7. Let entry be the element one later than entry in the list
+            of active formatting elements. */
+            if ($step_seven === true) {
+                $a++;
+                $entry = $this->a_formatting[$a];
+            }
+
+            /* 8. Perform a shallow clone of the element entry to obtain
+            clone. */
+            $clone = $entry->cloneNode();
+
+            /* 9. Append clone to the current node and push it onto the
+            stack of open elements so that it is the new current node. */
+            $this->appendToRealParent($clone);
+            $this->stack[] = $clone;
+
+            /* 10. Replace the entry for entry in the list with an entry
+            for clone. */
+            $this->a_formatting[$a] = $clone;
+
+            /* 11. If the entry for clone in the list of active formatting
+            elements is not the last entry in the list, return to step 7. */
+            if (end($this->a_formatting) !== $clone) {
+                $step_seven = true;
+            } else {
+                break;
+            }
+        }
+    }
+
+ /**
+  * Clears the list of active formatting elements up to (and including)
+  * the last marker.
+  *
+  * Entries are popped from the end of the list until a marker has been
+  * removed. If the list holds no marker at all it is simply emptied;
+  * the loop stops once nothing is left instead of spinning forever.
+  */
+ private function clearTheActiveFormattingElementsUpToTheLastMarker() {
+     /* When the steps below require the UA to clear the list of active
+     formatting elements up to the last marker, the UA must perform the
+     following steps: */
+
+     while(!empty($this->a_formatting)) {
+         /* 1. Let entry be the last (most recently added) entry in the list
+         of active formatting elements.
+            2. Remove entry from the list of active formatting elements. */
+         $entry = array_pop($this->a_formatting);
+
+         /* 3. If entry was a marker, then stop the algorithm at this point.
+         The list has been cleared up to the last marker. */
+         if($entry === self::MARKER) {
+             break;
+         }
+     }
+ }
+
+ /**
+  * Generates implied end tags by popping the stack of open elements
+  * while the current node is a dd, dt, li, p, td, th or tr element.
+  *
+  * @param array $exclude tag names that must NOT be implicitly closed
+  */
+ private function generateImpliedEndTags($exclude = array()) {
+     /* When the steps below require the UA to generate implied end tags,
+     then, if the current node is a dd element, a dt element, an li element,
+     a p element, a td element, a th element, or a tr element, the UA must
+     act as if an end tag with the respective tag name had been seen and
+     then generate implied end tags again. */
+     $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
+
+     // end() yields false on an empty stack; stop there rather than
+     // reading a property of a non-object.
+     while(($current = end($this->stack)) !== false
+         && in_array($current->tagName, $elements, true)) {
+         array_pop($this->stack);
+     }
+ }
+
+ /**
+  * Classifies a node by tag name against the special, scoping and
+  * formatting lists (checked in that order); anything else is phrasing.
+  *
+  * @param object $node node exposing a tagName property
+  * @return mixed one of self::SPECIAL, self::SCOPING, self::FORMATTING
+  *               or self::PHRASING
+  */
+ private function getElementCategory($node) {
+     // Debugging aid: dump a backtrace when handed something that is
+     // not an object.
+     if (!is_object($node)) {
+         debug_print_backtrace();
+     }
+
+     $tag = $node->tagName;
+
+     if (in_array($tag, $this->special)) {
+         return self::SPECIAL;
+     }
+     if (in_array($tag, $this->scoping)) {
+         return self::SCOPING;
+     }
+     if (in_array($tag, $this->formatting)) {
+         return self::FORMATTING;
+     }
+     return self::PHRASING;
+ }
+
+ /**
+  * Pops elements off the stack of open elements until the current node's
+  * tag name is one of $elements.
+  *
+  * @param array $elements tag names at which popping stops
+  */
+ private function clearStackToTableContext($elements) {
+     /* When the steps above require the UA to clear the stack back to a
+     table context, it means that the UA must, while the current node is not
+     a table element or an html element, pop elements from the stack of open
+     elements. */
+     // end() yields false once the stack is empty; bail out then instead
+     // of looping forever when none of $elements is on the stack.
+     while(($current = end($this->stack)) !== false) {
+         if(in_array($current->tagName, $elements)) {
+             break;
+         }
+         array_pop($this->stack);
+     }
+ }
+
+ /**
+  * Resets the insertion mode appropriately by walking the stack of open
+  * elements from the current node downwards and picking the mode that
+  * matches the first recognised element.
+  *
+  * @param DOMElement|null $context context element (fragment case); it
+  *        stands in for the bottom-most stack entry when that entry is
+  *        reached
+  */
+ private function resetInsertionMode($context = null) {
+     /* 1. Let last be false. */
+     $last = false;
+     $leng = count($this->stack);
+
+     for($n = $leng - 1; $n >= 0; $n--) {
+         /* 2. Let node be the last node in the stack of open elements. */
+         $node = $this->stack[$n];
+
+         /* 3. If node is the first node in the stack of open elements, then
+          * set last to true and set node to the context element. (fragment
+          * case) */
+         if($this->stack[0]->isSameNode($node)) {
+             $last = true;
+             $node = $context;
+         }
+
+         /* Without a context element there is nothing left to inspect at
+          * the bottom of the stack. Go straight to step 16 ("in body")
+          * rather than reading properties of null; this is the mode the
+          * checks below would end up with anyway. */
+         if($node === null) {
+             $this->mode = self::IN_BODY;
+             break;
+         }
+
+         /* 4. If node is a select element, then switch the insertion mode to
+         "in select" and abort these steps. (fragment case) */
+         if($node->tagName === 'select') {
+             $this->mode = self::IN_SELECT;
+             break;
+
+         /* 5. If node is a td or th element, then switch the insertion mode
+         to "in cell" and abort these steps. */
+         } elseif($node->tagName === 'td' || $node->tagName === 'th') {
+             $this->mode = self::IN_CELL;
+             break;
+
+         /* 6. If node is a tr element, then switch the insertion mode to
+         "in row" and abort these steps. */
+         } elseif($node->tagName === 'tr') {
+             $this->mode = self::IN_ROW;
+             break;
+
+         /* 7. If node is a tbody, thead, or tfoot element, then switch the
+         insertion mode to "in table body" and abort these steps. */
+         } elseif(in_array($node->tagName, array('tbody', 'thead', 'tfoot'))) {
+             $this->mode = self::IN_TABLE_BODY;
+             break;
+
+         /* 8. If node is a caption element, then switch the insertion mode
+         to "in caption" and abort these steps. */
+         } elseif($node->tagName === 'caption') {
+             $this->mode = self::IN_CAPTION;
+             break;
+
+         /* 9. If node is a colgroup element, then switch the insertion mode
+         to "in column group" and abort these steps. (innerHTML case) */
+         } elseif($node->tagName === 'colgroup') {
+             $this->mode = self::IN_COLUMN_GROUP;
+             break;
+
+         /* 10. If node is a table element, then switch the insertion mode
+         to "in table" and abort these steps. */
+         } elseif($node->tagName === 'table') {
+             $this->mode = self::IN_TABLE;
+             break;
+
+         /* 11. If node is an element from the MathML namespace or the SVG
+          * namespace, then switch the insertion mode to "in foreign
+          * content", let the secondary insertion mode be "in body", and
+          * abort these steps. */
+         } elseif($node->namespaceURI === self::NS_SVG ||
+         $node->namespaceURI === self::NS_MATHML) {
+             $this->mode = self::IN_FOREIGN_CONTENT;
+             $this->secondary_mode = self::IN_BODY;
+             break;
+
+         /* 12. If node is a head element, then switch the insertion mode
+         to "in body" ("in body"! not "in head"!) and abort these steps.
+         (fragment case) */
+         } elseif($node->tagName === 'head') {
+             $this->mode = self::IN_BODY;
+             break;
+
+         /* 13. If node is a body element, then switch the insertion mode to
+         "in body" and abort these steps. */
+         } elseif($node->tagName === 'body') {
+             $this->mode = self::IN_BODY;
+             break;
+
+         /* 14. If node is a frameset element, then switch the insertion
+         mode to "in frameset" and abort these steps. (fragment case) */
+         } elseif($node->tagName === 'frameset') {
+             $this->mode = self::IN_FRAMESET;
+             break;
+
+         /* 15. If node is an html element, then: if the head element
+         pointer is null, switch the insertion mode to "before head",
+         otherwise, switch the insertion mode to "after head". In either
+         case, abort these steps. (fragment case) */
+         } elseif($node->tagName === 'html') {
+             $this->mode = ($this->head_pointer === null)
+                 ? self::BEFORE_HEAD
+                 : self::AFTER_HEAD;
+
+             break;
+
+         /* 16. If last is true, then set the insertion mode to "in body"
+         and abort these steps. (fragment case) */
+         } elseif($last) {
+             $this->mode = self::IN_BODY;
+             break;
+         }
+     }
+ }
+
+ /**
+  * Closes the currently open table cell, if any, by emitting a synthetic
+  * end tag token for it. td is tried before th; only one token is emitted.
+  */
+ private function closeCell() {
+     /* If the stack of open elements has a td or th element in table scope,
+     then act as if an end tag token with that tag name had been seen. */
+     if($this->elementInScope('td', true)) {
+         $open_cell = 'td';
+     } elseif($this->elementInScope('th', true)) {
+         $open_cell = 'th';
+     } else {
+         // No cell in table scope: nothing to close.
+         return;
+     }
+
+     $end_tag = array(
+         'name' => $open_cell,
+         'type' => HTML5_Tokenizer::ENDTAG
+     );
+     $this->emitToken($end_tag);
+ }
+
+ /**
+  * Processes a token "using the rules for" another insertion mode by
+  * handing both the token and that mode to emitToken().
+  *
+  * @param array $token token to process
+  * @param mixed $mode  insertion mode whose rules should be applied
+  * @return mixed whatever emitToken() returns
+  */
+ private function processWithRulesFor($token, $mode) {
+     /* "using the rules for the m insertion mode", where m is one of these
+      * modes, the user agent must use the rules described under the m
+      * insertion mode's section, but must leave the insertion mode
+      * unchanged unless the rules in m themselves switch the insertion mode
+      * to a new value. */
+     $result = $this->emitToken($token, $mode);
+     return $result;
+ }
+
+ /**
+  * Inserts an element for $token, remembers the insertion mode in effect
+  * afterwards as the original mode, then switches to the CDATA/RCDATA
+  * insertion mode with the CDATA content model.
+  *
+  * @param array $token start tag token of the element
+  */
+ private function insertCDATAElement($token) {
+     $this->insertElement($token);
+
+     // Remember where to return to once the element's text is consumed.
+     $mode_to_restore = $this->mode;
+     $this->original_mode = $mode_to_restore;
+
+     $this->mode = self::IN_CDATA_RCDATA;
+     $this->content_model = HTML5_Tokenizer::CDATA;
+ }
+
+ /**
+  * Inserts an element for $token, remembers the insertion mode in effect
+  * afterwards as the original mode, then switches to the CDATA/RCDATA
+  * insertion mode with the RCDATA content model.
+  *
+  * @param array $token start tag token of the element
+  */
+ private function insertRCDATAElement($token) {
+     $this->insertElement($token);
+
+     // Remember where to return to once the element's text is consumed.
+     $mode_to_restore = $this->mode;
+     $this->original_mode = $mode_to_restore;
+
+     $this->mode = self::IN_CDATA_RCDATA;
+     $this->content_model = HTML5_Tokenizer::RCDATA;
+ }
+
+ /**
+  * Looks up an attribute value on a token.
+  *
+  * @param array  $token token whose 'attr' entry is a list of
+  *                      name/value pairs
+  * @param string $key   attribute name to look for
+  * @return mixed the value of the LAST pair whose name matches, or false
+  *               when the token has no attributes or none matches
+  */
+ private function getAttr($token, $key) {
+     $found = false;
+
+     if (isset($token['attr'])) {
+         foreach ($token['attr'] as $pair) {
+             if ($pair['name'] !== $key) {
+                 continue;
+             }
+             // Keep scanning: a later duplicate overrides an earlier one.
+             $found = $pair['value'];
+         }
+     }
+
+     return $found;
+ }
+
+ /**
+  * Returns the current table: the table element closest to the top of
+  * the stack of open elements, or the bottom-most stack entry when no
+  * table element is open.
+  *
+  * @return object the matching stack entry
+  */
+ private function getCurrentTable() {
+     /* The current table is the last table element in the stack of open
+      * elements, if there is one. If there is no table element in the stack
+      * of open elements (fragment case), then the current table is the
+      * first element in the stack of open elements (the html element). */
+     $index = count($this->stack);
+
+     while (--$index >= 0) {
+         $candidate = $this->stack[$index];
+         if ($candidate->tagName === 'table') {
+             return $candidate;
+         }
+     }
+
+     return $this->stack[0];
+ }
+
+ /**
+  * Determines the foster parent element used when content has to be
+  * moved out of a table.
+  *
+  * @return object the parent of the last open table element when that
+  *         parent is an element; the first stack entry when no table is
+  *         open; otherwise the stack entry just below the last table
+  */
+ private function getFosterParent() {
+     /* The foster parent element is the parent element of the last
+     table element in the stack of open elements, if there is a
+     table element and it has such a parent element. If there is no
+     table element in the stack of open elements (innerHTML case),
+     then the foster parent element is the first element in the
+     stack of open elements (the html element). Otherwise, if there
+     is a table element in the stack of open elements, but the last
+     table element in the stack of open elements has no parent, or
+     its parent node is not an element, then the foster parent
+     element is the element before the last table element in the
+     stack of open elements. */
+     $table = null;
+     for($n = count($this->stack) - 1; $n >= 0; $n--) {
+         if($this->stack[$n]->tagName === 'table') {
+             $table = $this->stack[$n];
+             break;
+         }
+     }
+
+     // No open table at all: fall back to the first stack entry.
+     if($table === null) {
+         return $this->stack[0];
+     }
+
+     // Only an ELEMENT parent qualifies; a missing parent or a
+     // non-element parent falls through to the stack predecessor.
+     $parent = $table->parentNode;
+     if($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
+         return $parent;
+     }
+
+     // $n still holds the table's position in the stack.
+     return $this->stack[$n - 1];
+ }
+
+ /**
+  * Foster-parents a node: marks the current table as tainted and inserts
+  * the node either directly before the last open table (when the foster
+  * parent is that table's parent) or at the end of the foster parent.
+  *
+  * @param DOMNode $node node to insert
+  */
+ public function fosterParent($node) {
+     $foster_parent = $this->getFosterParent();
+     $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
+     /* When a node node is to be foster parented, the node node must be
+      * inserted into the foster parent element, and the current table must
+      * be marked as tainted. (Once the current table has been tainted,
+      * whitespace characters are inserted into the foster parent element
+      * instead of the current node.) */
+     $table->tainted = true;
+     /* If the foster parent element is the parent element of the last table
+      * element in the stack of open elements, then node must be inserted
+      * immediately before the last table element in the stack of open
+      * elements in the foster parent element; otherwise, node must be
+      * appended to the foster parent element. */
+     // A table without a parent cannot be the reference point for
+     // insertBefore(); check for that before calling isSameNode() on it.
+     $table_parent = $table->parentNode;
+     if ($table->tagName === 'table'
+         && $table_parent !== null
+         && $table_parent->isSameNode($foster_parent)) {
+         $foster_parent->insertBefore($node, $table);
+     } else {
+         $foster_parent->appendChild($node);
+     }
+ }
+
+ /**
+  * Debugging helper: echoes the tag names of the stack of open elements,
+  * bottom entry first, on a single line.
+  */
+ private function printStack() {
+     $tags = array();
+     foreach ($this->stack as $open_element) {
+         array_push($tags, $open_element->tagName);
+     }
+     $listing = implode(', ', $tags);
+     echo " -> stack [" . $listing . "]\n";
+ }
+
+ /**
+  * Debugging helper: echoes the list of active formatting elements on a
+  * single line, showing markers as "MARKER". Prints nothing when the
+  * list is empty.
+  */
+ private function printActiveFormattingElements() {
+     if (!$this->a_formatting) {
+         return;
+     }
+
+     $labels = array();
+     foreach ($this->a_formatting as $entry) {
+         $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
+     }
+
+     $listing = implode(', ', $labels);
+     echo " -> active formatting [" . $listing . "]\n";
+ }
+
+ /**
+  * Tells whether the current table carries a truthy "tainted" flag.
+  *
+  * @return bool
+  */
+ public function currentTableIsTainted() {
+     $table = $this->getCurrentTable();
+     return isset($table->tainted) && (bool) $table->tainted;
+ }
+
+ /**
+  * Sets up the tree constructor for building a fragment.
+  *
+  * Always flags the builder as being in fragment mode. When a context
+  * tag name is supplied, a context element is created from it, the
+  * content model is chosen from its tag name, a fresh html root becomes
+  * the only entry on the stack of open elements, the insertion mode is
+  * reset against the context element and the form pointer is looked up
+  * along the context element's ancestor chain.
+  *
+  * @param string|null $context tag name of the context element, if any
+  */
+ public function setupContext($context = null) {
+     $this->fragment = true;
+     if (!$context) {
+         return;
+     }
+
+     $context = $this->dom->createElementNS(self::NS_HTML, $context);
+
+     /* 4.1. Set the HTML parser's tokenization stage's content model
+      * flag according to the context element, as follows: */
+     $tag = $context->tagName;
+     if (in_array($tag, array('title', 'textarea'), true)) {
+         $this->content_model = HTML5_Tokenizer::RCDATA;
+     } elseif (in_array($tag, array('style', 'script', 'xmp', 'iframe', 'noembed', 'noframes'), true)) {
+         $this->content_model = HTML5_Tokenizer::CDATA;
+     } elseif ($tag === 'noscript') {
+         // XSCRIPT: assuming scripting is enabled
+         $this->content_model = HTML5_Tokenizer::CDATA;
+     } elseif ($tag === 'plaintext') {
+         $this->content_model = HTML5_Tokenizer::PLAINTEXT;
+     }
+
+     /* 4.2. Let root be a new html element with no attributes. */
+     $this->root = $this->dom->createElementNS(self::NS_HTML, 'html');
+     /* 4.3 Append the element root to the Document node created above. */
+     $this->dom->appendChild($this->root);
+     /* 4.4 Set up the parser's stack of open elements so that it
+      * contains just the single element root. */
+     $this->stack = array($this->root);
+     /* 4.5 Reset the parser's insertion mode appropriately. */
+     $this->resetInsertionMode($context);
+
+     /* 4.6 Set the parser's form element pointer to the nearest node
+      * to the context element that is a form element (going straight up
+      * the ancestor chain, and including the element itself, if it is a
+      * form element), or, if there is no such form element, to null. */
+     for ($ancestor = $context; $ancestor; $ancestor = $ancestor->parentNode) {
+         if ($ancestor->tagName === 'form') {
+             $this->form_pointer = $ancestor;
+             break;
+         }
+     }
+ }
+
+ /**
+  * Adjusts MathML attributes: renames the lower-cased "definitionurl"
+  * attribute to its camel-cased form "definitionURL".
+  *
+  * @param array $token token whose optional 'attr' entry is a list of
+  *                     name/value pairs
+  * @return array the token, with adjusted attribute names
+  */
+ public function adjustMathMLAttributes($token) {
+     // Tokens without attributes are returned untouched.
+     if (empty($token['attr'])) {
+         return $token;
+     }
+
+     // Write back through the index rather than a by-reference loop
+     // variable, so no reference into $token outlives the loop.
+     foreach ($token['attr'] as $i => $kp) {
+         if ($kp['name'] === 'definitionurl') {
+             $token['attr'][$i]['name'] = 'definitionURL';
+         }
+     }
+     return $token;
+ }
+
+ /**
+  * Adjusts SVG attributes: restores the mixed-case spelling of the SVG
+  * attribute names listed in the lookup table below.
+  *
+  * @param array $token token whose optional 'attr' entry is a list of
+  *                     name/value pairs
+  * @return array the token, with adjusted attribute names
+  */
+ public function adjustSVGAttributes($token) {
+     static $lookup = array(
+         'attributename' => 'attributeName',
+         'attributetype' => 'attributeType',
+         'basefrequency' => 'baseFrequency',
+         'baseprofile' => 'baseProfile',
+         'calcmode' => 'calcMode',
+         'clippathunits' => 'clipPathUnits',
+         'contentscripttype' => 'contentScriptType',
+         'contentstyletype' => 'contentStyleType',
+         'diffuseconstant' => 'diffuseConstant',
+         'edgemode' => 'edgeMode',
+         'externalresourcesrequired' => 'externalResourcesRequired',
+         'filterres' => 'filterRes',
+         'filterunits' => 'filterUnits',
+         'glyphref' => 'glyphRef',
+         'gradienttransform' => 'gradientTransform',
+         'gradientunits' => 'gradientUnits',
+         'kernelmatrix' => 'kernelMatrix',
+         'kernelunitlength' => 'kernelUnitLength',
+         'keypoints' => 'keyPoints',
+         'keysplines' => 'keySplines',
+         'keytimes' => 'keyTimes',
+         'lengthadjust' => 'lengthAdjust',
+         'limitingconeangle' => 'limitingConeAngle',
+         'markerheight' => 'markerHeight',
+         'markerunits' => 'markerUnits',
+         'markerwidth' => 'markerWidth',
+         'maskcontentunits' => 'maskContentUnits',
+         'maskunits' => 'maskUnits',
+         'numoctaves' => 'numOctaves',
+         'pathlength' => 'pathLength',
+         'patterncontentunits' => 'patternContentUnits',
+         'patterntransform' => 'patternTransform',
+         'patternunits' => 'patternUnits',
+         'pointsatx' => 'pointsAtX',
+         'pointsaty' => 'pointsAtY',
+         'pointsatz' => 'pointsAtZ',
+         'preservealpha' => 'preserveAlpha',
+         'preserveaspectratio' => 'preserveAspectRatio',
+         'primitiveunits' => 'primitiveUnits',
+         'refx' => 'refX',
+         'refy' => 'refY',
+         'repeatcount' => 'repeatCount',
+         'repeatdur' => 'repeatDur',
+         'requiredextensions' => 'requiredExtensions',
+         'requiredfeatures' => 'requiredFeatures',
+         'specularconstant' => 'specularConstant',
+         'specularexponent' => 'specularExponent',
+         'spreadmethod' => 'spreadMethod',
+         'startoffset' => 'startOffset',
+         'stddeviation' => 'stdDeviation',
+         'stitchtiles' => 'stitchTiles',
+         'surfacescale' => 'surfaceScale',
+         'systemlanguage' => 'systemLanguage',
+         'tablevalues' => 'tableValues',
+         'targetx' => 'targetX',
+         'targety' => 'targetY',
+         'textlength' => 'textLength',
+         'viewbox' => 'viewBox',
+         'viewtarget' => 'viewTarget',
+         'xchannelselector' => 'xChannelSelector',
+         'ychannelselector' => 'yChannelSelector',
+         'zoomandpan' => 'zoomAndPan',
+     );
+
+     // Tokens without attributes are returned untouched.
+     if (empty($token['attr'])) {
+         return $token;
+     }
+
+     foreach ($token['attr'] as $i => $kp) {
+         // Only plain string names can be table keys; anything else
+         // (e.g. a name already expanded to an array) is left alone.
+         if (is_string($kp['name']) && isset($lookup[$kp['name']])) {
+             $token['attr'][$i]['name'] = $lookup[$kp['name']];
+         }
+     }
+     return $token;
+ }
+
+ /**
+  * Adjusts foreign attributes: replaces the name of each recognised
+  * xlink:, xml: and xmlns attribute with a three-element array of
+  * (prefix, local name, namespace URI).
+  *
+  * @param array $token token whose optional 'attr' entry is a list of
+  *                     name/value pairs
+  * @return array the token, with recognised names expanded to arrays
+  */
+ public function adjustForeignAttributes($token) {
+     static $lookup = array(
+         'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
+         'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
+         'xlink:href' => array('xlink', 'href', self::NS_XLINK),
+         'xlink:role' => array('xlink', 'role', self::NS_XLINK),
+         'xlink:show' => array('xlink', 'show', self::NS_XLINK),
+         'xlink:title' => array('xlink', 'title', self::NS_XLINK),
+         'xlink:type' => array('xlink', 'type', self::NS_XLINK),
+         'xml:base' => array('xml', 'base', self::NS_XML),
+         'xml:lang' => array('xml', 'lang', self::NS_XML),
+         'xml:space' => array('xml', 'space', self::NS_XML),
+         'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
+         'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
+     );
+
+     // Tokens without attributes are returned untouched.
+     if (empty($token['attr'])) {
+         return $token;
+     }
+
+     foreach ($token['attr'] as $i => $kp) {
+         // A name that is already an array cannot be a table key;
+         // leave it as it is.
+         if (is_string($kp['name']) && isset($lookup[$kp['name']])) {
+             $token['attr'][$i]['name'] = $lookup[$kp['name']];
+         }
+     }
+     return $token;
+ }
+
+ /**
+  * Creates an element in the given namespace from a start tag token,
+  * copies the token's attributes onto it, appends it via
+  * appendToRealParent() and pushes it onto the stack of open elements.
+  *
+  * Attribute names may be plain strings or three-element arrays whose
+  * index 1 is the local name and index 2 the namespace URI.
+  *
+  * @param array  $token        start tag token ('name', optional 'attr')
+  * @param string $namespaceURI namespace of the new element
+  */
+ public function insertForeignElement($token, $namespaceURI) {
+     $el = $this->dom->createElementNS($namespaceURI, $token['name']);
+
+     $pairs = empty($token['attr']) ? array() : $token['attr'];
+     foreach ($pairs as $kp) {
+         $name = $kp['name'];
+         $is_qualified = is_array($name);
+         $ns = $is_qualified ? $name[2] : self::NS_HTML;
+         $attr = $is_qualified ? $name[1] : $name;
+
+         // The first occurrence of an attribute wins.
+         if ($el->hasAttributeNS($ns, $attr)) {
+             continue;
+         }
+
+         if ($ns === self::NS_XLINK) {
+             // XSKETCHY: work around godawful libxml bug
+             $el->setAttribute('xlink:'.$attr, $kp['value']);
+         } elseif ($ns === self::NS_HTML) {
+             // Another godawful libxml bug
+             $el->setAttribute($attr, $kp['value']);
+         } else {
+             $el->setAttributeNS($ns, $attr, $kp['value']);
+         }
+     }
+
+     $this->appendToRealParent($el);
+     $this->stack[] = $el;
+     // XERROR: see below
+     /* If the newly created element has an xmlns attribute in the XMLNS
+      * namespace whose value is not exactly the same as the element's
+      * namespace, that is a parse error. Similarly, if the newly created
+      * element has an xmlns:xlink attribute in the XMLNS namespace whose
+      * value is not the XLink Namespace, that is a parse error. */
+ }
+
+ /**
+  * Normalizes the DOM and returns the parse result.
+  *
+  * @return mixed the document itself for a full-document parse; in
+  *         fragment mode the child nodes of the fragment root, or of the
+  *         document when no root was set up
+  */
+ public function save() {
+     $this->dom->normalize();
+
+     if (!$this->fragment) {
+         return $this->dom;
+     }
+
+     $container = $this->root ? $this->root : $this->dom;
+     return $container->childNodes;
+ }
+}
+
diff --git a/library/HTML5/named-character-references.ser b/library/HTML5/named-character-references.ser
new file mode 100644
index 000000000..3004c4b91
--- /dev/null
+++ b/library/HTML5/named-character-references.ser
@@ -0,0 +1 @@
+a:2137:{s:6:"AElig;";i:198;s:5:"AElig";i:198;s:4:"AMP;";i:38;s:3:"AMP";i:38;s:7:"Aacute;";i:193;s:6:"Aacute";i:193;s:7:"Abreve;";i:258;s:6:"Acirc;";i:194;s:5:"Acirc";i:194;s:4:"Acy;";i:1040;s:4:"Afr;";i:120068;s:7:"Agrave;";i:192;s:6:"Agrave";i:192;s:6:"Alpha;";i:913;s:6:"Amacr;";i:256;s:4:"And;";i:10835;s:6:"Aogon;";i:260;s:5:"Aopf;";i:120120;s:14:"ApplyFunction;";i:8289;s:6:"Aring;";i:197;s:5:"Aring";i:197;s:5:"Ascr;";i:119964;s:7:"Assign;";i:8788;s:7:"Atilde;";i:195;s:6:"Atilde";i:195;s:5:"Auml;";i:196;s:4:"Auml";i:196;s:10:"Backslash;";i:8726;s:5:"Barv;";i:10983;s:7:"Barwed;";i:8966;s:4:"Bcy;";i:1041;s:8:"Because;";i:8757;s:11:"Bernoullis;";i:8492;s:5:"Beta;";i:914;s:4:"Bfr;";i:120069;s:5:"Bopf;";i:120121;s:6:"Breve;";i:728;s:5:"Bscr;";i:8492;s:7:"Bumpeq;";i:8782;s:5:"CHcy;";i:1063;s:5:"COPY;";i:169;s:4:"COPY";i:169;s:7:"Cacute;";i:262;s:4:"Cap;";i:8914;s:21:"CapitalDifferentialD;";i:8517;s:8:"Cayleys;";i:8493;s:7:"Ccaron;";i:268;s:7:"Ccedil;";i:199;s:6:"Ccedil";i:199;s:6:"Ccirc;";i:264;s:8:"Cconint;";i:8752;s:5:"Cdot;";i:266;s:8:"Cedilla;";i:184;s:10:"CenterDot;";i:183;s:4:"Cfr;";i:8493;s:4:"Chi;";i:935;s:10:"CircleDot;";i:8857;s:12:"CircleMinus;";i:8854;s:11:"CirclePlus;";i:8853;s:12:"CircleTimes;";i:8855;s:25:"ClockwiseContourIntegral;";i:8754;s:22:"CloseCurlyDoubleQuote;";i:8221;s:16:"CloseCurlyQuote;";i:8217;s:6:"Colon;";i:8759;s:7:"Colone;";i:10868;s:10:"Congruent;";i:8801;s:7:"Conint;";i:8751;s:16:"ContourIntegral;";i:8750;s:5:"Copf;";i:8450;s:10:"Coproduct;";i:8720;s:32:"CounterClockwiseContourIntegral;";i:8755;s:6:"Cross;";i:10799;s:5:"Cscr;";i:119966;s:4:"Cup;";i:8915;s:7:"CupCap;";i:8781;s:3:"DD;";i:8517;s:9:"DDotrahd;";i:10513;s:5:"DJcy;";i:1026;s:5:"DScy;";i:1029;s:5:"DZcy;";i:1039;s:7:"Dagger;";i:8225;s:5:"Darr;";i:8609;s:6:"Dashv;";i:10980;s:7:"Dcaron;";i:270;s:4:"Dcy;";i:1044;s:4:"Del;";i:8711;s:6:"Delta;";i:916;s:4:"Dfr;";i:120071;s:17:"DiacriticalAcute;";i:180;s:15:"DiacriticalDot;";i:729;s:23:"DiacriticalDoubleAcute;";i:733;s:17:"DiacriticalG
rave;";i:96;s:17:"DiacriticalTilde;";i:732;s:8:"Diamond;";i:8900;s:14:"DifferentialD;";i:8518;s:5:"Dopf;";i:120123;s:4:"Dot;";i:168;s:7:"DotDot;";i:8412;s:9:"DotEqual;";i:8784;s:22:"DoubleContourIntegral;";i:8751;s:10:"DoubleDot;";i:168;s:16:"DoubleDownArrow;";i:8659;s:16:"DoubleLeftArrow;";i:8656;s:21:"DoubleLeftRightArrow;";i:8660;s:14:"DoubleLeftTee;";i:10980;s:20:"DoubleLongLeftArrow;";i:10232;s:25:"DoubleLongLeftRightArrow;";i:10234;s:21:"DoubleLongRightArrow;";i:10233;s:17:"DoubleRightArrow;";i:8658;s:15:"DoubleRightTee;";i:8872;s:14:"DoubleUpArrow;";i:8657;s:18:"DoubleUpDownArrow;";i:8661;s:18:"DoubleVerticalBar;";i:8741;s:10:"DownArrow;";i:8595;s:13:"DownArrowBar;";i:10515;s:17:"DownArrowUpArrow;";i:8693;s:10:"DownBreve;";i:785;s:20:"DownLeftRightVector;";i:10576;s:18:"DownLeftTeeVector;";i:10590;s:15:"DownLeftVector;";i:8637;s:18:"DownLeftVectorBar;";i:10582;s:19:"DownRightTeeVector;";i:10591;s:16:"DownRightVector;";i:8641;s:19:"DownRightVectorBar;";i:10583;s:8:"DownTee;";i:8868;s:13:"DownTeeArrow;";i:8615;s:10:"Downarrow;";i:8659;s:5:"Dscr;";i:119967;s:7:"Dstrok;";i:272;s:4:"ENG;";i:330;s:4:"ETH;";i:208;s:3:"ETH";i:208;s:7:"Eacute;";i:201;s:6:"Eacute";i:201;s:7:"Ecaron;";i:282;s:6:"Ecirc;";i:202;s:5:"Ecirc";i:202;s:4:"Ecy;";i:1069;s:5:"Edot;";i:278;s:4:"Efr;";i:120072;s:7:"Egrave;";i:200;s:6:"Egrave";i:200;s:8:"Element;";i:8712;s:6:"Emacr;";i:274;s:17:"EmptySmallSquare;";i:9723;s:21:"EmptyVerySmallSquare;";i:9643;s:6:"Eogon;";i:280;s:5:"Eopf;";i:120124;s:8:"Epsilon;";i:917;s:6:"Equal;";i:10869;s:11:"EqualTilde;";i:8770;s:12:"Equilibrium;";i:8652;s:5:"Escr;";i:8496;s:5:"Esim;";i:10867;s:4:"Eta;";i:919;s:5:"Euml;";i:203;s:4:"Euml";i:203;s:7:"Exists;";i:8707;s:13:"ExponentialE;";i:8519;s:4:"Fcy;";i:1060;s:4:"Ffr;";i:120073;s:18:"FilledSmallSquare;";i:9724;s:22:"FilledVerySmallSquare;";i:9642;s:5:"Fopf;";i:120125;s:7:"ForAll;";i:8704;s:11:"Fouriertrf;";i:8497;s:5:"Fscr;";i:8497;s:5:"GJcy;";i:1027;s:3:"GT;";i:62;s:2:"GT";i:62;s:6:"Gamma;";i:915;s:7:"Gammad;";i:
988;s:7:"Gbreve;";i:286;s:7:"Gcedil;";i:290;s:6:"Gcirc;";i:284;s:4:"Gcy;";i:1043;s:5:"Gdot;";i:288;s:4:"Gfr;";i:120074;s:3:"Gg;";i:8921;s:5:"Gopf;";i:120126;s:13:"GreaterEqual;";i:8805;s:17:"GreaterEqualLess;";i:8923;s:17:"GreaterFullEqual;";i:8807;s:15:"GreaterGreater;";i:10914;s:12:"GreaterLess;";i:8823;s:18:"GreaterSlantEqual;";i:10878;s:13:"GreaterTilde;";i:8819;s:5:"Gscr;";i:119970;s:3:"Gt;";i:8811;s:7:"HARDcy;";i:1066;s:6:"Hacek;";i:711;s:4:"Hat;";i:94;s:6:"Hcirc;";i:292;s:4:"Hfr;";i:8460;s:13:"HilbertSpace;";i:8459;s:5:"Hopf;";i:8461;s:15:"HorizontalLine;";i:9472;s:5:"Hscr;";i:8459;s:7:"Hstrok;";i:294;s:13:"HumpDownHump;";i:8782;s:10:"HumpEqual;";i:8783;s:5:"IEcy;";i:1045;s:6:"IJlig;";i:306;s:5:"IOcy;";i:1025;s:7:"Iacute;";i:205;s:6:"Iacute";i:205;s:6:"Icirc;";i:206;s:5:"Icirc";i:206;s:4:"Icy;";i:1048;s:5:"Idot;";i:304;s:4:"Ifr;";i:8465;s:7:"Igrave;";i:204;s:6:"Igrave";i:204;s:3:"Im;";i:8465;s:6:"Imacr;";i:298;s:11:"ImaginaryI;";i:8520;s:8:"Implies;";i:8658;s:4:"Int;";i:8748;s:9:"Integral;";i:8747;s:13:"Intersection;";i:8898;s:15:"InvisibleComma;";i:8291;s:15:"InvisibleTimes;";i:8290;s:6:"Iogon;";i:302;s:5:"Iopf;";i:120128;s:5:"Iota;";i:921;s:5:"Iscr;";i:8464;s:7:"Itilde;";i:296;s:6:"Iukcy;";i:1030;s:5:"Iuml;";i:207;s:4:"Iuml";i:207;s:6:"Jcirc;";i:308;s:4:"Jcy;";i:1049;s:4:"Jfr;";i:120077;s:5:"Jopf;";i:120129;s:5:"Jscr;";i:119973;s:7:"Jsercy;";i:1032;s:6:"Jukcy;";i:1028;s:5:"KHcy;";i:1061;s:5:"KJcy;";i:1036;s:6:"Kappa;";i:922;s:7:"Kcedil;";i:310;s:4:"Kcy;";i:1050;s:4:"Kfr;";i:120078;s:5:"Kopf;";i:120130;s:5:"Kscr;";i:119974;s:5:"LJcy;";i:1033;s:3:"LT;";i:60;s:2:"LT";i:60;s:7:"Lacute;";i:313;s:7:"Lambda;";i:923;s:5:"Lang;";i:10218;s:11:"Laplacetrf;";i:8466;s:5:"Larr;";i:8606;s:7:"Lcaron;";i:317;s:7:"Lcedil;";i:315;s:4:"Lcy;";i:1051;s:17:"LeftAngleBracket;";i:10216;s:10:"LeftArrow;";i:8592;s:13:"LeftArrowBar;";i:8676;s:20:"LeftArrowRightArrow;";i:8646;s:12:"LeftCeiling;";i:8968;s:18:"LeftDoubleBracket;";i:10214;s:18:"LeftDownTeeVector;";i:10593;s:15:"LeftDownVe
ctor;";i:8643;s:18:"LeftDownVectorBar;";i:10585;s:10:"LeftFloor;";i:8970;s:15:"LeftRightArrow;";i:8596;s:16:"LeftRightVector;";i:10574;s:8:"LeftTee;";i:8867;s:13:"LeftTeeArrow;";i:8612;s:14:"LeftTeeVector;";i:10586;s:13:"LeftTriangle;";i:8882;s:16:"LeftTriangleBar;";i:10703;s:18:"LeftTriangleEqual;";i:8884;s:17:"LeftUpDownVector;";i:10577;s:16:"LeftUpTeeVector;";i:10592;s:13:"LeftUpVector;";i:8639;s:16:"LeftUpVectorBar;";i:10584;s:11:"LeftVector;";i:8636;s:14:"LeftVectorBar;";i:10578;s:10:"Leftarrow;";i:8656;s:15:"Leftrightarrow;";i:8660;s:17:"LessEqualGreater;";i:8922;s:14:"LessFullEqual;";i:8806;s:12:"LessGreater;";i:8822;s:9:"LessLess;";i:10913;s:15:"LessSlantEqual;";i:10877;s:10:"LessTilde;";i:8818;s:4:"Lfr;";i:120079;s:3:"Ll;";i:8920;s:11:"Lleftarrow;";i:8666;s:7:"Lmidot;";i:319;s:14:"LongLeftArrow;";i:10229;s:19:"LongLeftRightArrow;";i:10231;s:15:"LongRightArrow;";i:10230;s:14:"Longleftarrow;";i:10232;s:19:"Longleftrightarrow;";i:10234;s:15:"Longrightarrow;";i:10233;s:5:"Lopf;";i:120131;s:15:"LowerLeftArrow;";i:8601;s:16:"LowerRightArrow;";i:8600;s:5:"Lscr;";i:8466;s:4:"Lsh;";i:8624;s:7:"Lstrok;";i:321;s:3:"Lt;";i:8810;s:4:"Map;";i:10501;s:4:"Mcy;";i:1052;s:12:"MediumSpace;";i:8287;s:10:"Mellintrf;";i:8499;s:4:"Mfr;";i:120080;s:10:"MinusPlus;";i:8723;s:5:"Mopf;";i:120132;s:5:"Mscr;";i:8499;s:3:"Mu;";i:924;s:5:"NJcy;";i:1034;s:7:"Nacute;";i:323;s:7:"Ncaron;";i:327;s:7:"Ncedil;";i:325;s:4:"Ncy;";i:1053;s:20:"NegativeMediumSpace;";i:8203;s:19:"NegativeThickSpace;";i:8203;s:18:"NegativeThinSpace;";i:8203;s:22:"NegativeVeryThinSpace;";i:8203;s:21:"NestedGreaterGreater;";i:8811;s:15:"NestedLessLess;";i:8810;s:8:"NewLine;";i:10;s:4:"Nfr;";i:120081;s:8:"NoBreak;";i:8288;s:17:"NonBreakingSpace;";i:160;s:5:"Nopf;";i:8469;s:4:"Not;";i:10988;s:13:"NotCongruent;";i:8802;s:10:"NotCupCap;";i:8813;s:21:"NotDoubleVerticalBar;";i:8742;s:11:"NotElement;";i:8713;s:9:"NotEqual;";i:8800;s:10:"NotExists;";i:8708;s:11:"NotGreater;";i:8815;s:16:"NotGreaterEqual;";i:8817;s:15:"NotGreat
erLess;";i:8825;s:16:"NotGreaterTilde;";i:8821;s:16:"NotLeftTriangle;";i:8938;s:21:"NotLeftTriangleEqual;";i:8940;s:8:"NotLess;";i:8814;s:13:"NotLessEqual;";i:8816;s:15:"NotLessGreater;";i:8824;s:13:"NotLessTilde;";i:8820;s:12:"NotPrecedes;";i:8832;s:22:"NotPrecedesSlantEqual;";i:8928;s:18:"NotReverseElement;";i:8716;s:17:"NotRightTriangle;";i:8939;s:22:"NotRightTriangleEqual;";i:8941;s:21:"NotSquareSubsetEqual;";i:8930;s:23:"NotSquareSupersetEqual;";i:8931;s:15:"NotSubsetEqual;";i:8840;s:12:"NotSucceeds;";i:8833;s:22:"NotSucceedsSlantEqual;";i:8929;s:17:"NotSupersetEqual;";i:8841;s:9:"NotTilde;";i:8769;s:14:"NotTildeEqual;";i:8772;s:18:"NotTildeFullEqual;";i:8775;s:14:"NotTildeTilde;";i:8777;s:15:"NotVerticalBar;";i:8740;s:5:"Nscr;";i:119977;s:7:"Ntilde;";i:209;s:6:"Ntilde";i:209;s:3:"Nu;";i:925;s:6:"OElig;";i:338;s:7:"Oacute;";i:211;s:6:"Oacute";i:211;s:6:"Ocirc;";i:212;s:5:"Ocirc";i:212;s:4:"Ocy;";i:1054;s:7:"Odblac;";i:336;s:4:"Ofr;";i:120082;s:7:"Ograve;";i:210;s:6:"Ograve";i:210;s:6:"Omacr;";i:332;s:6:"Omega;";i:937;s:8:"Omicron;";i:927;s:5:"Oopf;";i:120134;s:21:"OpenCurlyDoubleQuote;";i:8220;s:15:"OpenCurlyQuote;";i:8216;s:3:"Or;";i:10836;s:5:"Oscr;";i:119978;s:7:"Oslash;";i:216;s:6:"Oslash";i:216;s:7:"Otilde;";i:213;s:6:"Otilde";i:213;s:7:"Otimes;";i:10807;s:5:"Ouml;";i:214;s:4:"Ouml";i:214;s:8:"OverBar;";i:175;s:10:"OverBrace;";i:9182;s:12:"OverBracket;";i:9140;s:16:"OverParenthesis;";i:9180;s:9:"PartialD;";i:8706;s:4:"Pcy;";i:1055;s:4:"Pfr;";i:120083;s:4:"Phi;";i:934;s:3:"Pi;";i:928;s:10:"PlusMinus;";i:177;s:14:"Poincareplane;";i:8460;s:5:"Popf;";i:8473;s:3:"Pr;";i:10939;s:9:"Precedes;";i:8826;s:14:"PrecedesEqual;";i:10927;s:19:"PrecedesSlantEqual;";i:8828;s:14:"PrecedesTilde;";i:8830;s:6:"Prime;";i:8243;s:8:"Product;";i:8719;s:11:"Proportion;";i:8759;s:13:"Proportional;";i:8733;s:5:"Pscr;";i:119979;s:4:"Psi;";i:936;s:5:"QUOT;";i:34;s:4:"QUOT";i:34;s:4:"Qfr;";i:120084;s:5:"Qopf;";i:8474;s:5:"Qscr;";i:119980;s:6:"RBarr;";i:10512;s:4:"REG;";i:174;s:3:"REG";i
:174;s:7:"Racute;";i:340;s:5:"Rang;";i:10219;s:5:"Rarr;";i:8608;s:7:"Rarrtl;";i:10518;s:7:"Rcaron;";i:344;s:7:"Rcedil;";i:342;s:4:"Rcy;";i:1056;s:3:"Re;";i:8476;s:15:"ReverseElement;";i:8715;s:19:"ReverseEquilibrium;";i:8651;s:21:"ReverseUpEquilibrium;";i:10607;s:4:"Rfr;";i:8476;s:4:"Rho;";i:929;s:18:"RightAngleBracket;";i:10217;s:11:"RightArrow;";i:8594;s:14:"RightArrowBar;";i:8677;s:20:"RightArrowLeftArrow;";i:8644;s:13:"RightCeiling;";i:8969;s:19:"RightDoubleBracket;";i:10215;s:19:"RightDownTeeVector;";i:10589;s:16:"RightDownVector;";i:8642;s:19:"RightDownVectorBar;";i:10581;s:11:"RightFloor;";i:8971;s:9:"RightTee;";i:8866;s:14:"RightTeeArrow;";i:8614;s:15:"RightTeeVector;";i:10587;s:14:"RightTriangle;";i:8883;s:17:"RightTriangleBar;";i:10704;s:19:"RightTriangleEqual;";i:8885;s:18:"RightUpDownVector;";i:10575;s:17:"RightUpTeeVector;";i:10588;s:14:"RightUpVector;";i:8638;s:17:"RightUpVectorBar;";i:10580;s:12:"RightVector;";i:8640;s:15:"RightVectorBar;";i:10579;s:11:"Rightarrow;";i:8658;s:5:"Ropf;";i:8477;s:13:"RoundImplies;";i:10608;s:12:"Rrightarrow;";i:8667;s:5:"Rscr;";i:8475;s:4:"Rsh;";i:8625;s:12:"RuleDelayed;";i:10740;s:7:"SHCHcy;";i:1065;s:5:"SHcy;";i:1064;s:7:"SOFTcy;";i:1068;s:7:"Sacute;";i:346;s:3:"Sc;";i:10940;s:7:"Scaron;";i:352;s:7:"Scedil;";i:350;s:6:"Scirc;";i:348;s:4:"Scy;";i:1057;s:4:"Sfr;";i:120086;s:15:"ShortDownArrow;";i:8595;s:15:"ShortLeftArrow;";i:8592;s:16:"ShortRightArrow;";i:8594;s:13:"ShortUpArrow;";i:8593;s:6:"Sigma;";i:931;s:12:"SmallCircle;";i:8728;s:5:"Sopf;";i:120138;s:5:"Sqrt;";i:8730;s:7:"Square;";i:9633;s:19:"SquareIntersection;";i:8851;s:13:"SquareSubset;";i:8847;s:18:"SquareSubsetEqual;";i:8849;s:15:"SquareSuperset;";i:8848;s:20:"SquareSupersetEqual;";i:8850;s:12:"SquareUnion;";i:8852;s:5:"Sscr;";i:119982;s:5:"Star;";i:8902;s:4:"Sub;";i:8912;s:7:"Subset;";i:8912;s:12:"SubsetEqual;";i:8838;s:9:"Succeeds;";i:8827;s:14:"SucceedsEqual;";i:10928;s:19:"SucceedsSlantEqual;";i:8829;s:14:"SucceedsTilde;";i:8831;s:9:"SuchThat;";i:8715;s:4
:"Sum;";i:8721;s:4:"Sup;";i:8913;s:9:"Superset;";i:8835;s:14:"SupersetEqual;";i:8839;s:7:"Supset;";i:8913;s:6:"THORN;";i:222;s:5:"THORN";i:222;s:6:"TRADE;";i:8482;s:6:"TSHcy;";i:1035;s:5:"TScy;";i:1062;s:4:"Tab;";i:9;s:4:"Tau;";i:932;s:7:"Tcaron;";i:356;s:7:"Tcedil;";i:354;s:4:"Tcy;";i:1058;s:4:"Tfr;";i:120087;s:10:"Therefore;";i:8756;s:6:"Theta;";i:920;s:10:"ThinSpace;";i:8201;s:6:"Tilde;";i:8764;s:11:"TildeEqual;";i:8771;s:15:"TildeFullEqual;";i:8773;s:11:"TildeTilde;";i:8776;s:5:"Topf;";i:120139;s:10:"TripleDot;";i:8411;s:5:"Tscr;";i:119983;s:7:"Tstrok;";i:358;s:7:"Uacute;";i:218;s:6:"Uacute";i:218;s:5:"Uarr;";i:8607;s:9:"Uarrocir;";i:10569;s:6:"Ubrcy;";i:1038;s:7:"Ubreve;";i:364;s:6:"Ucirc;";i:219;s:5:"Ucirc";i:219;s:4:"Ucy;";i:1059;s:7:"Udblac;";i:368;s:4:"Ufr;";i:120088;s:7:"Ugrave;";i:217;s:6:"Ugrave";i:217;s:6:"Umacr;";i:362;s:9:"UnderBar;";i:818;s:11:"UnderBrace;";i:9183;s:13:"UnderBracket;";i:9141;s:17:"UnderParenthesis;";i:9181;s:6:"Union;";i:8899;s:10:"UnionPlus;";i:8846;s:6:"Uogon;";i:370;s:5:"Uopf;";i:120140;s:8:"UpArrow;";i:8593;s:11:"UpArrowBar;";i:10514;s:17:"UpArrowDownArrow;";i:8645;s:12:"UpDownArrow;";i:8597;s:14:"UpEquilibrium;";i:10606;s:6:"UpTee;";i:8869;s:11:"UpTeeArrow;";i:8613;s:8:"Uparrow;";i:8657;s:12:"Updownarrow;";i:8661;s:15:"UpperLeftArrow;";i:8598;s:16:"UpperRightArrow;";i:8599;s:5:"Upsi;";i:978;s:8:"Upsilon;";i:933;s:6:"Uring;";i:366;s:5:"Uscr;";i:119984;s:7:"Utilde;";i:360;s:5:"Uuml;";i:220;s:4:"Uuml";i:220;s:6:"VDash;";i:8875;s:5:"Vbar;";i:10987;s:4:"Vcy;";i:1042;s:6:"Vdash;";i:8873;s:7:"Vdashl;";i:10982;s:4:"Vee;";i:8897;s:7:"Verbar;";i:8214;s:5:"Vert;";i:8214;s:12:"VerticalBar;";i:8739;s:13:"VerticalLine;";i:124;s:18:"VerticalSeparator;";i:10072;s:14:"VerticalTilde;";i:8768;s:14:"VeryThinSpace;";i:8202;s:4:"Vfr;";i:120089;s:5:"Vopf;";i:120141;s:5:"Vscr;";i:119985;s:7:"Vvdash;";i:8874;s:6:"Wcirc;";i:372;s:6:"Wedge;";i:8896;s:4:"Wfr;";i:120090;s:5:"Wopf;";i:120142;s:5:"Wscr;";i:119986;s:4:"Xfr;";i:120091;s:3:"Xi;";i:926;s:5:"Xopf;
";i:120143;s:5:"Xscr;";i:119987;s:5:"YAcy;";i:1071;s:5:"YIcy;";i:1031;s:5:"YUcy;";i:1070;s:7:"Yacute;";i:221;s:6:"Yacute";i:221;s:6:"Ycirc;";i:374;s:4:"Ycy;";i:1067;s:4:"Yfr;";i:120092;s:5:"Yopf;";i:120144;s:5:"Yscr;";i:119988;s:5:"Yuml;";i:376;s:5:"ZHcy;";i:1046;s:7:"Zacute;";i:377;s:7:"Zcaron;";i:381;s:4:"Zcy;";i:1047;s:5:"Zdot;";i:379;s:15:"ZeroWidthSpace;";i:8203;s:5:"Zeta;";i:918;s:4:"Zfr;";i:8488;s:5:"Zopf;";i:8484;s:5:"Zscr;";i:119989;s:7:"aacute;";i:225;s:6:"aacute";i:225;s:7:"abreve;";i:259;s:3:"ac;";i:8766;s:4:"acd;";i:8767;s:6:"acirc;";i:226;s:5:"acirc";i:226;s:6:"acute;";i:180;s:5:"acute";i:180;s:4:"acy;";i:1072;s:6:"aelig;";i:230;s:5:"aelig";i:230;s:3:"af;";i:8289;s:4:"afr;";i:120094;s:7:"agrave;";i:224;s:6:"agrave";i:224;s:8:"alefsym;";i:8501;s:6:"aleph;";i:8501;s:6:"alpha;";i:945;s:6:"amacr;";i:257;s:6:"amalg;";i:10815;s:4:"amp;";i:38;s:3:"amp";i:38;s:4:"and;";i:8743;s:7:"andand;";i:10837;s:5:"andd;";i:10844;s:9:"andslope;";i:10840;s:5:"andv;";i:10842;s:4:"ang;";i:8736;s:5:"ange;";i:10660;s:6:"angle;";i:8736;s:7:"angmsd;";i:8737;s:9:"angmsdaa;";i:10664;s:9:"angmsdab;";i:10665;s:9:"angmsdac;";i:10666;s:9:"angmsdad;";i:10667;s:9:"angmsdae;";i:10668;s:9:"angmsdaf;";i:10669;s:9:"angmsdag;";i:10670;s:9:"angmsdah;";i:10671;s:6:"angrt;";i:8735;s:8:"angrtvb;";i:8894;s:9:"angrtvbd;";i:10653;s:7:"angsph;";i:8738;s:6:"angst;";i:8491;s:8:"angzarr;";i:9084;s:6:"aogon;";i:261;s:5:"aopf;";i:120146;s:3:"ap;";i:8776;s:4:"apE;";i:10864;s:7:"apacir;";i:10863;s:4:"ape;";i:8778;s:5:"apid;";i:8779;s:5:"apos;";i:39;s:7:"approx;";i:8776;s:9:"approxeq;";i:8778;s:6:"aring;";i:229;s:5:"aring";i:229;s:5:"ascr;";i:119990;s:4:"ast;";i:42;s:6:"asymp;";i:8776;s:8:"asympeq;";i:8781;s:7:"atilde;";i:227;s:6:"atilde";i:227;s:5:"auml;";i:228;s:4:"auml";i:228;s:9:"awconint;";i:8755;s:6:"awint;";i:10769;s:5:"bNot;";i:10989;s:9:"backcong;";i:8780;s:12:"backepsilon;";i:1014;s:10:"backprime;";i:8245;s:8:"backsim;";i:8765;s:10:"backsimeq;";i:8909;s:7:"barvee;";i:8893;s:7:"barwed;";i:8965;s:9:"
barwedge;";i:8965;s:5:"bbrk;";i:9141;s:9:"bbrktbrk;";i:9142;s:6:"bcong;";i:8780;s:4:"bcy;";i:1073;s:6:"bdquo;";i:8222;s:7:"becaus;";i:8757;s:8:"because;";i:8757;s:8:"bemptyv;";i:10672;s:6:"bepsi;";i:1014;s:7:"bernou;";i:8492;s:5:"beta;";i:946;s:5:"beth;";i:8502;s:8:"between;";i:8812;s:4:"bfr;";i:120095;s:7:"bigcap;";i:8898;s:8:"bigcirc;";i:9711;s:7:"bigcup;";i:8899;s:8:"bigodot;";i:10752;s:9:"bigoplus;";i:10753;s:10:"bigotimes;";i:10754;s:9:"bigsqcup;";i:10758;s:8:"bigstar;";i:9733;s:16:"bigtriangledown;";i:9661;s:14:"bigtriangleup;";i:9651;s:9:"biguplus;";i:10756;s:7:"bigvee;";i:8897;s:9:"bigwedge;";i:8896;s:7:"bkarow;";i:10509;s:13:"blacklozenge;";i:10731;s:12:"blacksquare;";i:9642;s:14:"blacktriangle;";i:9652;s:18:"blacktriangledown;";i:9662;s:18:"blacktriangleleft;";i:9666;s:19:"blacktriangleright;";i:9656;s:6:"blank;";i:9251;s:6:"blk12;";i:9618;s:6:"blk14;";i:9617;s:6:"blk34;";i:9619;s:6:"block;";i:9608;s:5:"bnot;";i:8976;s:5:"bopf;";i:120147;s:4:"bot;";i:8869;s:7:"bottom;";i:8869;s:7:"bowtie;";i:8904;s:6:"boxDL;";i:9559;s:6:"boxDR;";i:9556;s:6:"boxDl;";i:9558;s:6:"boxDr;";i:9555;s:5:"boxH;";i:9552;s:6:"boxHD;";i:9574;s:6:"boxHU;";i:9577;s:6:"boxHd;";i:9572;s:6:"boxHu;";i:9575;s:6:"boxUL;";i:9565;s:6:"boxUR;";i:9562;s:6:"boxUl;";i:9564;s:6:"boxUr;";i:9561;s:5:"boxV;";i:9553;s:6:"boxVH;";i:9580;s:6:"boxVL;";i:9571;s:6:"boxVR;";i:9568;s:6:"boxVh;";i:9579;s:6:"boxVl;";i:9570;s:6:"boxVr;";i:9567;s:7:"boxbox;";i:10697;s:6:"boxdL;";i:9557;s:6:"boxdR;";i:9554;s:6:"boxdl;";i:9488;s:6:"boxdr;";i:9484;s:5:"boxh;";i:9472;s:6:"boxhD;";i:9573;s:6:"boxhU;";i:9576;s:6:"boxhd;";i:9516;s:6:"boxhu;";i:9524;s:9:"boxminus;";i:8863;s:8:"boxplus;";i:8862;s:9:"boxtimes;";i:8864;s:6:"boxuL;";i:9563;s:6:"boxuR;";i:9560;s:6:"boxul;";i:9496;s:6:"boxur;";i:9492;s:5:"boxv;";i:9474;s:6:"boxvH;";i:9578;s:6:"boxvL;";i:9569;s:6:"boxvR;";i:9566;s:6:"boxvh;";i:9532;s:6:"boxvl;";i:9508;s:6:"boxvr;";i:9500;s:7:"bprime;";i:8245;s:6:"breve;";i:728;s:7:"brvbar;";i:166;s:6:"brvbar";i:166;s:5:"bscr;";i
:119991;s:6:"bsemi;";i:8271;s:5:"bsim;";i:8765;s:6:"bsime;";i:8909;s:5:"bsol;";i:92;s:6:"bsolb;";i:10693;s:5:"bull;";i:8226;s:7:"bullet;";i:8226;s:5:"bump;";i:8782;s:6:"bumpE;";i:10926;s:6:"bumpe;";i:8783;s:7:"bumpeq;";i:8783;s:7:"cacute;";i:263;s:4:"cap;";i:8745;s:7:"capand;";i:10820;s:9:"capbrcup;";i:10825;s:7:"capcap;";i:10827;s:7:"capcup;";i:10823;s:7:"capdot;";i:10816;s:6:"caret;";i:8257;s:6:"caron;";i:711;s:6:"ccaps;";i:10829;s:7:"ccaron;";i:269;s:7:"ccedil;";i:231;s:6:"ccedil";i:231;s:6:"ccirc;";i:265;s:6:"ccups;";i:10828;s:8:"ccupssm;";i:10832;s:5:"cdot;";i:267;s:6:"cedil;";i:184;s:5:"cedil";i:184;s:8:"cemptyv;";i:10674;s:5:"cent;";i:162;s:4:"cent";i:162;s:10:"centerdot;";i:183;s:4:"cfr;";i:120096;s:5:"chcy;";i:1095;s:6:"check;";i:10003;s:10:"checkmark;";i:10003;s:4:"chi;";i:967;s:4:"cir;";i:9675;s:5:"cirE;";i:10691;s:5:"circ;";i:710;s:7:"circeq;";i:8791;s:16:"circlearrowleft;";i:8634;s:17:"circlearrowright;";i:8635;s:9:"circledR;";i:174;s:9:"circledS;";i:9416;s:11:"circledast;";i:8859;s:12:"circledcirc;";i:8858;s:12:"circleddash;";i:8861;s:5:"cire;";i:8791;s:9:"cirfnint;";i:10768;s:7:"cirmid;";i:10991;s:8:"cirscir;";i:10690;s:6:"clubs;";i:9827;s:9:"clubsuit;";i:9827;s:6:"colon;";i:58;s:7:"colone;";i:8788;s:8:"coloneq;";i:8788;s:6:"comma;";i:44;s:7:"commat;";i:64;s:5:"comp;";i:8705;s:7:"compfn;";i:8728;s:11:"complement;";i:8705;s:10:"complexes;";i:8450;s:5:"cong;";i:8773;s:8:"congdot;";i:10861;s:7:"conint;";i:8750;s:5:"copf;";i:120148;s:7:"coprod;";i:8720;s:5:"copy;";i:169;s:4:"copy";i:169;s:7:"copysr;";i:8471;s:6:"crarr;";i:8629;s:6:"cross;";i:10007;s:5:"cscr;";i:119992;s:5:"csub;";i:10959;s:6:"csube;";i:10961;s:5:"csup;";i:10960;s:6:"csupe;";i:10962;s:6:"ctdot;";i:8943;s:8:"cudarrl;";i:10552;s:8:"cudarrr;";i:10549;s:6:"cuepr;";i:8926;s:6:"cuesc;";i:8927;s:7:"cularr;";i:8630;s:8:"cularrp;";i:10557;s:4:"cup;";i:8746;s:9:"cupbrcap;";i:10824;s:7:"cupcap;";i:10822;s:7:"cupcup;";i:10826;s:7:"cupdot;";i:8845;s:6:"cupor;";i:10821;s:7:"curarr;";i:8631;s:8:"curarrm;
";i:10556;s:12:"curlyeqprec;";i:8926;s:12:"curlyeqsucc;";i:8927;s:9:"curlyvee;";i:8910;s:11:"curlywedge;";i:8911;s:7:"curren;";i:164;s:6:"curren";i:164;s:15:"curvearrowleft;";i:8630;s:16:"curvearrowright;";i:8631;s:6:"cuvee;";i:8910;s:6:"cuwed;";i:8911;s:9:"cwconint;";i:8754;s:6:"cwint;";i:8753;s:7:"cylcty;";i:9005;s:5:"dArr;";i:8659;s:5:"dHar;";i:10597;s:7:"dagger;";i:8224;s:7:"daleth;";i:8504;s:5:"darr;";i:8595;s:5:"dash;";i:8208;s:6:"dashv;";i:8867;s:8:"dbkarow;";i:10511;s:6:"dblac;";i:733;s:7:"dcaron;";i:271;s:4:"dcy;";i:1076;s:3:"dd;";i:8518;s:8:"ddagger;";i:8225;s:6:"ddarr;";i:8650;s:8:"ddotseq;";i:10871;s:4:"deg;";i:176;s:3:"deg";i:176;s:6:"delta;";i:948;s:8:"demptyv;";i:10673;s:7:"dfisht;";i:10623;s:4:"dfr;";i:120097;s:6:"dharl;";i:8643;s:6:"dharr;";i:8642;s:5:"diam;";i:8900;s:8:"diamond;";i:8900;s:12:"diamondsuit;";i:9830;s:6:"diams;";i:9830;s:4:"die;";i:168;s:8:"digamma;";i:989;s:6:"disin;";i:8946;s:4:"div;";i:247;s:7:"divide;";i:247;s:6:"divide";i:247;s:14:"divideontimes;";i:8903;s:7:"divonx;";i:8903;s:5:"djcy;";i:1106;s:7:"dlcorn;";i:8990;s:7:"dlcrop;";i:8973;s:7:"dollar;";i:36;s:5:"dopf;";i:120149;s:4:"dot;";i:729;s:6:"doteq;";i:8784;s:9:"doteqdot;";i:8785;s:9:"dotminus;";i:8760;s:8:"dotplus;";i:8724;s:10:"dotsquare;";i:8865;s:15:"doublebarwedge;";i:8966;s:10:"downarrow;";i:8595;s:15:"downdownarrows;";i:8650;s:16:"downharpoonleft;";i:8643;s:17:"downharpoonright;";i:8642;s:9:"drbkarow;";i:10512;s:7:"drcorn;";i:8991;s:7:"drcrop;";i:8972;s:5:"dscr;";i:119993;s:5:"dscy;";i:1109;s:5:"dsol;";i:10742;s:7:"dstrok;";i:273;s:6:"dtdot;";i:8945;s:5:"dtri;";i:9663;s:6:"dtrif;";i:9662;s:6:"duarr;";i:8693;s:6:"duhar;";i:10607;s:8:"dwangle;";i:10662;s:5:"dzcy;";i:1119;s:9:"dzigrarr;";i:10239;s:6:"eDDot;";i:10871;s:5:"eDot;";i:8785;s:7:"eacute;";i:233;s:6:"eacute";i:233;s:7:"easter;";i:10862;s:7:"ecaron;";i:283;s:5:"ecir;";i:8790;s:6:"ecirc;";i:234;s:5:"ecirc";i:234;s:7:"ecolon;";i:8789;s:4:"ecy;";i:1101;s:5:"edot;";i:279;s:3:"ee;";i:8519;s:6:"efDot;";i:8786;s:4:"efr;";
i:120098;s:3:"eg;";i:10906;s:7:"egrave;";i:232;s:6:"egrave";i:232;s:4:"egs;";i:10902;s:7:"egsdot;";i:10904;s:3:"el;";i:10905;s:9:"elinters;";i:9191;s:4:"ell;";i:8467;s:4:"els;";i:10901;s:7:"elsdot;";i:10903;s:6:"emacr;";i:275;s:6:"empty;";i:8709;s:9:"emptyset;";i:8709;s:7:"emptyv;";i:8709;s:7:"emsp13;";i:8196;s:7:"emsp14;";i:8197;s:5:"emsp;";i:8195;s:4:"eng;";i:331;s:5:"ensp;";i:8194;s:6:"eogon;";i:281;s:5:"eopf;";i:120150;s:5:"epar;";i:8917;s:7:"eparsl;";i:10723;s:6:"eplus;";i:10865;s:5:"epsi;";i:1013;s:8:"epsilon;";i:949;s:6:"epsiv;";i:949;s:7:"eqcirc;";i:8790;s:8:"eqcolon;";i:8789;s:6:"eqsim;";i:8770;s:11:"eqslantgtr;";i:10902;s:12:"eqslantless;";i:10901;s:7:"equals;";i:61;s:7:"equest;";i:8799;s:6:"equiv;";i:8801;s:8:"equivDD;";i:10872;s:9:"eqvparsl;";i:10725;s:6:"erDot;";i:8787;s:6:"erarr;";i:10609;s:5:"escr;";i:8495;s:6:"esdot;";i:8784;s:5:"esim;";i:8770;s:4:"eta;";i:951;s:4:"eth;";i:240;s:3:"eth";i:240;s:5:"euml;";i:235;s:4:"euml";i:235;s:5:"euro;";i:8364;s:5:"excl;";i:33;s:6:"exist;";i:8707;s:12:"expectation;";i:8496;s:13:"exponentiale;";i:8519;s:14:"fallingdotseq;";i:8786;s:4:"fcy;";i:1092;s:7:"female;";i:9792;s:7:"ffilig;";i:64259;s:6:"fflig;";i:64256;s:7:"ffllig;";i:64260;s:4:"ffr;";i:120099;s:6:"filig;";i:64257;s:5:"flat;";i:9837;s:6:"fllig;";i:64258;s:6:"fltns;";i:9649;s:5:"fnof;";i:402;s:5:"fopf;";i:120151;s:7:"forall;";i:8704;s:5:"fork;";i:8916;s:6:"forkv;";i:10969;s:9:"fpartint;";i:10765;s:7:"frac12;";i:189;s:6:"frac12";i:189;s:7:"frac13;";i:8531;s:7:"frac14;";i:188;s:6:"frac14";i:188;s:7:"frac15;";i:8533;s:7:"frac16;";i:8537;s:7:"frac18;";i:8539;s:7:"frac23;";i:8532;s:7:"frac25;";i:8534;s:7:"frac34;";i:190;s:6:"frac34";i:190;s:7:"frac35;";i:8535;s:7:"frac38;";i:8540;s:7:"frac45;";i:8536;s:7:"frac56;";i:8538;s:7:"frac58;";i:8541;s:7:"frac78;";i:8542;s:6:"frasl;";i:8260;s:6:"frown;";i:8994;s:5:"fscr;";i:119995;s:3:"gE;";i:8807;s:4:"gEl;";i:10892;s:7:"gacute;";i:501;s:6:"gamma;";i:947;s:7:"gammad;";i:989;s:4:"gap;";i:10886;s:7:"gbreve;";i:287;s:6:"gcirc
;";i:285;s:4:"gcy;";i:1075;s:5:"gdot;";i:289;s:3:"ge;";i:8805;s:4:"gel;";i:8923;s:4:"geq;";i:8805;s:5:"geqq;";i:8807;s:9:"geqslant;";i:10878;s:4:"ges;";i:10878;s:6:"gescc;";i:10921;s:7:"gesdot;";i:10880;s:8:"gesdoto;";i:10882;s:9:"gesdotol;";i:10884;s:7:"gesles;";i:10900;s:4:"gfr;";i:120100;s:3:"gg;";i:8811;s:4:"ggg;";i:8921;s:6:"gimel;";i:8503;s:5:"gjcy;";i:1107;s:3:"gl;";i:8823;s:4:"glE;";i:10898;s:4:"gla;";i:10917;s:4:"glj;";i:10916;s:4:"gnE;";i:8809;s:5:"gnap;";i:10890;s:9:"gnapprox;";i:10890;s:4:"gne;";i:10888;s:5:"gneq;";i:10888;s:6:"gneqq;";i:8809;s:6:"gnsim;";i:8935;s:5:"gopf;";i:120152;s:6:"grave;";i:96;s:5:"gscr;";i:8458;s:5:"gsim;";i:8819;s:6:"gsime;";i:10894;s:6:"gsiml;";i:10896;s:3:"gt;";i:62;s:2:"gt";i:62;s:5:"gtcc;";i:10919;s:6:"gtcir;";i:10874;s:6:"gtdot;";i:8919;s:7:"gtlPar;";i:10645;s:8:"gtquest;";i:10876;s:10:"gtrapprox;";i:10886;s:7:"gtrarr;";i:10616;s:7:"gtrdot;";i:8919;s:10:"gtreqless;";i:8923;s:11:"gtreqqless;";i:10892;s:8:"gtrless;";i:8823;s:7:"gtrsim;";i:8819;s:5:"hArr;";i:8660;s:7:"hairsp;";i:8202;s:5:"half;";i:189;s:7:"hamilt;";i:8459;s:7:"hardcy;";i:1098;s:5:"harr;";i:8596;s:8:"harrcir;";i:10568;s:6:"harrw;";i:8621;s:5:"hbar;";i:8463;s:6:"hcirc;";i:293;s:7:"hearts;";i:9829;s:10:"heartsuit;";i:9829;s:7:"hellip;";i:8230;s:7:"hercon;";i:8889;s:4:"hfr;";i:120101;s:9:"hksearow;";i:10533;s:9:"hkswarow;";i:10534;s:6:"hoarr;";i:8703;s:7:"homtht;";i:8763;s:14:"hookleftarrow;";i:8617;s:15:"hookrightarrow;";i:8618;s:5:"hopf;";i:120153;s:7:"horbar;";i:8213;s:5:"hscr;";i:119997;s:7:"hslash;";i:8463;s:7:"hstrok;";i:295;s:7:"hybull;";i:8259;s:7:"hyphen;";i:8208;s:7:"iacute;";i:237;s:6:"iacute";i:237;s:3:"ic;";i:8291;s:6:"icirc;";i:238;s:5:"icirc";i:238;s:4:"icy;";i:1080;s:5:"iecy;";i:1077;s:6:"iexcl;";i:161;s:5:"iexcl";i:161;s:4:"iff;";i:8660;s:4:"ifr;";i:120102;s:7:"igrave;";i:236;s:6:"igrave";i:236;s:3:"ii;";i:8520;s:7:"iiiint;";i:10764;s:6:"iiint;";i:8749;s:7:"iinfin;";i:10716;s:6:"iiota;";i:8489;s:6:"ijlig;";i:307;s:6:"imacr;";i:299;s:6:"image;";i:8
465;s:9:"imagline;";i:8464;s:9:"imagpart;";i:8465;s:6:"imath;";i:305;s:5:"imof;";i:8887;s:6:"imped;";i:437;s:3:"in;";i:8712;s:7:"incare;";i:8453;s:6:"infin;";i:8734;s:9:"infintie;";i:10717;s:7:"inodot;";i:305;s:4:"int;";i:8747;s:7:"intcal;";i:8890;s:9:"integers;";i:8484;s:9:"intercal;";i:8890;s:9:"intlarhk;";i:10775;s:8:"intprod;";i:10812;s:5:"iocy;";i:1105;s:6:"iogon;";i:303;s:5:"iopf;";i:120154;s:5:"iota;";i:953;s:6:"iprod;";i:10812;s:7:"iquest;";i:191;s:6:"iquest";i:191;s:5:"iscr;";i:119998;s:5:"isin;";i:8712;s:6:"isinE;";i:8953;s:8:"isindot;";i:8949;s:6:"isins;";i:8948;s:7:"isinsv;";i:8947;s:6:"isinv;";i:8712;s:3:"it;";i:8290;s:7:"itilde;";i:297;s:6:"iukcy;";i:1110;s:5:"iuml;";i:239;s:4:"iuml";i:239;s:6:"jcirc;";i:309;s:4:"jcy;";i:1081;s:4:"jfr;";i:120103;s:6:"jmath;";i:567;s:5:"jopf;";i:120155;s:5:"jscr;";i:119999;s:7:"jsercy;";i:1112;s:6:"jukcy;";i:1108;s:6:"kappa;";i:954;s:7:"kappav;";i:1008;s:7:"kcedil;";i:311;s:4:"kcy;";i:1082;s:4:"kfr;";i:120104;s:7:"kgreen;";i:312;s:5:"khcy;";i:1093;s:5:"kjcy;";i:1116;s:5:"kopf;";i:120156;s:5:"kscr;";i:120000;s:6:"lAarr;";i:8666;s:5:"lArr;";i:8656;s:7:"lAtail;";i:10523;s:6:"lBarr;";i:10510;s:3:"lE;";i:8806;s:4:"lEg;";i:10891;s:5:"lHar;";i:10594;s:7:"lacute;";i:314;s:9:"laemptyv;";i:10676;s:7:"lagran;";i:8466;s:7:"lambda;";i:955;s:5:"lang;";i:10216;s:6:"langd;";i:10641;s:7:"langle;";i:10216;s:4:"lap;";i:10885;s:6:"laquo;";i:171;s:5:"laquo";i:171;s:5:"larr;";i:8592;s:6:"larrb;";i:8676;s:8:"larrbfs;";i:10527;s:7:"larrfs;";i:10525;s:7:"larrhk;";i:8617;s:7:"larrlp;";i:8619;s:7:"larrpl;";i:10553;s:8:"larrsim;";i:10611;s:7:"larrtl;";i:8610;s:4:"lat;";i:10923;s:7:"latail;";i:10521;s:5:"late;";i:10925;s:6:"lbarr;";i:10508;s:6:"lbbrk;";i:10098;s:7:"lbrace;";i:123;s:7:"lbrack;";i:91;s:6:"lbrke;";i:10635;s:8:"lbrksld;";i:10639;s:8:"lbrkslu;";i:10637;s:7:"lcaron;";i:318;s:7:"lcedil;";i:316;s:6:"lceil;";i:8968;s:5:"lcub;";i:123;s:4:"lcy;";i:1083;s:5:"ldca;";i:10550;s:6:"ldquo;";i:8220;s:7:"ldquor;";i:8222;s:8:"ldrdhar;";i:10599;s:9:"ld
rushar;";i:10571;s:5:"ldsh;";i:8626;s:3:"le;";i:8804;s:10:"leftarrow;";i:8592;s:14:"leftarrowtail;";i:8610;s:16:"leftharpoondown;";i:8637;s:14:"leftharpoonup;";i:8636;s:15:"leftleftarrows;";i:8647;s:15:"leftrightarrow;";i:8596;s:16:"leftrightarrows;";i:8646;s:18:"leftrightharpoons;";i:8651;s:20:"leftrightsquigarrow;";i:8621;s:15:"leftthreetimes;";i:8907;s:4:"leg;";i:8922;s:4:"leq;";i:8804;s:5:"leqq;";i:8806;s:9:"leqslant;";i:10877;s:4:"les;";i:10877;s:6:"lescc;";i:10920;s:7:"lesdot;";i:10879;s:8:"lesdoto;";i:10881;s:9:"lesdotor;";i:10883;s:7:"lesges;";i:10899;s:11:"lessapprox;";i:10885;s:8:"lessdot;";i:8918;s:10:"lesseqgtr;";i:8922;s:11:"lesseqqgtr;";i:10891;s:8:"lessgtr;";i:8822;s:8:"lesssim;";i:8818;s:7:"lfisht;";i:10620;s:7:"lfloor;";i:8970;s:4:"lfr;";i:120105;s:3:"lg;";i:8822;s:4:"lgE;";i:10897;s:6:"lhard;";i:8637;s:6:"lharu;";i:8636;s:7:"lharul;";i:10602;s:6:"lhblk;";i:9604;s:5:"ljcy;";i:1113;s:3:"ll;";i:8810;s:6:"llarr;";i:8647;s:9:"llcorner;";i:8990;s:7:"llhard;";i:10603;s:6:"lltri;";i:9722;s:7:"lmidot;";i:320;s:7:"lmoust;";i:9136;s:11:"lmoustache;";i:9136;s:4:"lnE;";i:8808;s:5:"lnap;";i:10889;s:9:"lnapprox;";i:10889;s:4:"lne;";i:10887;s:5:"lneq;";i:10887;s:6:"lneqq;";i:8808;s:6:"lnsim;";i:8934;s:6:"loang;";i:10220;s:6:"loarr;";i:8701;s:6:"lobrk;";i:10214;s:14:"longleftarrow;";i:10229;s:19:"longleftrightarrow;";i:10231;s:11:"longmapsto;";i:10236;s:15:"longrightarrow;";i:10230;s:14:"looparrowleft;";i:8619;s:15:"looparrowright;";i:8620;s:6:"lopar;";i:10629;s:5:"lopf;";i:120157;s:7:"loplus;";i:10797;s:8:"lotimes;";i:10804;s:7:"lowast;";i:8727;s:7:"lowbar;";i:95;s:4:"loz;";i:9674;s:8:"lozenge;";i:9674;s:5:"lozf;";i:10731;s:5:"lpar;";i:40;s:7:"lparlt;";i:10643;s:6:"lrarr;";i:8646;s:9:"lrcorner;";i:8991;s:6:"lrhar;";i:8651;s:7:"lrhard;";i:10605;s:4:"lrm;";i:8206;s:6:"lrtri;";i:8895;s:7:"lsaquo;";i:8249;s:5:"lscr;";i:120001;s:4:"lsh;";i:8624;s:5:"lsim;";i:8818;s:6:"lsime;";i:10893;s:6:"lsimg;";i:10895;s:5:"lsqb;";i:91;s:6:"lsquo;";i:8216;s:7:"lsquor;";i:8218;s:7:"ls
trok;";i:322;s:3:"lt;";i:60;s:2:"lt";i:60;s:5:"ltcc;";i:10918;s:6:"ltcir;";i:10873;s:6:"ltdot;";i:8918;s:7:"lthree;";i:8907;s:7:"ltimes;";i:8905;s:7:"ltlarr;";i:10614;s:8:"ltquest;";i:10875;s:7:"ltrPar;";i:10646;s:5:"ltri;";i:9667;s:6:"ltrie;";i:8884;s:6:"ltrif;";i:9666;s:9:"lurdshar;";i:10570;s:8:"luruhar;";i:10598;s:6:"mDDot;";i:8762;s:5:"macr;";i:175;s:4:"macr";i:175;s:5:"male;";i:9794;s:5:"malt;";i:10016;s:8:"maltese;";i:10016;s:4:"map;";i:8614;s:7:"mapsto;";i:8614;s:11:"mapstodown;";i:8615;s:11:"mapstoleft;";i:8612;s:9:"mapstoup;";i:8613;s:7:"marker;";i:9646;s:7:"mcomma;";i:10793;s:4:"mcy;";i:1084;s:6:"mdash;";i:8212;s:14:"measuredangle;";i:8737;s:4:"mfr;";i:120106;s:4:"mho;";i:8487;s:6:"micro;";i:181;s:5:"micro";i:181;s:4:"mid;";i:8739;s:7:"midast;";i:42;s:7:"midcir;";i:10992;s:7:"middot;";i:183;s:6:"middot";i:183;s:6:"minus;";i:8722;s:7:"minusb;";i:8863;s:7:"minusd;";i:8760;s:8:"minusdu;";i:10794;s:5:"mlcp;";i:10971;s:5:"mldr;";i:8230;s:7:"mnplus;";i:8723;s:7:"models;";i:8871;s:5:"mopf;";i:120158;s:3:"mp;";i:8723;s:5:"mscr;";i:120002;s:7:"mstpos;";i:8766;s:3:"mu;";i:956;s:9:"multimap;";i:8888;s:6:"mumap;";i:8888;s:11:"nLeftarrow;";i:8653;s:16:"nLeftrightarrow;";i:8654;s:12:"nRightarrow;";i:8655;s:7:"nVDash;";i:8879;s:7:"nVdash;";i:8878;s:6:"nabla;";i:8711;s:7:"nacute;";i:324;s:4:"nap;";i:8777;s:6:"napos;";i:329;s:8:"napprox;";i:8777;s:6:"natur;";i:9838;s:8:"natural;";i:9838;s:9:"naturals;";i:8469;s:5:"nbsp;";i:160;s:4:"nbsp";i:160;s:5:"ncap;";i:10819;s:7:"ncaron;";i:328;s:7:"ncedil;";i:326;s:6:"ncong;";i:8775;s:5:"ncup;";i:10818;s:4:"ncy;";i:1085;s:6:"ndash;";i:8211;s:3:"ne;";i:8800;s:6:"neArr;";i:8663;s:7:"nearhk;";i:10532;s:6:"nearr;";i:8599;s:8:"nearrow;";i:8599;s:7:"nequiv;";i:8802;s:7:"nesear;";i:10536;s:7:"nexist;";i:8708;s:8:"nexists;";i:8708;s:4:"nfr;";i:120107;s:4:"nge;";i:8817;s:5:"ngeq;";i:8817;s:6:"ngsim;";i:8821;s:4:"ngt;";i:8815;s:5:"ngtr;";i:8815;s:6:"nhArr;";i:8654;s:6:"nharr;";i:8622;s:6:"nhpar;";i:10994;s:3:"ni;";i:8715;s:4:"nis;";i:8956;s:5
:"nisd;";i:8954;s:4:"niv;";i:8715;s:5:"njcy;";i:1114;s:6:"nlArr;";i:8653;s:6:"nlarr;";i:8602;s:5:"nldr;";i:8229;s:4:"nle;";i:8816;s:11:"nleftarrow;";i:8602;s:16:"nleftrightarrow;";i:8622;s:5:"nleq;";i:8816;s:6:"nless;";i:8814;s:6:"nlsim;";i:8820;s:4:"nlt;";i:8814;s:6:"nltri;";i:8938;s:7:"nltrie;";i:8940;s:5:"nmid;";i:8740;s:5:"nopf;";i:120159;s:4:"not;";i:172;s:3:"not";i:172;s:6:"notin;";i:8713;s:8:"notinva;";i:8713;s:8:"notinvb;";i:8951;s:8:"notinvc;";i:8950;s:6:"notni;";i:8716;s:8:"notniva;";i:8716;s:8:"notnivb;";i:8958;s:8:"notnivc;";i:8957;s:5:"npar;";i:8742;s:10:"nparallel;";i:8742;s:8:"npolint;";i:10772;s:4:"npr;";i:8832;s:7:"nprcue;";i:8928;s:6:"nprec;";i:8832;s:6:"nrArr;";i:8655;s:6:"nrarr;";i:8603;s:12:"nrightarrow;";i:8603;s:6:"nrtri;";i:8939;s:7:"nrtrie;";i:8941;s:4:"nsc;";i:8833;s:7:"nsccue;";i:8929;s:5:"nscr;";i:120003;s:10:"nshortmid;";i:8740;s:15:"nshortparallel;";i:8742;s:5:"nsim;";i:8769;s:6:"nsime;";i:8772;s:7:"nsimeq;";i:8772;s:6:"nsmid;";i:8740;s:6:"nspar;";i:8742;s:8:"nsqsube;";i:8930;s:8:"nsqsupe;";i:8931;s:5:"nsub;";i:8836;s:6:"nsube;";i:8840;s:10:"nsubseteq;";i:8840;s:6:"nsucc;";i:8833;s:5:"nsup;";i:8837;s:6:"nsupe;";i:8841;s:10:"nsupseteq;";i:8841;s:5:"ntgl;";i:8825;s:7:"ntilde;";i:241;s:6:"ntilde";i:241;s:5:"ntlg;";i:8824;s:14:"ntriangleleft;";i:8938;s:16:"ntrianglelefteq;";i:8940;s:15:"ntriangleright;";i:8939;s:17:"ntrianglerighteq;";i:8941;s:3:"nu;";i:957;s:4:"num;";i:35;s:7:"numero;";i:8470;s:6:"numsp;";i:8199;s:7:"nvDash;";i:8877;s:7:"nvHarr;";i:10500;s:7:"nvdash;";i:8876;s:8:"nvinfin;";i:10718;s:7:"nvlArr;";i:10498;s:7:"nvrArr;";i:10499;s:6:"nwArr;";i:8662;s:7:"nwarhk;";i:10531;s:6:"nwarr;";i:8598;s:8:"nwarrow;";i:8598;s:7:"nwnear;";i:10535;s:3:"oS;";i:9416;s:7:"oacute;";i:243;s:6:"oacute";i:243;s:5:"oast;";i:8859;s:5:"ocir;";i:8858;s:6:"ocirc;";i:244;s:5:"ocirc";i:244;s:4:"ocy;";i:1086;s:6:"odash;";i:8861;s:7:"odblac;";i:337;s:5:"odiv;";i:10808;s:5:"odot;";i:8857;s:7:"odsold;";i:10684;s:6:"oelig;";i:339;s:6:"ofcir;";i:10687;s:4:"ofr;"
;i:120108;s:5:"ogon;";i:731;s:7:"ograve;";i:242;s:6:"ograve";i:242;s:4:"ogt;";i:10689;s:6:"ohbar;";i:10677;s:4:"ohm;";i:8486;s:5:"oint;";i:8750;s:6:"olarr;";i:8634;s:6:"olcir;";i:10686;s:8:"olcross;";i:10683;s:6:"oline;";i:8254;s:4:"olt;";i:10688;s:6:"omacr;";i:333;s:6:"omega;";i:969;s:8:"omicron;";i:959;s:5:"omid;";i:10678;s:7:"ominus;";i:8854;s:5:"oopf;";i:120160;s:5:"opar;";i:10679;s:6:"operp;";i:10681;s:6:"oplus;";i:8853;s:3:"or;";i:8744;s:6:"orarr;";i:8635;s:4:"ord;";i:10845;s:6:"order;";i:8500;s:8:"orderof;";i:8500;s:5:"ordf;";i:170;s:4:"ordf";i:170;s:5:"ordm;";i:186;s:4:"ordm";i:186;s:7:"origof;";i:8886;s:5:"oror;";i:10838;s:8:"orslope;";i:10839;s:4:"orv;";i:10843;s:5:"oscr;";i:8500;s:7:"oslash;";i:248;s:6:"oslash";i:248;s:5:"osol;";i:8856;s:7:"otilde;";i:245;s:6:"otilde";i:245;s:7:"otimes;";i:8855;s:9:"otimesas;";i:10806;s:5:"ouml;";i:246;s:4:"ouml";i:246;s:6:"ovbar;";i:9021;s:4:"par;";i:8741;s:5:"para;";i:182;s:4:"para";i:182;s:9:"parallel;";i:8741;s:7:"parsim;";i:10995;s:6:"parsl;";i:11005;s:5:"part;";i:8706;s:4:"pcy;";i:1087;s:7:"percnt;";i:37;s:7:"period;";i:46;s:7:"permil;";i:8240;s:5:"perp;";i:8869;s:8:"pertenk;";i:8241;s:4:"pfr;";i:120109;s:4:"phi;";i:966;s:5:"phiv;";i:966;s:7:"phmmat;";i:8499;s:6:"phone;";i:9742;s:3:"pi;";i:960;s:10:"pitchfork;";i:8916;s:4:"piv;";i:982;s:7:"planck;";i:8463;s:8:"planckh;";i:8462;s:7:"plankv;";i:8463;s:5:"plus;";i:43;s:9:"plusacir;";i:10787;s:6:"plusb;";i:8862;s:8:"pluscir;";i:10786;s:7:"plusdo;";i:8724;s:7:"plusdu;";i:10789;s:6:"pluse;";i:10866;s:7:"plusmn;";i:177;s:6:"plusmn";i:177;s:8:"plussim;";i:10790;s:8:"plustwo;";i:10791;s:3:"pm;";i:177;s:9:"pointint;";i:10773;s:5:"popf;";i:120161;s:6:"pound;";i:163;s:5:"pound";i:163;s:3:"pr;";i:8826;s:4:"prE;";i:10931;s:5:"prap;";i:10935;s:6:"prcue;";i:8828;s:4:"pre;";i:10927;s:5:"prec;";i:8826;s:11:"precapprox;";i:10935;s:12:"preccurlyeq;";i:8828;s:7:"preceq;";i:10927;s:12:"precnapprox;";i:10937;s:9:"precneqq;";i:10933;s:9:"precnsim;";i:8936;s:8:"precsim;";i:8830;s:6:"prime;"
;i:8242;s:7:"primes;";i:8473;s:5:"prnE;";i:10933;s:6:"prnap;";i:10937;s:7:"prnsim;";i:8936;s:5:"prod;";i:8719;s:9:"profalar;";i:9006;s:9:"profline;";i:8978;s:9:"profsurf;";i:8979;s:5:"prop;";i:8733;s:7:"propto;";i:8733;s:6:"prsim;";i:8830;s:7:"prurel;";i:8880;s:5:"pscr;";i:120005;s:4:"psi;";i:968;s:7:"puncsp;";i:8200;s:4:"qfr;";i:120110;s:5:"qint;";i:10764;s:5:"qopf;";i:120162;s:7:"qprime;";i:8279;s:5:"qscr;";i:120006;s:12:"quaternions;";i:8461;s:8:"quatint;";i:10774;s:6:"quest;";i:63;s:8:"questeq;";i:8799;s:5:"quot;";i:34;s:4:"quot";i:34;s:6:"rAarr;";i:8667;s:5:"rArr;";i:8658;s:7:"rAtail;";i:10524;s:6:"rBarr;";i:10511;s:5:"rHar;";i:10596;s:5:"race;";i:10714;s:7:"racute;";i:341;s:6:"radic;";i:8730;s:9:"raemptyv;";i:10675;s:5:"rang;";i:10217;s:6:"rangd;";i:10642;s:6:"range;";i:10661;s:7:"rangle;";i:10217;s:6:"raquo;";i:187;s:5:"raquo";i:187;s:5:"rarr;";i:8594;s:7:"rarrap;";i:10613;s:6:"rarrb;";i:8677;s:8:"rarrbfs;";i:10528;s:6:"rarrc;";i:10547;s:7:"rarrfs;";i:10526;s:7:"rarrhk;";i:8618;s:7:"rarrlp;";i:8620;s:7:"rarrpl;";i:10565;s:8:"rarrsim;";i:10612;s:7:"rarrtl;";i:8611;s:6:"rarrw;";i:8605;s:7:"ratail;";i:10522;s:6:"ratio;";i:8758;s:10:"rationals;";i:8474;s:6:"rbarr;";i:10509;s:6:"rbbrk;";i:10099;s:7:"rbrace;";i:125;s:7:"rbrack;";i:93;s:6:"rbrke;";i:10636;s:8:"rbrksld;";i:10638;s:8:"rbrkslu;";i:10640;s:7:"rcaron;";i:345;s:7:"rcedil;";i:343;s:6:"rceil;";i:8969;s:5:"rcub;";i:125;s:4:"rcy;";i:1088;s:5:"rdca;";i:10551;s:8:"rdldhar;";i:10601;s:6:"rdquo;";i:8221;s:7:"rdquor;";i:8221;s:5:"rdsh;";i:8627;s:5:"real;";i:8476;s:8:"realine;";i:8475;s:9:"realpart;";i:8476;s:6:"reals;";i:8477;s:5:"rect;";i:9645;s:4:"reg;";i:174;s:3:"reg";i:174;s:7:"rfisht;";i:10621;s:7:"rfloor;";i:8971;s:4:"rfr;";i:120111;s:6:"rhard;";i:8641;s:6:"rharu;";i:8640;s:7:"rharul;";i:10604;s:4:"rho;";i:961;s:5:"rhov;";i:1009;s:11:"rightarrow;";i:8594;s:15:"rightarrowtail;";i:8611;s:17:"rightharpoondown;";i:8641;s:15:"rightharpoonup;";i:8640;s:16:"rightleftarrows;";i:8644;s:18:"rightleftharpoons;";i:8652;
s:17:"rightrightarrows;";i:8649;s:16:"rightsquigarrow;";i:8605;s:16:"rightthreetimes;";i:8908;s:5:"ring;";i:730;s:13:"risingdotseq;";i:8787;s:6:"rlarr;";i:8644;s:6:"rlhar;";i:8652;s:4:"rlm;";i:8207;s:7:"rmoust;";i:9137;s:11:"rmoustache;";i:9137;s:6:"rnmid;";i:10990;s:6:"roang;";i:10221;s:6:"roarr;";i:8702;s:6:"robrk;";i:10215;s:6:"ropar;";i:10630;s:5:"ropf;";i:120163;s:7:"roplus;";i:10798;s:8:"rotimes;";i:10805;s:5:"rpar;";i:41;s:7:"rpargt;";i:10644;s:9:"rppolint;";i:10770;s:6:"rrarr;";i:8649;s:7:"rsaquo;";i:8250;s:5:"rscr;";i:120007;s:4:"rsh;";i:8625;s:5:"rsqb;";i:93;s:6:"rsquo;";i:8217;s:7:"rsquor;";i:8217;s:7:"rthree;";i:8908;s:7:"rtimes;";i:8906;s:5:"rtri;";i:9657;s:6:"rtrie;";i:8885;s:6:"rtrif;";i:9656;s:9:"rtriltri;";i:10702;s:8:"ruluhar;";i:10600;s:3:"rx;";i:8478;s:7:"sacute;";i:347;s:6:"sbquo;";i:8218;s:3:"sc;";i:8827;s:4:"scE;";i:10932;s:5:"scap;";i:10936;s:7:"scaron;";i:353;s:6:"sccue;";i:8829;s:4:"sce;";i:10928;s:7:"scedil;";i:351;s:6:"scirc;";i:349;s:5:"scnE;";i:10934;s:6:"scnap;";i:10938;s:7:"scnsim;";i:8937;s:9:"scpolint;";i:10771;s:6:"scsim;";i:8831;s:4:"scy;";i:1089;s:5:"sdot;";i:8901;s:6:"sdotb;";i:8865;s:6:"sdote;";i:10854;s:6:"seArr;";i:8664;s:7:"searhk;";i:10533;s:6:"searr;";i:8600;s:8:"searrow;";i:8600;s:5:"sect;";i:167;s:4:"sect";i:167;s:5:"semi;";i:59;s:7:"seswar;";i:10537;s:9:"setminus;";i:8726;s:6:"setmn;";i:8726;s:5:"sext;";i:10038;s:4:"sfr;";i:120112;s:7:"sfrown;";i:8994;s:6:"sharp;";i:9839;s:7:"shchcy;";i:1097;s:5:"shcy;";i:1096;s:9:"shortmid;";i:8739;s:14:"shortparallel;";i:8741;s:4:"shy;";i:173;s:3:"shy";i:173;s:6:"sigma;";i:963;s:7:"sigmaf;";i:962;s:7:"sigmav;";i:962;s:4:"sim;";i:8764;s:7:"simdot;";i:10858;s:5:"sime;";i:8771;s:6:"simeq;";i:8771;s:5:"simg;";i:10910;s:6:"simgE;";i:10912;s:5:"siml;";i:10909;s:6:"simlE;";i:10911;s:6:"simne;";i:8774;s:8:"simplus;";i:10788;s:8:"simrarr;";i:10610;s:6:"slarr;";i:8592;s:14:"smallsetminus;";i:8726;s:7:"smashp;";i:10803;s:9:"smeparsl;";i:10724;s:5:"smid;";i:8739;s:6:"smile;";i:8995;s:4:"smt;";i:1
0922;s:5:"smte;";i:10924;s:7:"softcy;";i:1100;s:4:"sol;";i:47;s:5:"solb;";i:10692;s:7:"solbar;";i:9023;s:5:"sopf;";i:120164;s:7:"spades;";i:9824;s:10:"spadesuit;";i:9824;s:5:"spar;";i:8741;s:6:"sqcap;";i:8851;s:6:"sqcup;";i:8852;s:6:"sqsub;";i:8847;s:7:"sqsube;";i:8849;s:9:"sqsubset;";i:8847;s:11:"sqsubseteq;";i:8849;s:6:"sqsup;";i:8848;s:7:"sqsupe;";i:8850;s:9:"sqsupset;";i:8848;s:11:"sqsupseteq;";i:8850;s:4:"squ;";i:9633;s:7:"square;";i:9633;s:7:"squarf;";i:9642;s:5:"squf;";i:9642;s:6:"srarr;";i:8594;s:5:"sscr;";i:120008;s:7:"ssetmn;";i:8726;s:7:"ssmile;";i:8995;s:7:"sstarf;";i:8902;s:5:"star;";i:9734;s:6:"starf;";i:9733;s:16:"straightepsilon;";i:1013;s:12:"straightphi;";i:981;s:6:"strns;";i:175;s:4:"sub;";i:8834;s:5:"subE;";i:10949;s:7:"subdot;";i:10941;s:5:"sube;";i:8838;s:8:"subedot;";i:10947;s:8:"submult;";i:10945;s:6:"subnE;";i:10955;s:6:"subne;";i:8842;s:8:"subplus;";i:10943;s:8:"subrarr;";i:10617;s:7:"subset;";i:8834;s:9:"subseteq;";i:8838;s:10:"subseteqq;";i:10949;s:10:"subsetneq;";i:8842;s:11:"subsetneqq;";i:10955;s:7:"subsim;";i:10951;s:7:"subsub;";i:10965;s:7:"subsup;";i:10963;s:5:"succ;";i:8827;s:11:"succapprox;";i:10936;s:12:"succcurlyeq;";i:8829;s:7:"succeq;";i:10928;s:12:"succnapprox;";i:10938;s:9:"succneqq;";i:10934;s:9:"succnsim;";i:8937;s:8:"succsim;";i:8831;s:4:"sum;";i:8721;s:5:"sung;";i:9834;s:5:"sup1;";i:185;s:4:"sup1";i:185;s:5:"sup2;";i:178;s:4:"sup2";i:178;s:5:"sup3;";i:179;s:4:"sup3";i:179;s:4:"sup;";i:8835;s:5:"supE;";i:10950;s:7:"supdot;";i:10942;s:8:"supdsub;";i:10968;s:5:"supe;";i:8839;s:8:"supedot;";i:10948;s:8:"suphsub;";i:10967;s:8:"suplarr;";i:10619;s:8:"supmult;";i:10946;s:6:"supnE;";i:10956;s:6:"supne;";i:8843;s:8:"supplus;";i:10944;s:7:"supset;";i:8835;s:9:"supseteq;";i:8839;s:10:"supseteqq;";i:10950;s:10:"supsetneq;";i:8843;s:11:"supsetneqq;";i:10956;s:7:"supsim;";i:10952;s:7:"supsub;";i:10964;s:7:"supsup;";i:10966;s:6:"swArr;";i:8665;s:7:"swarhk;";i:10534;s:6:"swarr;";i:8601;s:8:"swarrow;";i:8601;s:7:"swnwar;";i:10538;s:6:"sz
lig;";i:223;s:5:"szlig";i:223;s:7:"target;";i:8982;s:4:"tau;";i:964;s:5:"tbrk;";i:9140;s:7:"tcaron;";i:357;s:7:"tcedil;";i:355;s:4:"tcy;";i:1090;s:5:"tdot;";i:8411;s:7:"telrec;";i:8981;s:4:"tfr;";i:120113;s:7:"there4;";i:8756;s:10:"therefore;";i:8756;s:6:"theta;";i:952;s:9:"thetasym;";i:977;s:7:"thetav;";i:977;s:12:"thickapprox;";i:8776;s:9:"thicksim;";i:8764;s:7:"thinsp;";i:8201;s:6:"thkap;";i:8776;s:7:"thksim;";i:8764;s:6:"thorn;";i:254;s:5:"thorn";i:254;s:6:"tilde;";i:732;s:6:"times;";i:215;s:5:"times";i:215;s:7:"timesb;";i:8864;s:9:"timesbar;";i:10801;s:7:"timesd;";i:10800;s:5:"tint;";i:8749;s:5:"toea;";i:10536;s:4:"top;";i:8868;s:7:"topbot;";i:9014;s:7:"topcir;";i:10993;s:5:"topf;";i:120165;s:8:"topfork;";i:10970;s:5:"tosa;";i:10537;s:7:"tprime;";i:8244;s:6:"trade;";i:8482;s:9:"triangle;";i:9653;s:13:"triangledown;";i:9663;s:13:"triangleleft;";i:9667;s:15:"trianglelefteq;";i:8884;s:10:"triangleq;";i:8796;s:14:"triangleright;";i:9657;s:16:"trianglerighteq;";i:8885;s:7:"tridot;";i:9708;s:5:"trie;";i:8796;s:9:"triminus;";i:10810;s:8:"triplus;";i:10809;s:6:"trisb;";i:10701;s:8:"tritime;";i:10811;s:9:"trpezium;";i:9186;s:5:"tscr;";i:120009;s:5:"tscy;";i:1094;s:6:"tshcy;";i:1115;s:7:"tstrok;";i:359;s:6:"twixt;";i:8812;s:17:"twoheadleftarrow;";i:8606;s:18:"twoheadrightarrow;";i:8608;s:5:"uArr;";i:8657;s:5:"uHar;";i:10595;s:7:"uacute;";i:250;s:6:"uacute";i:250;s:5:"uarr;";i:8593;s:6:"ubrcy;";i:1118;s:7:"ubreve;";i:365;s:6:"ucirc;";i:251;s:5:"ucirc";i:251;s:4:"ucy;";i:1091;s:6:"udarr;";i:8645;s:7:"udblac;";i:369;s:6:"udhar;";i:10606;s:7:"ufisht;";i:10622;s:4:"ufr;";i:120114;s:7:"ugrave;";i:249;s:6:"ugrave";i:249;s:6:"uharl;";i:8639;s:6:"uharr;";i:8638;s:6:"uhblk;";i:9600;s:7:"ulcorn;";i:8988;s:9:"ulcorner;";i:8988;s:7:"ulcrop;";i:8975;s:6:"ultri;";i:9720;s:6:"umacr;";i:363;s:4:"uml;";i:168;s:3:"uml";i:168;s:6:"uogon;";i:371;s:5:"uopf;";i:120166;s:8:"uparrow;";i:8593;s:12:"updownarrow;";i:8597;s:14:"upharpoonleft;";i:8639;s:15:"upharpoonright;";i:8638;s:6:"uplus;";i:8846
;s:5:"upsi;";i:965;s:6:"upsih;";i:978;s:8:"upsilon;";i:965;s:11:"upuparrows;";i:8648;s:7:"urcorn;";i:8989;s:9:"urcorner;";i:8989;s:7:"urcrop;";i:8974;s:6:"uring;";i:367;s:6:"urtri;";i:9721;s:5:"uscr;";i:120010;s:6:"utdot;";i:8944;s:7:"utilde;";i:361;s:5:"utri;";i:9653;s:6:"utrif;";i:9652;s:6:"uuarr;";i:8648;s:5:"uuml;";i:252;s:4:"uuml";i:252;s:8:"uwangle;";i:10663;s:5:"vArr;";i:8661;s:5:"vBar;";i:10984;s:6:"vBarv;";i:10985;s:6:"vDash;";i:8872;s:7:"vangrt;";i:10652;s:11:"varepsilon;";i:949;s:9:"varkappa;";i:1008;s:11:"varnothing;";i:8709;s:7:"varphi;";i:966;s:6:"varpi;";i:982;s:10:"varpropto;";i:8733;s:5:"varr;";i:8597;s:7:"varrho;";i:1009;s:9:"varsigma;";i:962;s:9:"vartheta;";i:977;s:16:"vartriangleleft;";i:8882;s:17:"vartriangleright;";i:8883;s:4:"vcy;";i:1074;s:6:"vdash;";i:8866;s:4:"vee;";i:8744;s:7:"veebar;";i:8891;s:6:"veeeq;";i:8794;s:7:"vellip;";i:8942;s:7:"verbar;";i:124;s:5:"vert;";i:124;s:4:"vfr;";i:120115;s:6:"vltri;";i:8882;s:5:"vopf;";i:120167;s:6:"vprop;";i:8733;s:6:"vrtri;";i:8883;s:5:"vscr;";i:120011;s:8:"vzigzag;";i:10650;s:6:"wcirc;";i:373;s:7:"wedbar;";i:10847;s:6:"wedge;";i:8743;s:7:"wedgeq;";i:8793;s:7:"weierp;";i:8472;s:4:"wfr;";i:120116;s:5:"wopf;";i:120168;s:3:"wp;";i:8472;s:3:"wr;";i:8768;s:7:"wreath;";i:8768;s:5:"wscr;";i:120012;s:5:"xcap;";i:8898;s:6:"xcirc;";i:9711;s:5:"xcup;";i:8899;s:6:"xdtri;";i:9661;s:4:"xfr;";i:120117;s:6:"xhArr;";i:10234;s:6:"xharr;";i:10231;s:3:"xi;";i:958;s:6:"xlArr;";i:10232;s:6:"xlarr;";i:10229;s:5:"xmap;";i:10236;s:5:"xnis;";i:8955;s:6:"xodot;";i:10752;s:5:"xopf;";i:120169;s:7:"xoplus;";i:10753;s:7:"xotime;";i:10754;s:6:"xrArr;";i:10233;s:6:"xrarr;";i:10230;s:5:"xscr;";i:120013;s:7:"xsqcup;";i:10758;s:7:"xuplus;";i:10756;s:6:"xutri;";i:9651;s:5:"xvee;";i:8897;s:7:"xwedge;";i:8896;s:7:"yacute;";i:253;s:6:"yacute";i:253;s:5:"yacy;";i:1103;s:6:"ycirc;";i:375;s:4:"ycy;";i:1099;s:4:"yen;";i:165;s:3:"yen";i:165;s:4:"yfr;";i:120118;s:5:"yicy;";i:1111;s:5:"yopf;";i:120170;s:5:"yscr;";i:120014;s:5:"yucy;";i:1102;s:5:"yu
ml;";i:255;s:4:"yuml";i:255;s:7:"zacute;";i:378;s:7:"zcaron;";i:382;s:4:"zcy;";i:1079;s:5:"zdot;";i:380;s:7:"zeetrf;";i:8488;s:5:"zeta;";i:950;s:4:"zfr;";i:120119;s:5:"zhcy;";i:1078;s:8:"zigrarr;";i:8669;s:5:"zopf;";i:120171;s:5:"zscr;";i:120015;s:4:"zwj;";i:8205;s:5:"zwnj;";i:8204;} \ No newline at end of file