diff options
Diffstat (limited to 'lib/htmlpurifier/maintenance')
22 files changed, 0 insertions, 5156 deletions
diff --git a/lib/htmlpurifier/maintenance/.htaccess b/lib/htmlpurifier/maintenance/.htaccess deleted file mode 100644 index 3a4288278..000000000 --- a/lib/htmlpurifier/maintenance/.htaccess +++ /dev/null @@ -1 +0,0 @@ -Deny from all diff --git a/lib/htmlpurifier/maintenance/PH5P.php b/lib/htmlpurifier/maintenance/PH5P.php deleted file mode 100644 index 96d0d13f9..000000000 --- a/lib/htmlpurifier/maintenance/PH5P.php +++ /dev/null @@ -1,3824 +0,0 @@ -<?php -class HTML5 { - private $data; - private $char; - private $EOF; - private $state; - private $tree; - private $token; - private $content_model; - private $escape = false; - private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', - 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', - 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', - 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', - 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', - 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', - 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', - 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', - 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', - 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', - 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', - 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', - 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', - 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', - 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', - 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', - 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', - 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', - 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', - 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', - 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', - 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', - 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', - 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', - 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', - 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', - 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', - 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', - 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', - 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', - 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', - 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', - 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', - 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', - 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', - 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', - 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', - 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', - 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', - 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', - 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', - 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); - - const PCDATA = 0; - const RCDATA = 1; - const CDATA = 2; - const PLAINTEXT = 3; - - const DOCTYPE = 0; - const STARTTAG = 1; - const ENDTAG = 2; - const COMMENT = 3; - const CHARACTR = 4; - const EOF = 5; - - public function __construct($data) { - $data = str_replace("\r\n", "\n", $data); - $date = str_replace("\r", null, $data); - - $this->data = $data; - $this->char = -1; - $this->EOF = strlen($data); - $this->tree = new HTML5TreeConstructer; - $this->content_model = self::PCDATA; - - $this->state = 'data'; - - while($this->state !== null) { - $this->{$this->state.'State'}(); - } - } - - public function save() { - return $this->tree->save(); - } - - private function char() { - return ($this->char < $this->EOF) - ? $this->data[$this->char] - : false; - } - - private function character($s, $l = 0) { - if($s + $l < $this->EOF) { - if($l === 0) { - return $this->data[$s]; - } else { - return substr($this->data, $s, $l); - } - } - } - - private function characters($char_class, $start) { - return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); - } - - private function dataState() { - // Consume the next input character - $this->char++; - $char = $this->char(); - - if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { - /* U+0026 AMPERSAND (&) - When the content model flag is set to one of the PCDATA or RCDATA - states: switch to the entity data state. Otherwise: treat it as per - the "anything else" entry below. */ - $this->state = 'entityData'; - - } elseif($char === '-') { - /* If the content model flag is set to either the RCDATA state or - the CDATA state, and the escape flag is false, and there are at - least three characters before this one in the input stream, and the - last four characters in the input stream, including this one, are - U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, - and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */ - if(($this->content_model === self::RCDATA || $this->content_model === - self::CDATA) && $this->escape === false && - $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { - $this->escape = true; - } - - /* In any case, emit the input character as a character token. Stay - in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => $char - )); - - /* U+003C LESS-THAN SIGN (<) */ - } elseif($char === '<' && ($this->content_model === self::PCDATA || - (($this->content_model === self::RCDATA || - $this->content_model === self::CDATA) && $this->escape === false))) { - /* When the content model flag is set to the PCDATA state: switch - to the tag open state. - - When the content model flag is set to either the RCDATA state or - the CDATA state and the escape flag is false: switch to the tag - open state. - - Otherwise: treat it as per the "anything else" entry below. */ - $this->state = 'tagOpen'; - - /* U+003E GREATER-THAN SIGN (>) */ - } elseif($char === '>') { - /* If the content model flag is set to either the RCDATA state or - the CDATA state, and the escape flag is true, and the last three - characters in the input stream including this one are U+002D - HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), - set the escape flag to false. */ - if(($this->content_model === self::RCDATA || - $this->content_model === self::CDATA) && $this->escape === true && - $this->character($this->char, 3) === '-->') { - $this->escape = false; - } - - /* In any case, emit the input character as a character token. - Stay in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => $char - )); - - } elseif($this->char === $this->EOF) { - /* EOF - Emit an end-of-file token. */ - $this->EOF(); - - } elseif($this->content_model === self::PLAINTEXT) { - /* When the content model flag is set to the PLAINTEXT state - THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of - the text and emit it as a character token. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => substr($this->data, $this->char) - )); - - $this->EOF(); - - } else { - /* Anything else - THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that - otherwise would also be treated as a character token and emit it - as a single character token. Stay in the data state. */ - $len = strcspn($this->data, '<&', $this->char); - $char = substr($this->data, $this->char, $len); - $this->char += $len - 1; - - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => $char - )); - - $this->state = 'data'; - } - } - - private function entityDataState() { - // Attempt to consume an entity. - $entity = $this->entity(); - - // If nothing is returned, emit a U+0026 AMPERSAND character token. - // Otherwise, emit the character token that was returned. - $char = (!$entity) ? '&' : $entity; - $this->emitToken($char); - - // Finally, switch to the data state. - $this->state = 'data'; - } - - private function tagOpenState() { - switch($this->content_model) { - case self::RCDATA: - case self::CDATA: - /* If the next input character is a U+002F SOLIDUS (/) character, - consume it and switch to the close tag open state. If the next - input character is not a U+002F SOLIDUS (/) character, emit a - U+003C LESS-THAN SIGN character token and switch to the data - state to process the next input character. */ - if($this->character($this->char + 1) === '/') { - $this->char++; - $this->state = 'closeTagOpen'; - - } else { - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '<' - )); - - $this->state = 'data'; - } - break; - - case self::PCDATA: - // If the content model flag is set to the PCDATA state - // Consume the next input character: - $this->char++; - $char = $this->char(); - - if($char === '!') { - /* U+0021 EXCLAMATION MARK (!) - Switch to the markup declaration open state. */ - $this->state = 'markupDeclarationOpen'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the close tag open state. */ - $this->state = 'closeTagOpen'; - - } elseif(preg_match('/^[A-Za-z]$/', $char)) { - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z - Create a new start tag token, set its tag name to the lowercase - version of the input character (add 0x0020 to the character's code - point), then switch to the tag name state. (Don't emit the token - yet; further details will be filled in before it is emitted.) */ - $this->token = array( - 'name' => strtolower($char), - 'type' => self::STARTTAG, - 'attr' => array() - ); - - $this->state = 'tagName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Emit a U+003C LESS-THAN SIGN character token and a - U+003E GREATER-THAN SIGN character token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '<>' - )); - - $this->state = 'data'; - - } elseif($char === '?') { - /* U+003F QUESTION MARK (?) - Parse error. Switch to the bogus comment state. */ - $this->state = 'bogusComment'; - - } else { - /* Anything else - Parse error. Emit a U+003C LESS-THAN SIGN character token and - reconsume the current input character in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '<' - )); - - $this->char--; - $this->state = 'data'; - } - break; - } - } - - private function closeTagOpenState() { - $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); - $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; - - if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && - (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', - $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { - /* If the content model flag is set to the RCDATA or CDATA states then - examine the next few characters. If they do not match the tag name of - the last start tag token emitted (case insensitively), or if they do but - they are not immediately followed by one of the following characters: - * U+0009 CHARACTER TABULATION - * U+000A LINE FEED (LF) - * U+000B LINE TABULATION - * U+000C FORM FEED (FF) - * U+0020 SPACE - * U+003E GREATER-THAN SIGN (>) - * U+002F SOLIDUS (/) - * EOF - ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character - token, a U+002F SOLIDUS character token, and switch to the data state - to process the next input character. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '</' - )); - - $this->state = 'data'; - - } else { - /* Otherwise, if the content model flag is set to the PCDATA state, - or if the next few characters do match that tag name, consume the - next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[A-Za-z]$/', $char)) { - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z - Create a new end tag token, set its tag name to the lowercase version - of the input character (add 0x0020 to the character's code point), then - switch to the tag name state. (Don't emit the token yet; further details - will be filled in before it is emitted.) */ - $this->token = array( - 'name' => strtolower($char), - 'type' => self::ENDTAG - ); - - $this->state = 'tagName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Switch to the data state. */ - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F - SOLIDUS character token. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '</' - )); - - $this->char--; - $this->state = 'data'; - - } else { - /* Parse error. Switch to the bogus comment state. */ - $this->state = 'bogusComment'; - } - } - } - - private function tagNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Switch to the before - attribute name state. */ - $this->state = 'beforeAttributeName'; - - } else { - /* Anything else - Append the current input character to the current tag token's tag name. - Stay in the tag name state. */ - $this->token['name'] .= strtolower($char); - $this->state = 'tagName'; - } - } - - private function beforeAttributeNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Stay in the before - attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Start a new attribute in the current tag token. Set that attribute's - name to the current input character, and its value to the empty string. - Switch to the attribute name state. */ - $this->token['attr'][] = array( - 'name' => strtolower($char), - 'value' => null - ); - - $this->state = 'attributeName'; - } - } - - private function attributeNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute name state. */ - $this->state = 'afterAttributeName'; - - } elseif($char === '=') { - /* U+003D EQUALS SIGN (=) - Switch to the before attribute value state. */ - $this->state = 'beforeAttributeValue'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Switch to the before - attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's name. - Stay in the attribute name state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['name'] .= strtolower($char); - - $this->state = 'attributeName'; - } - } - - private function afterAttributeNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after attribute name state. */ - $this->state = 'afterAttributeName'; - - } elseif($char === '=') { - /* U+003D EQUALS SIGN (=) - Switch to the before attribute value state. */ - $this->state = 'beforeAttributeValue'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Switch to the - before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Start a new attribute in the current tag token. Set that attribute's - name to the current input character, and its value to the empty string. - Switch to the attribute name state. */ - $this->token['attr'][] = array( - 'name' => strtolower($char), - 'value' => null - ); - - $this->state = 'attributeName'; - } - } - - private function beforeAttributeValueState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute value state. */ - $this->state = 'beforeAttributeValue'; - - } elseif($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the attribute value (double-quoted) state. */ - $this->state = 'attributeValueDoubleQuoted'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the attribute value (unquoted) state and reconsume - this input character. */ - $this->char--; - $this->state = 'attributeValueUnquoted'; - - } elseif($char === '\'') { - /* U+0027 APOSTROPHE (') - Switch to the attribute value (single-quoted) state. */ - $this->state = 'attributeValueSingleQuoted'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Switch to the attribute value (unquoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueUnquoted'; - } - } - - private function attributeValueDoubleQuotedState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state. */ - $this->entityInAttributeValueState('double'); - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (double-quoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueDoubleQuoted'; - } - } - - private function attributeValueSingleQuotedState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if($char === '\'') { - /* U+0022 QUOTATION MARK (') - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state. */ - $this->entityInAttributeValueState('single'); - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (single-quoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueSingleQuoted'; - } - } - - private function attributeValueUnquotedState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state. */ - $this->entityInAttributeValueState('non'); - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (unquoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueUnquoted'; - } - } - - private function entityInAttributeValueState() { - // Attempt to consume an entity. - $entity = $this->entity(); - - // If nothing is returned, append a U+0026 AMPERSAND character to the - // current attribute's value. Otherwise, emit the character token that - // was returned. - $char = (!$entity) - ? '&' - : $entity; - - $this->emitToken($char); - } - - private function bogusCommentState() { - /* Consume every character up to the first U+003E GREATER-THAN SIGN - character (>) or the end of the file (EOF), whichever comes first. Emit - a comment token whose data is the concatenation of all the characters - starting from and including the character that caused the state machine - to switch into the bogus comment state, up to and including the last - consumed character before the U+003E character, if any, or up to the - end of the file otherwise. (If the comment was started by the end of - the file (EOF), the token is empty.) */ - $data = $this->characters('^>', $this->char); - $this->emitToken(array( - 'data' => $data, - 'type' => self::COMMENT - )); - - $this->char += strlen($data); - - /* Switch to the data state. */ - $this->state = 'data'; - - /* If the end of the file was reached, reconsume the EOF character. */ - if($this->char === $this->EOF) { - $this->char = $this->EOF - 1; - } - } - - private function markupDeclarationOpenState() { - /* If the next two characters are both U+002D HYPHEN-MINUS (-) - characters, consume those two characters, create a comment token whose - data is the empty string, and switch to the comment state. */ - if($this->character($this->char + 1, 2) === '--') { - $this->char += 2; - $this->state = 'comment'; - $this->token = array( - 'data' => null, - 'type' => self::COMMENT - ); - - /* Otherwise if the next seven chacacters are a case-insensitive match - for the word "DOCTYPE", then consume those characters and switch to the - DOCTYPE state. */ - } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { - $this->char += 7; - $this->state = 'doctype'; - - /* Otherwise, is is a parse error. Switch to the bogus comment state. - The next character that is consumed, if any, is the first character - that will be in the comment. */ - } else { - $this->char++; - $this->state = 'bogusComment'; - } - } - - private function commentState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - /* U+002D HYPHEN-MINUS (-) */ - if($char === '-') { - /* Switch to the comment dash state */ - $this->state = 'commentDash'; - - /* EOF */ - } elseif($this->char === $this->EOF) { - /* Parse error. Emit the comment token. Reconsume the EOF character - in the data state. */ - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - /* Anything else */ - } else { - /* Append the input character to the comment token's data. Stay in - the comment state. */ - $this->token['data'] .= $char; - } - } - - private function commentDashState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - /* U+002D HYPHEN-MINUS (-) */ - if($char === '-') { - /* Switch to the comment end state */ - $this->state = 'commentEnd'; - - /* EOF */ - } elseif($this->char === $this->EOF) { - /* Parse error. Emit the comment token. Reconsume the EOF character - in the data state. */ - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - /* Anything else */ - } else { - /* Append a U+002D HYPHEN-MINUS (-) character and the input - character to the comment token's data. Switch to the comment state. */ - $this->token['data'] .= '-'.$char; - $this->state = 'comment'; - } - } - - private function commentEndState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '-') { - $this->token['data'] .= '-'; - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - $this->token['data'] .= '--'.$char; - $this->state = 'comment'; - } - } - - private function doctypeState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - $this->state = 'beforeDoctypeName'; - - } else { - $this->char--; - $this->state = 'beforeDoctypeName'; - } - } - - private function beforeDoctypeNameState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - // Stay in the before DOCTYPE name state. - - } elseif(preg_match('/^[a-z]$/', $char)) { - $this->token = array( - 'name' => strtoupper($char), - 'type' => self::DOCTYPE, - 'error' => true - ); - - $this->state = 'doctypeName'; - - } elseif($char === '>') { - $this->emitToken(array( - 'name' => null, - 'type' => self::DOCTYPE, - 'error' => true - )); - - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - $this->emitToken(array( - 'name' => null, - 'type' => self::DOCTYPE, - 'error' => true - )); - - $this->char--; - $this->state = 'data'; - - } else { - $this->token = array( - 'name' => $char, - 'type' => self::DOCTYPE, - 'error' => true - ); - - $this->state = 'doctypeName'; - } - } - - private function doctypeNameState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - $this->state = 'AfterDoctypeName'; - - } elseif($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif(preg_match('/^[a-z]$/', $char)) { - $this->token['name'] .= strtoupper($char); - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - $this->token['name'] .= $char; - } - - $this->token['error'] = ($this->token['name'] === 'HTML') - ? false - : true; - } - - private function afterDoctypeNameState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - // Stay in the DOCTYPE name state. - - } elseif($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - $this->token['error'] = true; - $this->state = 'bogusDoctype'; - } - } - - private function bogusDoctypeState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - // Stay in the bogus DOCTYPE state. - } - } - - private function entity() { - $start = $this->char; - - // This section defines how to consume an entity. This definition is - // used when parsing entities in text and in attributes. - - // The behaviour depends on the identity of the next character (the - // one immediately after the U+0026 AMPERSAND character): - - switch($this->character($this->char + 1)) { - // U+0023 NUMBER SIGN (#) - case '#': - - // The behaviour further depends on the character after the - // U+0023 NUMBER SIGN: - switch($this->character($this->char + 1)) { - // U+0078 LATIN SMALL LETTER X - // U+0058 LATIN CAPITAL LETTER X - case 'x': - case 'X': - // Follow the steps below, but using the range of - // characters U+0030 DIGIT ZERO through to U+0039 DIGIT - // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 - // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER - // A, through to U+0046 LATIN CAPITAL LETTER F (in other - // words, 0-9, A-F, a-f). - $char = 1; - $char_class = '0-9A-Fa-f'; - break; - - // Anything else - default: - // Follow the steps below, but using the range of - // characters U+0030 DIGIT ZERO through to U+0039 DIGIT - // NINE (i.e. just 0-9). - $char = 0; - $char_class = '0-9'; - break; - } - - // Consume as many characters as match the range of characters - // given above. - $this->char++; - $e_name = $this->characters($char_class, $this->char + $char + 1); - $entity = $this->character($start, $this->char); - $cond = strlen($e_name) > 0; - - // The rest of the parsing happens bellow. - break; - - // Anything else - default: - // Consume the maximum number of characters possible, with the - // consumed characters case-sensitively matching one of the - // identifiers in the first column of the entities table. - $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); - $len = strlen($e_name); - - for($c = 1; $c <= $len; $c++) { - $id = substr($e_name, 0, $c); - $this->char++; - - if(in_array($id, $this->entities)) { - $entity = $id; - break; - } - } - - $cond = isset($entity); - // The rest of the parsing happens bellow. - break; - } - - if(!$cond) { - // If no match can be made, then this is a parse error. No - // characters are consumed, and nothing is returned. - $this->char = $start; - return false; - } - - // Return a character token for the character corresponding to the - // entity name (as given by the second column of the entities table). - return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); - } - - private function emitToken($token) { - $emit = $this->tree->emitToken($token); - - if(is_int($emit)) { - $this->content_model = $emit; - - } elseif($token['type'] === self::ENDTAG) { - $this->content_model = self::PCDATA; - } - } - - private function EOF() { - $this->state = null; - $this->tree->emitToken(array( - 'type' => self::EOF - )); - } -} - -class HTML5TreeConstructer { - public $stack = array(); - - private $phase; - private $mode; - private $dom; - private $foster_parent = null; - private $a_formatting = array(); - - private $head_pointer = null; - private $form_pointer = null; - - private $scoping = array('button','caption','html','marquee','object','table','td','th'); - private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); - private $special = array('address','area','base','basefont','bgsound', - 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', - 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', - 'h6','head','hr','iframe','image','img','input','isindex','li','link', - 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', - 'option','p','param','plaintext','pre','script','select','spacer','style', - 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); - - // The different phases. - const INIT_PHASE = 0; - const ROOT_PHASE = 1; - const MAIN_PHASE = 2; - const END_PHASE = 3; - - // The different insertion modes for the main phase. - const BEFOR_HEAD = 0; - const IN_HEAD = 1; - const AFTER_HEAD = 2; - const IN_BODY = 3; - const IN_TABLE = 4; - const IN_CAPTION = 5; - const IN_CGROUP = 6; - const IN_TBODY = 7; - const IN_ROW = 8; - const IN_CELL = 9; - const IN_SELECT = 10; - const AFTER_BODY = 11; - const IN_FRAME = 12; - const AFTR_FRAME = 13; - - // The different types of elements. - const SPECIAL = 0; - const SCOPING = 1; - const FORMATTING = 2; - const PHRASING = 3; - - const MARKER = 0; - - public function __construct() { - $this->phase = self::INIT_PHASE; - $this->mode = self::BEFOR_HEAD; - $this->dom = new DOMDocument; - - $this->dom->encoding = 'UTF-8'; - $this->dom->preserveWhiteSpace = true; - $this->dom->substituteEntities = true; - $this->dom->strictErrorChecking = false; - } - - // Process tag tokens - public function emitToken($token) { - switch($this->phase) { - case self::INIT_PHASE: return $this->initPhase($token); break; - case self::ROOT_PHASE: return $this->rootElementPhase($token); break; - case self::MAIN_PHASE: return $this->mainPhase($token); break; - case self::END_PHASE : return $this->trailingEndPhase($token); break; - } - } - - private function initPhase($token) { - /* Initially, the tree construction stage must handle each token - emitted from the tokenisation stage as follows: */ - - /* A DOCTYPE token that is marked as being in error - A comment token - A start tag token - An end tag token - A character token that is not one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE - An end-of-file token */ - if((isset($token['error']) && $token['error']) || - $token['type'] === HTML5::COMMENT || - $token['type'] === HTML5::STARTTAG || - $token['type'] === HTML5::ENDTAG || - $token['type'] === HTML5::EOF || - ($token['type'] === HTML5::CHARACTR && isset($token['data']) && - !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { - /* This specification does not define how to handle this case. In - particular, user agents may ignore the entirety of this specification - altogether for such documents, and instead invoke special parse modes - with a greater emphasis on backwards compatibility. */ - - $this->phase = self::ROOT_PHASE; - return $this->rootElementPhase($token); - - /* A DOCTYPE token marked as being correct */ - } elseif(isset($token['error']) && !$token['error']) { - /* Append a DocumentType node to the Document node, with the name - attribute set to the name given in the DOCTYPE token (which will be - "HTML"), and the other attributes specific to DocumentType objects - set to null, empty lists, or the empty string as appropriate. */ - $doctype = new DOMDocumentType(null, null, 'HTML'); - - /* Then, switch to the root element phase of the tree construction - stage. */ - $this->phase = self::ROOT_PHASE; - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', - $token['data'])) { - /* Append that character to the Document node. */ - $text = $this->dom->createTextNode($token['data']); - $this->dom->appendChild($text); - } - } - - private function rootElementPhase($token) { - /* After the initial phase, as each token is emitted from the tokenisation - stage, it must be processed as described in this section. */ - - /* A DOCTYPE token */ - if($token['type'] === HTML5::DOCTYPE) { - // Parse error. Ignore the token. - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - $this->dom->appendChild($comment); - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - } elseif($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append that character to the Document node. */ - $text = $this->dom->createTextNode($token['data']); - $this->dom->appendChild($text); - - /* A character token that is not one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED - (FF), or U+0020 SPACE - A start tag token - An end tag token - An end-of-file token */ - } elseif(($token['type'] === HTML5::CHARACTR && - !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || - $token['type'] === HTML5::STARTTAG || - $token['type'] === HTML5::ENDTAG || - $token['type'] === HTML5::EOF) { - /* Create an HTMLElement node with the tag name html, in the HTML - namespace. Append it to the Document object. Switch to the main - phase and reprocess the current token. */ - $html = $this->dom->createElement('html'); - $this->dom->appendChild($html); - $this->stack[] = $html; - - $this->phase = self::MAIN_PHASE; - return $this->mainPhase($token); - } - } - - private function mainPhase($token) { - /* Tokens in the main phase must be handled as follows: */ - - /* A DOCTYPE token */ - if($token['type'] === HTML5::DOCTYPE) { - // Parse error. Ignore the token. - - /* A start tag token with the tag name "html" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { - /* If this start tag token was not the first start tag token, then - it is a parse error. */ - - /* For each attribute on the token, check to see if the attribute - is already present on the top element of the stack of open elements. - If it is not, add the attribute and its corresponding value to that - element. */ - foreach($token['attr'] as $attr) { - if(!$this->stack[0]->hasAttribute($attr['name'])) { - $this->stack[0]->setAttribute($attr['name'], $attr['value']); - } - } - - /* An end-of-file token */ - } elseif($token['type'] === HTML5::EOF) { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* Anything else. */ - } else { - /* Depends on the insertion mode: */ - switch($this->mode) { - case self::BEFOR_HEAD: return $this->beforeHead($token); break; - case self::IN_HEAD: return $this->inHead($token); break; - case self::AFTER_HEAD: return $this->afterHead($token); break; - case self::IN_BODY: return $this->inBody($token); break; - case self::IN_TABLE: return $this->inTable($token); break; - case self::IN_CAPTION: return $this->inCaption($token); break; - case self::IN_CGROUP: return $this->inColumnGroup($token); break; - case self::IN_TBODY: return $this->inTableBody($token); break; - case self::IN_ROW: return $this->inRow($token); break; - case self::IN_CELL: return $this->inCell($token); break; - case self::IN_SELECT: return $this->inSelect($token); break; - case self::AFTER_BODY: return $this->afterBody($token); break; - case self::IN_FRAME: return $this->inFrameset($token); break; - case self::AFTR_FRAME: return $this->afterFrameset($token); break; - case self::END_PHASE: return $this->trailingEndPhase($token); break; - } - } - } - - private function beforeHead($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag token with the tag name "head" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { - /* Create an element for the token, append the new element to the - current node and push it onto the stack of open elements. */ - $element = $this->insertElement($token); - - /* Set the head element pointer to this new element node. */ - $this->head_pointer = $element; - - /* Change the insertion mode to "in head". */ - $this->mode = self::IN_HEAD; - - /* A start tag token whose tag name is one of: "base", "link", "meta", - "script", "style", "title". Or an end tag with the tag name "html". - Or a character token that is not one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE. Or any other start tag token */ - } elseif($token['type'] === HTML5::STARTTAG || - ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || - ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', - $token['data']))) { - /* Act as if a start tag token with the tag name "head" and no - attributes had been seen, then reprocess the current token. */ - $this->beforeHead(array( - 'name' => 'head', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inHead($token); - - /* Any other end tag */ - } elseif($token['type'] === HTML5::ENDTAG) { - /* Parse error. Ignore the token. */ - } - } - - private function inHead($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE. - - THIS DIFFERS FROM THE SPEC: If the current node is either a title, style - or script element, append the character to the current node regardless - of its content. */ - if(($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( - $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, - array('title', 'style', 'script')))) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - } elseif($token['type'] === HTML5::ENDTAG && - in_array($token['name'], array('title', 'style', 'script'))) { - array_pop($this->stack); - return HTML5::PCDATA; - - /* A start tag with the tag name "title" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { - /* Create an element for the token and append the new element to the - node pointed to by the head element pointer, or, if that is null - (innerHTML case), to the current node. */ - if($this->head_pointer !== null) { - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - - } else { - $element = $this->insertElement($token); - } - - /* Switch the tokeniser's content model flag to the RCDATA state. */ - return HTML5::RCDATA; - - /* A start tag with the tag name "style" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { - /* Create an element for the token and append the new element to the - node pointed to by the head element pointer, or, if that is null - (innerHTML case), to the current node. */ - if($this->head_pointer !== null) { - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - - } else { - $this->insertElement($token); - } - - /* Switch the tokeniser's content model flag to the CDATA state. */ - return HTML5::CDATA; - - /* A start tag with the tag name "script" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { - /* Create an element for the token. */ - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - - /* Switch the tokeniser's content model flag to the CDATA state. */ - return HTML5::CDATA; - - /* A start tag with the tag name "base", "link", or "meta" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('base', 'link', 'meta'))) { - /* Create an element for the token and append the new element to the - node pointed to by the head element pointer, or, if that is null - (innerHTML case), to the current node. */ - if($this->head_pointer !== null) { - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - array_pop($this->stack); - - } else { - $this->insertElement($token); - } - - /* An end tag with the tag name "head" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { - /* If the current node is a head element, pop the current node off - the stack of open elements. */ - if($this->head_pointer->isSameNode(end($this->stack))) { - array_pop($this->stack); - - /* Otherwise, this is a parse error. */ - } else { - // k - } - - /* Change the insertion mode to "after head". */ - $this->mode = self::AFTER_HEAD; - - /* A start tag with the tag name "head" or an end tag except "html". */ - } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || - ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { - // Parse error. Ignore the token. - - /* Anything else */ - } else { - /* If the current node is a head element, act as if an end tag - token with the tag name "head" had been seen. */ - if($this->head_pointer->isSameNode(end($this->stack))) { - $this->inHead(array( - 'name' => 'head', - 'type' => HTML5::ENDTAG - )); - - /* Otherwise, change the insertion mode to "after head". */ - } else { - $this->mode = self::AFTER_HEAD; - } - - /* Then, reprocess the current token. */ - return $this->afterHead($token); - } - } - - private function afterHead($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag token with the tag name "body" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { - /* Insert a body element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in body". */ - $this->mode = self::IN_BODY; - - /* A start tag token with the tag name "frameset" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { - /* Insert a frameset element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in frameset". */ - $this->mode = self::IN_FRAME; - - /* A start tag token whose tag name is one of: "base", "link", "meta", - "script", "style", "title" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('base', 'link', 'meta', 'script', 'style', 'title'))) { - /* Parse error. Switch the insertion mode back to "in head" and - reprocess the token. */ - $this->mode = self::IN_HEAD; - return $this->inHead($token); - - /* Anything else */ - } else { - /* Act as if a start tag token with the tag name "body" and no - attributes had been seen, and then reprocess the current token. */ - $this->afterHead(array( - 'name' => 'body', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inBody($token); - } - } - - private function inBody($token) { - /* Handle the token as follows: */ - - switch($token['type']) { - /* A character token */ - case HTML5::CHARACTR: - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Append the token's character to the current node. */ - $this->insertText($token['data']); - break; - - /* A comment token */ - case HTML5::COMMENT: - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - break; - - case HTML5::STARTTAG: - switch($token['name']) { - /* A start tag token whose tag name is one of: "script", - "style" */ - case 'script': case 'style': - /* Process the token as if the insertion mode had been "in - head". */ - return $this->inHead($token); - break; - - /* A start tag token whose tag name is one of: "base", "link", - "meta", "title" */ - case 'base': case 'link': case 'meta': case 'title': - /* Parse error. Process the token as if the insertion mode - had been "in head". */ - return $this->inHead($token); - break; - - /* A start tag token with the tag name "body" */ - case 'body': - /* Parse error. If the second element on the stack of open - elements is not a body element, or, if the stack of open - elements has only one node on it, then ignore the token. - (innerHTML case) */ - if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { - // Ignore - - /* Otherwise, for each attribute on the token, check to see - if the attribute is already present on the body element (the - second element) on the stack of open elements. If it is not, - add the attribute and its corresponding value to that - element. */ - } else { - foreach($token['attr'] as $attr) { - if(!$this->stack[1]->hasAttribute($attr['name'])) { - $this->stack[1]->setAttribute($attr['name'], $attr['value']); - } - } - } - break; - - /* A start tag whose tag name is one of: "address", - "blockquote", "center", "dir", "div", "dl", "fieldset", - "listing", "menu", "ol", "p", "ul" */ - case 'address': case 'blockquote': case 'center': case 'dir': - case 'div': case 'dl': case 'fieldset': case 'listing': - case 'menu': case 'ol': case 'p': case 'ul': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - break; - - /* A start tag whose tag name is "form" */ - case 'form': - /* If the form element pointer is not null, ignore the - token with a parse error. */ - if($this->form_pointer !== null) { - // Ignore. - - /* Otherwise: */ - } else { - /* If the stack of open elements has a p element in - scope, then act as if an end tag with the tag name p - had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token, and set the - form element pointer to point to the element created. */ - $element = $this->insertElement($token); - $this->form_pointer = $element; - } - break; - - /* A start tag whose tag name is "li", "dd" or "dt" */ - case 'li': case 'dd': case 'dt': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - $stack_length = count($this->stack) - 1; - - for($n = $stack_length; 0 <= $n; $n--) { - /* 1. Initialise node to be the current node (the - bottommost node of the stack). */ - $stop = false; - $node = $this->stack[$n]; - $cat = $this->getElementCategory($node->tagName); - - /* 2. If node is an li, dd or dt element, then pop all - the nodes from the current node up to node, including - node, then stop this algorithm. */ - if($token['name'] === $node->tagName || ($token['name'] !== 'li' - && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { - for($x = $stack_length; $x >= $n ; $x--) { - array_pop($this->stack); - } - - break; - } - - /* 3. If node is not in the formatting category, and is - not in the phrasing category, and is not an address or - div element, then stop this algorithm. */ - if($cat !== self::FORMATTING && $cat !== self::PHRASING && - $node->tagName !== 'address' && $node->tagName !== 'div') { - break; - } - } - - /* Finally, insert an HTML element with the same tag - name as the token's. */ - $this->insertElement($token); - break; - - /* A start tag token whose tag name is "plaintext" */ - case 'plaintext': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - return HTML5::PLAINTEXT; - break; - - /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", - "h5", "h6" */ - case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* If the stack of open elements has in scope an element whose - tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then - this is a parse error; pop elements from the stack until an - element with one of those tag names has been popped from the - stack. */ - while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { - array_pop($this->stack); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - break; - - /* A start tag whose tag name is "a" */ - case 'a': - /* If the list of active formatting elements contains - an element whose tag name is "a" between the end of the - list and the last marker on the list (or the start of - the list if there is no marker on the list), then this - is a parse error; act as if an end tag with the tag name - "a" had been seen, then remove that element from the list - of active formatting elements and the stack of open - elements if the end tag didn't already remove it (it - might not have if the element is not in table scope). */ - $leng = count($this->a_formatting); - - for($n = $leng - 1; $n >= 0; $n--) { - if($this->a_formatting[$n] === self::MARKER) { - break; - - } elseif($this->a_formatting[$n]->nodeName === 'a') { - $this->emitToken(array( - 'name' => 'a', - 'type' => HTML5::ENDTAG - )); - break; - } - } - - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $el = $this->insertElement($token); - - /* Add that element to the list of active formatting - elements. */ - $this->a_formatting[] = $el; - break; - - /* A start tag whose tag name is one of: "b", "big", "em", "font", - "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ - case 'b': case 'big': case 'em': case 'font': case 'i': - case 'nobr': case 's': case 'small': case 'strike': - case 'strong': case 'tt': case 'u': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $el = $this->insertElement($token); - - /* Add that element to the list of active formatting - elements. */ - $this->a_formatting[] = $el; - break; - - /* A start tag token whose tag name is "button" */ - case 'button': - /* If the stack of open elements has a button element in scope, - then this is a parse error; act as if an end tag with the tag - name "button" had been seen, then reprocess the token. (We don't - do that. Unnecessary.) */ - if($this->elementInScope('button')) { - $this->inBody(array( - 'name' => 'button', - 'type' => HTML5::ENDTAG - )); - } - - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - break; - - /* A start tag token whose tag name is one of: "marquee", "object" */ - case 'marquee': case 'object': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - break; - - /* A start tag token whose tag name is "xmp" */ - case 'xmp': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Switch the content model flag to the CDATA state. */ - return HTML5::CDATA; - break; - - /* A start tag whose tag name is "table" */ - case 'table': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in table". */ - $this->mode = self::IN_TABLE; - break; - - /* A start tag whose tag name is one of: "area", "basefont", - "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ - case 'area': case 'basefont': case 'bgsound': case 'br': - case 'embed': case 'img': case 'param': case 'spacer': - case 'wbr': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - break; - - /* A start tag whose tag name is "hr" */ - case 'hr': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - break; - - /* A start tag whose tag name is "image" */ - case 'image': - /* Parse error. Change the token's tag name to "img" and - reprocess it. (Don't ask.) */ - $token['name'] = 'img'; - return $this->inBody($token); - break; - - /* A start tag whose tag name is "input" */ - case 'input': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an input element for the token. */ - $element = $this->insertElement($token, false); - - /* If the form element pointer is not null, then associate the - input element with the form element pointed to by the form - element pointer. */ - $this->form_pointer !== null - ? $this->form_pointer->appendChild($element) - : end($this->stack)->appendChild($element); - - /* Pop that input element off the stack of open elements. */ - array_pop($this->stack); - break; - - /* A start tag whose tag name is "isindex" */ - case 'isindex': - /* Parse error. */ - // w/e - - /* If the form element pointer is not null, - then ignore the token. */ - if($this->form_pointer === null) { - /* Act as if a start tag token with the tag name "form" had - been seen. */ - $this->inBody(array( - 'name' => 'body', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a start tag token with the tag name "hr" had - been seen. */ - $this->inBody(array( - 'name' => 'hr', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a start tag token with the tag name "p" had - been seen. */ - $this->inBody(array( - 'name' => 'p', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a start tag token with the tag name "label" - had been seen. */ - $this->inBody(array( - 'name' => 'label', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a stream of character tokens had been seen. */ - $this->insertText('This is a searchable index. '. - 'Insert your search keywords here: '); - - /* Act as if a start tag token with the tag name "input" - had been seen, with all the attributes from the "isindex" - token, except with the "name" attribute set to the value - "isindex" (ignoring any explicit "name" attribute). */ - $attr = $token['attr']; - $attr[] = array('name' => 'name', 'value' => 'isindex'); - - $this->inBody(array( - 'name' => 'input', - 'type' => HTML5::STARTTAG, - 'attr' => $attr - )); - - /* Act as if a stream of character tokens had been seen - (see below for what they should say). */ - $this->insertText('This is a searchable index. '. - 'Insert your search keywords here: '); - - /* Act as if an end tag token with the tag name "label" - had been seen. */ - $this->inBody(array( - 'name' => 'label', - 'type' => HTML5::ENDTAG - )); - - /* Act as if an end tag token with the tag name "p" had - been seen. */ - $this->inBody(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - - /* Act as if a start tag token with the tag name "hr" had - been seen. */ - $this->inBody(array( - 'name' => 'hr', - 'type' => HTML5::ENDTAG - )); - - /* Act as if an end tag token with the tag name "form" had - been seen. */ - $this->inBody(array( - 'name' => 'form', - 'type' => HTML5::ENDTAG - )); - } - break; - - /* A start tag whose tag name is "textarea" */ - case 'textarea': - $this->insertElement($token); - - /* Switch the tokeniser's content model flag to the - RCDATA state. */ - return HTML5::RCDATA; - break; - - /* A start tag whose tag name is one of: "iframe", "noembed", - "noframes" */ - case 'iframe': case 'noembed': case 'noframes': - $this->insertElement($token); - - /* Switch the tokeniser's content model flag to the CDATA state. */ - return HTML5::CDATA; - break; - - /* A start tag whose tag name is "select" */ - case 'select': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in select". */ - $this->mode = self::IN_SELECT; - break; - - /* A start or end tag whose tag name is one of: "caption", "col", - "colgroup", "frame", "frameset", "head", "option", "optgroup", - "tbody", "td", "tfoot", "th", "thead", "tr". */ - case 'caption': case 'col': case 'colgroup': case 'frame': - case 'frameset': case 'head': case 'option': case 'optgroup': - case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': - case 'tr': - // Parse error. Ignore the token. - break; - - /* A start or end tag whose tag name is one of: "event-source", - "section", "nav", "article", "aside", "header", "footer", - "datagrid", "command" */ - case 'event-source': case 'section': case 'nav': case 'article': - case 'aside': case 'header': case 'footer': case 'datagrid': - case 'command': - // Work in progress! - break; - - /* A start tag token not covered by the previous entries */ - default: - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - $this->insertElement($token); - break; - } - break; - - case HTML5::ENDTAG: - switch($token['name']) { - /* An end tag with the tag name "body" */ - case 'body': - /* If the second element in the stack of open elements is - not a body element, this is a parse error. Ignore the token. - (innerHTML case) */ - if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { - // Ignore. - - /* If the current node is not the body element, then this - is a parse error. */ - } elseif(end($this->stack)->nodeName !== 'body') { - // Parse error. - } - - /* Change the insertion mode to "after body". */ - $this->mode = self::AFTER_BODY; - break; - - /* An end tag with the tag name "html" */ - case 'html': - /* Act as if an end tag with tag name "body" had been seen, - then, if that token wasn't ignored, reprocess the current - token. */ - $this->inBody(array( - 'name' => 'body', - 'type' => HTML5::ENDTAG - )); - - return $this->afterBody($token); - break; - - /* An end tag whose tag name is one of: "address", "blockquote", - "center", "dir", "div", "dl", "fieldset", "listing", "menu", - "ol", "pre", "ul" */ - case 'address': case 'blockquote': case 'center': case 'dir': - case 'div': case 'dl': case 'fieldset': case 'listing': - case 'menu': case 'ol': case 'pre': case 'ul': - /* If the stack of open elements has an element in scope - with the same tag name as that of the token, then generate - implied end tags. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(); - - /* Now, if the current node is not an element with - the same tag name as that of the token, then this - is a parse error. */ - // w/e - - /* If the stack of open elements has an element in - scope with the same tag name as that of the token, - then pop elements from this stack until an element - with that tag name has been popped from the stack. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === $token['name']) { - $n = -1; - } - - array_pop($this->stack); - } - } - break; - - /* An end tag whose tag name is "form" */ - case 'form': - /* If the stack of open elements has an element in scope - with the same tag name as that of the token, then generate - implied end tags. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(); - - } - - if(end($this->stack)->nodeName !== $token['name']) { - /* Now, if the current node is not an element with the - same tag name as that of the token, then this is a parse - error. */ - // w/e - - } else { - /* Otherwise, if the current node is an element with - the same tag name as that of the token pop that element - from the stack. */ - array_pop($this->stack); - } - - /* In any case, set the form element pointer to null. */ - $this->form_pointer = null; - break; - - /* An end tag whose tag name is "p" */ - case 'p': - /* If the stack of open elements has a p element in scope, - then generate implied end tags, except for p elements. */ - if($this->elementInScope('p')) { - $this->generateImpliedEndTags(array('p')); - - /* If the current node is not a p element, then this is - a parse error. */ - // k - - /* If the stack of open elements has a p element in - scope, then pop elements from this stack until the stack - no longer has a p element in scope. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->elementInScope('p')) { - array_pop($this->stack); - - } else { - break; - } - } - } - break; - - /* An end tag whose tag name is "dd", "dt", or "li" */ - case 'dd': case 'dt': case 'li': - /* If the stack of open elements has an element in scope - whose tag name matches the tag name of the token, then - generate implied end tags, except for elements with the - same tag name as the token. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(array($token['name'])); - - /* If the current node is not an element with the same - tag name as the token, then this is a parse error. */ - // w/e - - /* If the stack of open elements has an element in scope - whose tag name matches the tag name of the token, then - pop elements from this stack until an element with that - tag name has been popped from the stack. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === $token['name']) { - $n = -1; - } - - array_pop($this->stack); - } - } - break; - - /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", - "h5", "h6" */ - case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': - $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); - - /* If the stack of open elements has in scope an element whose - tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then - generate implied end tags. */ - if($this->elementInScope($elements)) { - $this->generateImpliedEndTags(); - - /* Now, if the current node is not an element with the same - tag name as that of the token, then this is a parse error. */ - // w/e - - /* If the stack of open elements has in scope an element - whose tag name is one of "h1", "h2", "h3", "h4", "h5", or - "h6", then pop elements from the stack until an element - with one of those tag names has been popped from the stack. */ - while($this->elementInScope($elements)) { - array_pop($this->stack); - } - } - break; - - /* An end tag whose tag name is one of: "a", "b", "big", "em", - "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ - case 'a': case 'b': case 'big': case 'em': case 'font': - case 'i': case 'nobr': case 's': case 'small': case 'strike': - case 'strong': case 'tt': case 'u': - /* 1. Let the formatting element be the last element in - the list of active formatting elements that: - * is between the end of the list and the last scope - marker in the list, if any, or the start of the list - otherwise, and - * has the same tag name as the token. - */ - while(true) { - for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { - if($this->a_formatting[$a] === self::MARKER) { - break; - - } elseif($this->a_formatting[$a]->tagName === $token['name']) { - $formatting_element = $this->a_formatting[$a]; - $in_stack = in_array($formatting_element, $this->stack, true); - $fe_af_pos = $a; - break; - } - } - - /* If there is no such node, or, if that node is - also in the stack of open elements but the element - is not in scope, then this is a parse error. Abort - these steps. The token is ignored. */ - if(!isset($formatting_element) || ($in_stack && - !$this->elementInScope($token['name']))) { - break; - - /* Otherwise, if there is such a node, but that node - is not in the stack of open elements, then this is a - parse error; remove the element from the list, and - abort these steps. */ - } elseif(isset($formatting_element) && !$in_stack) { - unset($this->a_formatting[$fe_af_pos]); - $this->a_formatting = array_merge($this->a_formatting); - break; - } - - /* 2. Let the furthest block be the topmost node in the - stack of open elements that is lower in the stack - than the formatting element, and is not an element in - the phrasing or formatting categories. There might - not be one. */ - $fe_s_pos = array_search($formatting_element, $this->stack, true); - $length = count($this->stack); - - for($s = $fe_s_pos + 1; $s < $length; $s++) { - $category = $this->getElementCategory($this->stack[$s]->nodeName); - - if($category !== self::PHRASING && $category !== self::FORMATTING) { - $furthest_block = $this->stack[$s]; - } - } - - /* 3. If there is no furthest block, then the UA must - skip the subsequent steps and instead just pop all - the nodes from the bottom of the stack of open - elements, from the current node up to the formatting - element, and remove the formatting element from the - list of active formatting elements. */ - if(!isset($furthest_block)) { - for($n = $length - 1; $n >= $fe_s_pos; $n--) { - array_pop($this->stack); - } - - unset($this->a_formatting[$fe_af_pos]); - $this->a_formatting = array_merge($this->a_formatting); - break; - } - - /* 4. Let the common ancestor be the element - immediately above the formatting element in the stack - of open elements. */ - $common_ancestor = $this->stack[$fe_s_pos - 1]; - - /* 5. If the furthest block has a parent node, then - remove the furthest block from its parent node. */ - if($furthest_block->parentNode !== null) { - $furthest_block->parentNode->removeChild($furthest_block); - } - - /* 6. Let a bookmark note the position of the - formatting element in the list of active formatting - elements relative to the elements on either side - of it in the list. */ - $bookmark = $fe_af_pos; - - /* 7. Let node and last node be the furthest block. - Follow these steps: */ - $node = $furthest_block; - $last_node = $furthest_block; - - while(true) { - for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { - /* 7.1 Let node be the element immediately - prior to node in the stack of open elements. */ - $node = $this->stack[$n]; - - /* 7.2 If node is not in the list of active - formatting elements, then remove node from - the stack of open elements and then go back - to step 1. */ - if(!in_array($node, $this->a_formatting, true)) { - unset($this->stack[$n]); - $this->stack = array_merge($this->stack); - - } else { - break; - } - } - - /* 7.3 Otherwise, if node is the formatting - element, then go to the next step in the overall - algorithm. */ - if($node === $formatting_element) { - break; - - /* 7.4 Otherwise, if last node is the furthest - block, then move the aforementioned bookmark to - be immediately after the node in the list of - active formatting elements. */ - } elseif($last_node === $furthest_block) { - $bookmark = array_search($node, $this->a_formatting, true) + 1; - } - - /* 7.5 If node has any children, perform a - shallow clone of node, replace the entry for - node in the list of active formatting elements - with an entry for the clone, replace the entry - for node in the stack of open elements with an - entry for the clone, and let node be the clone. */ - if($node->hasChildNodes()) { - $clone = $node->cloneNode(); - $s_pos = array_search($node, $this->stack, true); - $a_pos = array_search($node, $this->a_formatting, true); - - $this->stack[$s_pos] = $clone; - $this->a_formatting[$a_pos] = $clone; - $node = $clone; - } - - /* 7.6 Insert last node into node, first removing - it from its previous parent node if any. */ - if($last_node->parentNode !== null) { - $last_node->parentNode->removeChild($last_node); - } - - $node->appendChild($last_node); - - /* 7.7 Let last node be node. */ - $last_node = $node; - } - - /* 8. Insert whatever last node ended up being in - the previous step into the common ancestor node, - first removing it from its previous parent node if - any. */ - if($last_node->parentNode !== null) { - $last_node->parentNode->removeChild($last_node); - } - - $common_ancestor->appendChild($last_node); - - /* 9. Perform a shallow clone of the formatting - element. */ - $clone = $formatting_element->cloneNode(); - - /* 10. Take all of the child nodes of the furthest - block and append them to the clone created in the - last step. */ - while($furthest_block->hasChildNodes()) { - $child = $furthest_block->firstChild; - $furthest_block->removeChild($child); - $clone->appendChild($child); - } - - /* 11. Append that clone to the furthest block. */ - $furthest_block->appendChild($clone); - - /* 12. Remove the formatting element from the list - of active formatting elements, and insert the clone - into the list of active formatting elements at the - position of the aforementioned bookmark. */ - $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); - unset($this->a_formatting[$fe_af_pos]); - $this->a_formatting = array_merge($this->a_formatting); - - $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); - $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); - $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); - - /* 13. Remove the formatting element from the stack - of open elements, and insert the clone into the stack - of open elements immediately after (i.e. in a more - deeply nested position than) the position of the - furthest block in that stack. */ - $fe_s_pos = array_search($formatting_element, $this->stack, true); - $fb_s_pos = array_search($furthest_block, $this->stack, true); - unset($this->stack[$fe_s_pos]); - - $s_part1 = array_slice($this->stack, 0, $fb_s_pos); - $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); - $this->stack = array_merge($s_part1, array($clone), $s_part2); - - /* 14. Jump back to step 1 in this series of steps. */ - unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); - } - break; - - /* An end tag token whose tag name is one of: "button", - "marquee", "object" */ - case 'button': case 'marquee': case 'object': - /* If the stack of open elements has an element in scope whose - tag name matches the tag name of the token, then generate implied - tags. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(); - - /* Now, if the current node is not an element with the same - tag name as the token, then this is a parse error. */ - // k - - /* Now, if the stack of open elements has an element in scope - whose tag name matches the tag name of the token, then pop - elements from the stack until that element has been popped from - the stack, and clear the list of active formatting elements up - to the last marker. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === $token['name']) { - $n = -1; - } - - array_pop($this->stack); - } - - $marker = end(array_keys($this->a_formatting, self::MARKER, true)); - - for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { - array_pop($this->a_formatting); - } - } - break; - - /* Or an end tag whose tag name is one of: "area", "basefont", - "bgsound", "br", "embed", "hr", "iframe", "image", "img", - "input", "isindex", "noembed", "noframes", "param", "select", - "spacer", "table", "textarea", "wbr" */ - case 'area': case 'basefont': case 'bgsound': case 'br': - case 'embed': case 'hr': case 'iframe': case 'image': - case 'img': case 'input': case 'isindex': case 'noembed': - case 'noframes': case 'param': case 'select': case 'spacer': - case 'table': case 'textarea': case 'wbr': - // Parse error. Ignore the token. - break; - - /* An end tag token not covered by the previous entries */ - default: - for($n = count($this->stack) - 1; $n >= 0; $n--) { - /* Initialise node to be the current node (the bottommost - node of the stack). */ - $node = end($this->stack); - - /* If node has the same tag name as the end tag token, - then: */ - if($token['name'] === $node->nodeName) { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* If the tag name of the end tag token does not - match the tag name of the current node, this is a - parse error. */ - // k - - /* Pop all the nodes from the current node up to - node, including node, then stop this algorithm. */ - for($x = count($this->stack) - $n; $x >= $n; $x--) { - array_pop($this->stack); - } - - } else { - $category = $this->getElementCategory($node); - - if($category !== self::SPECIAL && $category !== self::SCOPING) { - /* Otherwise, if node is in neither the formatting - category nor the phrasing category, then this is a - parse error. Stop this algorithm. The end tag token - is ignored. */ - return false; - } - } - } - break; - } - break; - } - } - - private function inTable($token) { - $clear = array('html', 'table'); - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $text = $this->dom->createTextNode($token['data']); - end($this->stack)->appendChild($text); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - end($this->stack)->appendChild($comment); - - /* A start tag whose tag name is "caption" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'caption') { - /* Clear the stack back to a table context. */ - $this->clearStackToTableContext($clear); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - - /* Insert an HTML element for the token, then switch the - insertion mode to "in caption". */ - $this->insertElement($token); - $this->mode = self::IN_CAPTION; - - /* A start tag whose tag name is "colgroup" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'colgroup') { - /* Clear the stack back to a table context. */ - $this->clearStackToTableContext($clear); - - /* Insert an HTML element for the token, then switch the - insertion mode to "in column group". */ - $this->insertElement($token); - $this->mode = self::IN_CGROUP; - - /* A start tag whose tag name is "col" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'col') { - $this->inTable(array( - 'name' => 'colgroup', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - $this->inColumnGroup($token); - - /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('tbody', 'tfoot', 'thead'))) { - /* Clear the stack back to a table context. */ - $this->clearStackToTableContext($clear); - - /* Insert an HTML element for the token, then switch the insertion - mode to "in table body". */ - $this->insertElement($token); - $this->mode = self::IN_TBODY; - - /* A start tag whose tag name is one of: "td", "th", "tr" */ - } elseif($token['type'] === HTML5::STARTTAG && - in_array($token['name'], array('td', 'th', 'tr'))) { - /* Act as if a start tag token with the tag name "tbody" had been - seen, then reprocess the current token. */ - $this->inTable(array( - 'name' => 'tbody', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inTableBody($token); - - /* A start tag whose tag name is "table" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'table') { - /* Parse error. Act as if an end tag token with the tag name "table" - had been seen, then, if that token wasn't ignored, reprocess the - current token. */ - $this->inTable(array( - 'name' => 'table', - 'type' => HTML5::ENDTAG - )); - - return $this->mainPhase($token); - - /* An end tag whose tag name is "table" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'table') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - return false; - - /* Otherwise: */ - } else { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* Now, if the current node is not a table element, then this - is a parse error. */ - // w/e - - /* Pop elements from this stack until a table element has been - popped from the stack. */ - while(true) { - $current = end($this->stack)->nodeName; - array_pop($this->stack); - - if($current === 'table') { - break; - } - } - - /* Reset the insertion mode appropriately. */ - $this->resetInsertionMode(); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', - 'tfoot', 'th', 'thead', 'tr'))) { - // Parse error. Ignore the token. - - /* Anything else */ - } else { - /* Parse error. Process the token as if the insertion mode was "in - body", with the following exception: */ - - /* If the current node is a table, tbody, tfoot, thead, or tr - element, then, whenever a node would be inserted into the current - node, it must instead be inserted into the foster parent element. */ - if(in_array(end($this->stack)->nodeName, - array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { - /* The foster parent element is the parent element of the last - table element in the stack of open elements, if there is a - table element and it has such a parent element. If there is no - table element in the stack of open elements (innerHTML case), - then the foster parent element is the first element in the - stack of open elements (the html element). Otherwise, if there - is a table element in the stack of open elements, but the last - table element in the stack of open elements has no parent, or - its parent node is not an element, then the foster parent - element is the element before the last table element in the - stack of open elements. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === 'table') { - $table = $this->stack[$n]; - break; - } - } - - if(isset($table) && $table->parentNode !== null) { - $this->foster_parent = $table->parentNode; - - } elseif(!isset($table)) { - $this->foster_parent = $this->stack[0]; - - } elseif(isset($table) && ($table->parentNode === null || - $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { - $this->foster_parent = $this->stack[$n - 1]; - } - } - - $this->inBody($token); - } - } - - private function inCaption($token) { - /* An end tag whose tag name is "caption" */ - if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore - - /* Otherwise: */ - } else { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* Now, if the current node is not a caption element, then this - is a parse error. */ - // w/e - - /* Pop elements from this stack until a caption element has - been popped from the stack. */ - while(true) { - $node = end($this->stack)->nodeName; - array_pop($this->stack); - - if($node === 'caption') { - break; - } - } - - /* Clear the list of active formatting elements up to the last - marker. */ - $this->clearTheActiveFormattingElementsUpToTheLastMarker(); - - /* Switch the insertion mode to "in table". */ - $this->mode = self::IN_TABLE; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag - name is "table" */ - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', - 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && - $token['name'] === 'table')) { - /* Parse error. Act as if an end tag with the tag name "caption" - had been seen, then, if that token wasn't ignored, reprocess the - current token. */ - $this->inCaption(array( - 'name' => 'caption', - 'type' => HTML5::ENDTAG - )); - - return $this->inTable($token); - - /* An end tag whose tag name is one of: "body", "col", "colgroup", - "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', - 'thead', 'tr'))) { - // Parse error. Ignore the token. - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in body". */ - $this->inBody($token); - } - } - - private function inColumnGroup($token) { - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $text = $this->dom->createTextNode($token['data']); - end($this->stack)->appendChild($text); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - end($this->stack)->appendChild($comment); - - /* A start tag whose tag name is "col" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { - /* Insert a col element for the token. Immediately pop the current - node off the stack of open elements. */ - $this->insertElement($token); - array_pop($this->stack); - - /* An end tag whose tag name is "colgroup" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'colgroup') { - /* If the current node is the root html element, then this is a - parse error, ignore the token. (innerHTML case) */ - if(end($this->stack)->nodeName === 'html') { - // Ignore - - /* Otherwise, pop the current node (which will be a colgroup - element) from the stack of open elements. Switch the insertion - mode to "in table". */ - } else { - array_pop($this->stack); - $this->mode = self::IN_TABLE; - } - - /* An end tag whose tag name is "col" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { - /* Parse error. Ignore the token. */ - - /* Anything else */ - } else { - /* Act as if an end tag with the tag name "colgroup" had been seen, - and then, if that token wasn't ignored, reprocess the current token. */ - $this->inColumnGroup(array( - 'name' => 'colgroup', - 'type' => HTML5::ENDTAG - )); - - return $this->inTable($token); - } - } - - private function inTableBody($token) { - $clear = array('tbody', 'tfoot', 'thead', 'html'); - - /* A start tag whose tag name is "tr" */ - if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { - /* Clear the stack back to a table body context. */ - $this->clearStackToTableContext($clear); - - /* Insert a tr element for the token, then switch the insertion - mode to "in row". */ - $this->insertElement($token); - $this->mode = self::IN_ROW; - - /* A start tag whose tag name is one of: "th", "td" */ - } elseif($token['type'] === HTML5::STARTTAG && - ($token['name'] === 'th' || $token['name'] === 'td')) { - /* Parse error. Act as if a start tag with the tag name "tr" had - been seen, then reprocess the current token. */ - $this->inTableBody(array( - 'name' => 'tr', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inRow($token); - - /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ - } elseif($token['type'] === HTML5::ENDTAG && - in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore - - /* Otherwise: */ - } else { - /* Clear the stack back to a table body context. */ - $this->clearStackToTableContext($clear); - - /* Pop the current node from the stack of open elements. Switch - the insertion mode to "in table". */ - array_pop($this->stack); - $this->mode = self::IN_TABLE; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || - ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { - /* If the stack of open elements does not have a tbody, thead, or - tfoot element in table scope, this is a parse error. Ignore the - token. (innerHTML case) */ - if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Clear the stack back to a table body context. */ - $this->clearStackToTableContext($clear); - - /* Act as if an end tag with the same tag name as the current - node ("tbody", "tfoot", or "thead") had been seen, then - reprocess the current token. */ - $this->inTableBody(array( - 'name' => end($this->stack)->nodeName, - 'type' => HTML5::ENDTAG - )); - - return $this->mainPhase($token); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { - /* Parse error. Ignore the token. */ - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in table". */ - $this->inTable($token); - } - } - - private function inRow($token) { - $clear = array('tr', 'html'); - - /* A start tag whose tag name is one of: "th", "td" */ - if($token['type'] === HTML5::STARTTAG && - ($token['name'] === 'th' || $token['name'] === 'td')) { - /* Clear the stack back to a table row context. */ - $this->clearStackToTableContext($clear); - - /* Insert an HTML element for the token, then switch the insertion - mode to "in cell". */ - $this->insertElement($token); - $this->mode = self::IN_CELL; - - /* Insert a marker at the end of the list of active formatting - elements. */ - $this->a_formatting[] = self::MARKER; - - /* An end tag whose tag name is "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Clear the stack back to a table row context. */ - $this->clearStackToTableContext($clear); - - /* Pop the current node (which will be a tr element) from the - stack of open elements. Switch the insertion mode to "in table - body". */ - array_pop($this->stack); - $this->mode = self::IN_TBODY; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { - /* Act as if an end tag with the tag name "tr" had been seen, then, - if that token wasn't ignored, reprocess the current token. */ - $this->inRow(array( - 'name' => 'tr', - 'type' => HTML5::ENDTAG - )); - - return $this->inCell($token); - - /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ - } elseif($token['type'] === HTML5::ENDTAG && - in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Otherwise, act as if an end tag with the tag name "tr" had - been seen, then reprocess the current token. */ - $this->inRow(array( - 'name' => 'tr', - 'type' => HTML5::ENDTAG - )); - - return $this->inCell($token); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { - /* Parse error. Ignore the token. */ - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in table". */ - $this->inTable($token); - } - } - - private function inCell($token) { - /* An end tag whose tag name is one of: "td", "th" */ - if($token['type'] === HTML5::ENDTAG && - ($token['name'] === 'td' || $token['name'] === 'th')) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as that of the token, then this is a - parse error and the token must be ignored. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Generate implied end tags, except for elements with the same - tag name as the token. */ - $this->generateImpliedEndTags(array($token['name'])); - - /* Now, if the current node is not an element with the same tag - name as the token, then this is a parse error. */ - // k - - /* Pop elements from this stack until an element with the same - tag name as the token has been popped from the stack. */ - while(true) { - $node = end($this->stack)->nodeName; - array_pop($this->stack); - - if($node === $token['name']) { - break; - } - } - - /* Clear the list of active formatting elements up to the last - marker. */ - $this->clearTheActiveFormattingElementsUpToTheLastMarker(); - - /* Switch the insertion mode to "in row". (The current node - will be a tr element at this point.) */ - $this->mode = self::IN_ROW; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', - 'thead', 'tr'))) { - /* If the stack of open elements does not have a td or th element - in table scope, then this is a parse error; ignore the token. - (innerHTML case) */ - if(!$this->elementInScope(array('td', 'th'), true)) { - // Ignore. - - /* Otherwise, close the cell (see below) and reprocess the current - token. */ - } else { - $this->closeCell(); - return $this->inRow($token); - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', - 'thead', 'tr'))) { - /* If the stack of open elements does not have a td or th element - in table scope, then this is a parse error; ignore the token. - (innerHTML case) */ - if(!$this->elementInScope(array('td', 'th'), true)) { - // Ignore. - - /* Otherwise, close the cell (see below) and reprocess the current - token. */ - } else { - $this->closeCell(); - return $this->inRow($token); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html'))) { - /* Parse error. Ignore the token. */ - - /* An end tag whose tag name is one of: "table", "tbody", "tfoot", - "thead", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as that of the token (which can only - happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), - then this is a parse error and the token must be ignored. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise, close the cell (see below) and reprocess the current - token. */ - } else { - $this->closeCell(); - return $this->inRow($token); - } - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in body". */ - $this->inBody($token); - } - } - - private function inSelect($token) { - /* Handle the token as follows: */ - - /* A character token */ - if($token['type'] === HTML5::CHARACTR) { - /* Append the token's character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag token whose tag name is "option" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'option') { - /* If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. */ - if(end($this->stack)->nodeName === 'option') { - $this->inSelect(array( - 'name' => 'option', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* A start tag token whose tag name is "optgroup" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'optgroup') { - /* If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. */ - if(end($this->stack)->nodeName === 'option') { - $this->inSelect(array( - 'name' => 'option', - 'type' => HTML5::ENDTAG - )); - } - - /* If the current node is an optgroup element, act as if an end tag - with the tag name "optgroup" had been seen. */ - if(end($this->stack)->nodeName === 'optgroup') { - $this->inSelect(array( - 'name' => 'optgroup', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* An end tag token whose tag name is "optgroup" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'optgroup') { - /* First, if the current node is an option element, and the node - immediately before it in the stack of open elements is an optgroup - element, then act as if an end tag with the tag name "option" had - been seen. */ - $elements_in_stack = count($this->stack); - - if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && - $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { - $this->inSelect(array( - 'name' => 'option', - 'type' => HTML5::ENDTAG - )); - } - - /* If the current node is an optgroup element, then pop that node - from the stack of open elements. Otherwise, this is a parse error, - ignore the token. */ - if($this->stack[$elements_in_stack - 1] === 'optgroup') { - array_pop($this->stack); - } - - /* An end tag token whose tag name is "option" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'option') { - /* If the current node is an option element, then pop that node - from the stack of open elements. Otherwise, this is a parse error, - ignore the token. */ - if(end($this->stack)->nodeName === 'option') { - array_pop($this->stack); - } - - /* An end tag whose tag name is "select" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'select') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - // w/e - - /* Otherwise: */ - } else { - /* Pop elements from the stack of open elements until a select - element has been popped from the stack. */ - while(true) { - $current = end($this->stack)->nodeName; - array_pop($this->stack); - - if($current === 'select') { - break; - } - } - - /* Reset the insertion mode appropriately. */ - $this->resetInsertionMode(); - } - - /* A start tag whose tag name is "select" */ - } elseif($token['name'] === 'select' && - $token['type'] === HTML5::STARTTAG) { - /* Parse error. Act as if the token had been an end tag with the - tag name "select" instead. */ - $this->inSelect(array( - 'name' => 'select', - 'type' => HTML5::ENDTAG - )); - - /* An end tag whose tag name is one of: "caption", "table", "tbody", - "tfoot", "thead", "tr", "td", "th" */ - } elseif(in_array($token['name'], array('caption', 'table', 'tbody', - 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { - /* Parse error. */ - // w/e - - /* If the stack of open elements has an element in table scope with - the same tag name as that of the token, then act as if an end tag - with the tag name "select" had been seen, and reprocess the token. - Otherwise, ignore the token. */ - if($this->elementInScope($token['name'], true)) { - $this->inSelect(array( - 'name' => 'select', - 'type' => HTML5::ENDTAG - )); - - $this->mainPhase($token); - } - - /* Anything else */ - } else { - /* Parse error. Ignore the token. */ - } - } - - private function afterBody($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Process the token as it would be processed if the insertion mode - was "in body". */ - $this->inBody($token); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the first element in the stack of open - elements (the html element), with the data attribute set to the - data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - $this->stack[0]->appendChild($comment); - - /* An end tag with the tag name "html" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { - /* If the parser was originally created in order to handle the - setting of an element's innerHTML attribute, this is a parse error; - ignore the token. (The element will be an html element in this - case.) (innerHTML case) */ - - /* Otherwise, switch to the trailing end phase. */ - $this->phase = self::END_PHASE; - - /* Anything else */ - } else { - /* Parse error. Set the insertion mode to "in body" and reprocess - the token. */ - $this->mode = self::IN_BODY; - return $this->inBody($token); - } - } - - private function inFrameset($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag with the tag name "frameset" */ - } elseif($token['name'] === 'frameset' && - $token['type'] === HTML5::STARTTAG) { - $this->insertElement($token); - - /* An end tag with the tag name "frameset" */ - } elseif($token['name'] === 'frameset' && - $token['type'] === HTML5::ENDTAG) { - /* If the current node is the root html element, then this is a - parse error; ignore the token. (innerHTML case) */ - if(end($this->stack)->nodeName === 'html') { - // Ignore - - } else { - /* Otherwise, pop the current node from the stack of open - elements. */ - array_pop($this->stack); - - /* If the parser was not originally created in order to handle - the setting of an element's innerHTML attribute (innerHTML case), - and the current node is no longer a frameset element, then change - the insertion mode to "after frameset". */ - $this->mode = self::AFTR_FRAME; - } - - /* A start tag with the tag name "frame" */ - } elseif($token['name'] === 'frame' && - $token['type'] === HTML5::STARTTAG) { - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - - /* A start tag with the tag name "noframes" */ - } elseif($token['name'] === 'noframes' && - $token['type'] === HTML5::STARTTAG) { - /* Process the token as if the insertion mode had been "in body". */ - $this->inBody($token); - - /* Anything else */ - } else { - /* Parse error. Ignore the token. */ - } - } - - private function afterFrameset($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* An end tag with the tag name "html" */ - } elseif($token['name'] === 'html' && - $token['type'] === HTML5::ENDTAG) { - /* Switch to the trailing end phase. */ - $this->phase = self::END_PHASE; - - /* A start tag with the tag name "noframes" */ - } elseif($token['name'] === 'noframes' && - $token['type'] === HTML5::STARTTAG) { - /* Process the token as if the insertion mode had been "in body". */ - $this->inBody($token); - - /* Anything else */ - } else { - /* Parse error. Ignore the token. */ - } - } - - private function trailingEndPhase($token) { - /* After the main phase, as each token is emitted from the tokenisation - stage, it must be processed as described in this section. */ - - /* A DOCTYPE token */ - if($token['type'] === HTML5::DOCTYPE) { - // Parse error. Ignore the token. - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - $this->dom->appendChild($comment); - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - } elseif($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Process the token as it would be processed in the main phase. */ - $this->mainPhase($token); - - /* A character token that is not one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE. Or a start tag token. Or an end tag token. */ - } elseif(($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || - $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { - /* Parse error. Switch back to the main phase and reprocess the - token. */ - $this->phase = self::MAIN_PHASE; - return $this->mainPhase($token); - - /* An end-of-file token */ - } elseif($token['type'] === HTML5::EOF) { - /* OMG DONE!! */ - } - } - - private function insertElement($token, $append = true) { - $el = $this->dom->createElement($token['name']); - - foreach($token['attr'] as $attr) { - if(!$el->hasAttribute($attr['name'])) { - $el->setAttribute($attr['name'], $attr['value']); - } - } - - $this->appendToRealParent($el); - $this->stack[] = $el; - - return $el; - } - - private function insertText($data) { - $text = $this->dom->createTextNode($data); - $this->appendToRealParent($text); - } - - private function insertComment($data) { - $comment = $this->dom->createComment($data); - $this->appendToRealParent($comment); - } - - private function appendToRealParent($node) { - if($this->foster_parent === null) { - end($this->stack)->appendChild($node); - - } elseif($this->foster_parent !== null) { - /* If the foster parent element is the parent element of the - last table element in the stack of open elements, then the new - node must be inserted immediately before the last table element - in the stack of open elements in the foster parent element; - otherwise, the new node must be appended to the foster parent - element. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === 'table' && - $this->stack[$n]->parentNode !== null) { - $table = $this->stack[$n]; - break; - } - } - - if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) - $this->foster_parent->insertBefore($node, $table); - else - $this->foster_parent->appendChild($node); - - $this->foster_parent = null; - } - } - - private function elementInScope($el, $table = false) { - if(is_array($el)) { - foreach($el as $element) { - if($this->elementInScope($element, $table)) { - return true; - } - } - - return false; - } - - $leng = count($this->stack); - - for($n = 0; $n < $leng; $n++) { - /* 1. Initialise node to be the current node (the bottommost node of - the stack). */ - $node = $this->stack[$leng - 1 - $n]; - - if($node->tagName === $el) { - /* 2. If node is the target node, terminate in a match state. */ - return true; - - } elseif($node->tagName === 'table') { - /* 3. Otherwise, if node is a table element, terminate in a failure - state. */ - return false; - - } elseif($table === true && in_array($node->tagName, array('caption', 'td', - 'th', 'button', 'marquee', 'object'))) { - /* 4. Otherwise, if the algorithm is the "has an element in scope" - variant (rather than the "has an element in table scope" variant), - and node is one of the following, terminate in a failure state. */ - return false; - - } elseif($node === $node->ownerDocument->documentElement) { - /* 5. Otherwise, if node is an html element (root element), terminate - in a failure state. (This can only happen if the node is the topmost - node of the stack of open elements, and prevents the next step from - being invoked if there are no more elements in the stack.) */ - return false; - } - - /* Otherwise, set node to the previous entry in the stack of open - elements and return to step 2. (This will never fail, since the loop - will always terminate in the previous step if the top of the stack - is reached.) */ - } - } - - private function reconstructActiveFormattingElements() { - /* 1. If there are no entries in the list of active formatting elements, - then there is nothing to reconstruct; stop this algorithm. */ - $formatting_elements = count($this->a_formatting); - - if($formatting_elements === 0) { - return false; - } - - /* 3. Let entry be the last (most recently added) element in the list - of active formatting elements. */ - $entry = end($this->a_formatting); - - /* 2. If the last (most recently added) entry in the list of active - formatting elements is a marker, or if it is an element that is in the - stack of open elements, then there is nothing to reconstruct; stop this - algorithm. */ - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { - return false; - } - - for($a = $formatting_elements - 1; $a >= 0; true) { - /* 4. If there are no entries before entry in the list of active - formatting elements, then jump to step 8. */ - if($a === 0) { - $step_seven = false; - break; - } - - /* 5. Let entry be the entry one earlier than entry in the list of - active formatting elements. */ - $a--; - $entry = $this->a_formatting[$a]; - - /* 6. If entry is neither a marker nor an element that is also in - thetack of open elements, go to step 4. */ - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { - break; - } - } - - while(true) { - /* 7. Let entry be the element one later than entry in the list of - active formatting elements. */ - if(isset($step_seven) && $step_seven === true) { - $a++; - $entry = $this->a_formatting[$a]; - } - - /* 8. Perform a shallow clone of the element entry to obtain clone. */ - $clone = $entry->cloneNode(); - - /* 9. Append clone to the current node and push it onto the stack - of open elements so that it is the new current node. */ - end($this->stack)->appendChild($clone); - $this->stack[] = $clone; - - /* 10. Replace the entry for entry in the list with an entry for - clone. */ - $this->a_formatting[$a] = $clone; - - /* 11. If the entry for clone in the list of active formatting - elements is not the last entry in the list, return to step 7. */ - if(end($this->a_formatting) !== $clone) { - $step_seven = true; - } else { - break; - } - } - } - - private function clearTheActiveFormattingElementsUpToTheLastMarker() { - /* When the steps below require the UA to clear the list of active - formatting elements up to the last marker, the UA must perform the - following steps: */ - - while(true) { - /* 1. Let entry be the last (most recently added) entry in the list - of active formatting elements. */ - $entry = end($this->a_formatting); - - /* 2. Remove entry from the list of active formatting elements. */ - array_pop($this->a_formatting); - - /* 3. If entry was a marker, then stop the algorithm at this point. - The list has been cleared up to the last marker. */ - if($entry === self::MARKER) { - break; - } - } - } - - private function generateImpliedEndTags(array $exclude = array()) { - /* When the steps below require the UA to generate implied end tags, - then, if the current node is a dd element, a dt element, an li element, - a p element, a td element, a th element, or a tr element, the UA must - act as if an end tag with the respective tag name had been seen and - then generate implied end tags again. */ - $node = end($this->stack); - $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); - - while(in_array(end($this->stack)->nodeName, $elements)) { - array_pop($this->stack); - } - } - - private function getElementCategory($name) { - if(in_array($name, $this->special)) - return self::SPECIAL; - - elseif(in_array($name, $this->scoping)) - return self::SCOPING; - - elseif(in_array($name, $this->formatting)) - return self::FORMATTING; - - else - return self::PHRASING; - } - - private function clearStackToTableContext($elements) { - /* When the steps above require the UA to clear the stack back to a - table context, it means that the UA must, while the current node is not - a table element or an html element, pop elements from the stack of open - elements. If this causes any elements to be popped from the stack, then - this is a parse error. */ - while(true) { - $node = end($this->stack)->nodeName; - - if(in_array($node, $elements)) { - break; - } else { - array_pop($this->stack); - } - } - } - - private function resetInsertionMode() { - /* 1. Let last be false. */ - $last = false; - $leng = count($this->stack); - - for($n = $leng - 1; $n >= 0; $n--) { - /* 2. Let node be the last node in the stack of open elements. */ - $node = $this->stack[$n]; - - /* 3. If node is the first node in the stack of open elements, then - set last to true. If the element whose innerHTML attribute is being - set is neither a td element nor a th element, then set node to the - element whose innerHTML attribute is being set. (innerHTML case) */ - if($this->stack[0]->isSameNode($node)) { - $last = true; - } - - /* 4. If node is a select element, then switch the insertion mode to - "in select" and abort these steps. (innerHTML case) */ - if($node->nodeName === 'select') { - $this->mode = self::IN_SELECT; - break; - - /* 5. If node is a td or th element, then switch the insertion mode - to "in cell" and abort these steps. */ - } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { - $this->mode = self::IN_CELL; - break; - - /* 6. If node is a tr element, then switch the insertion mode to - "in row" and abort these steps. */ - } elseif($node->nodeName === 'tr') { - $this->mode = self::IN_ROW; - break; - - /* 7. If node is a tbody, thead, or tfoot element, then switch the - insertion mode to "in table body" and abort these steps. */ - } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { - $this->mode = self::IN_TBODY; - break; - - /* 8. If node is a caption element, then switch the insertion mode - to "in caption" and abort these steps. */ - } elseif($node->nodeName === 'caption') { - $this->mode = self::IN_CAPTION; - break; - - /* 9. If node is a colgroup element, then switch the insertion mode - to "in column group" and abort these steps. (innerHTML case) */ - } elseif($node->nodeName === 'colgroup') { - $this->mode = self::IN_CGROUP; - break; - - /* 10. If node is a table element, then switch the insertion mode - to "in table" and abort these steps. */ - } elseif($node->nodeName === 'table') { - $this->mode = self::IN_TABLE; - break; - - /* 11. If node is a head element, then switch the insertion mode - to "in body" ("in body"! not "in head"!) and abort these steps. - (innerHTML case) */ - } elseif($node->nodeName === 'head') { - $this->mode = self::IN_BODY; - break; - - /* 12. If node is a body element, then switch the insertion mode to - "in body" and abort these steps. */ - } elseif($node->nodeName === 'body') { - $this->mode = self::IN_BODY; - break; - - /* 13. If node is a frameset element, then switch the insertion - mode to "in frameset" and abort these steps. (innerHTML case) */ - } elseif($node->nodeName === 'frameset') { - $this->mode = self::IN_FRAME; - break; - - /* 14. If node is an html element, then: if the head element - pointer is null, switch the insertion mode to "before head", - otherwise, switch the insertion mode to "after head". In either - case, abort these steps. (innerHTML case) */ - } elseif($node->nodeName === 'html') { - $this->mode = ($this->head_pointer === null) - ? self::BEFOR_HEAD - : self::AFTER_HEAD; - - break; - - /* 15. If last is true, then set the insertion mode to "in body" - and abort these steps. (innerHTML case) */ - } elseif($last) { - $this->mode = self::IN_BODY; - break; - } - } - } - - private function closeCell() { - /* If the stack of open elements has a td or th element in table scope, - then act as if an end tag token with that tag name had been seen. */ - foreach(array('td', 'th') as $cell) { - if($this->elementInScope($cell, true)) { - $this->inCell(array( - 'name' => $cell, - 'type' => HTML5::ENDTAG - )); - - break; - } - } - } - - public function save() { - return $this->dom; - } -} -?> diff --git a/lib/htmlpurifier/maintenance/add-vimline.php b/lib/htmlpurifier/maintenance/add-vimline.php deleted file mode 100644 index 34a177f21..000000000 --- a/lib/htmlpurifier/maintenance/add-vimline.php +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Adds vimline to files - */ - -chdir(dirname(__FILE__) . '/..'); -$FS = new FSTools(); - -$vimline = 'vim: et sw=4 sts=4'; - -$files = $FS->globr('.', '*'); -foreach ($files as $file) { - if ( - !is_file($file) || - prefix_is('./docs/doxygen', $file) || - prefix_is('./library/standalone', $file) || - prefix_is('./docs/specimens', $file) || - postfix_is('.ser', $file) || - postfix_is('.tgz', $file) || - postfix_is('.patch', $file) || - postfix_is('.dtd', $file) || - postfix_is('.ent', $file) || - postfix_is('.png', $file) || - postfix_is('.ico', $file) || - // wontfix - postfix_is('.vtest', $file) || - postfix_is('.svg', $file) || - postfix_is('.phpt', $file) || - postfix_is('VERSION', $file) || - postfix_is('WHATSNEW', $file) || - postfix_is('FOCUS', $file) || - postfix_is('configdoc/usage.xml', $file) || - postfix_is('library/HTMLPurifier.includes.php', $file) || - postfix_is('library/HTMLPurifier.safe-includes.php', $file) || - postfix_is('smoketests/xssAttacks.xml', $file) || - // phpt files - postfix_is('.diff', $file) || - postfix_is('.exp', $file) || - postfix_is('.log', $file) || - postfix_is('.out', $file) || - - $file == './library/HTMLPurifier/Lexer/PH5P.php' || - $file == './maintenance/PH5P.php' - ) continue; - $ext = strrchr($file, '.'); - if ( - postfix_is('README', $file) || - postfix_is('LICENSE', $file) || - postfix_is('CREDITS', $file) || - postfix_is('INSTALL', $file) || - postfix_is('NEWS', $file) || - postfix_is('TODO', $file) || - postfix_is('WYSIWYG', $file) || - postfix_is('Changelog', $file) - ) $ext = '.txt'; - if (postfix_is('Doxyfile', $file)) $ext = 'Doxyfile'; - if (postfix_is('.php.in', $file)) $ext = '.php'; - $no_nl = false; - switch ($ext) { - case '.php': - case '.inc': - case '.js': - $line = '// %s'; - break; - case '.html': - case '.xsl': - case '.xml': - case '.htc': - $line = "<!-- %s\n-->"; - break; - case '.htmlt': - $no_nl = true; - $line = '--# %s'; - break; - case '.ini': - $line = '; %s'; - break; - case '.css': - $line = '/* %s */'; - break; - case '.bat': - $line = 'rem %s'; - break; - case '.txt': - case '.utf8': - if ( - prefix_is('./library/HTMLPurifier/ConfigSchema', $file) || - prefix_is('./smoketests/test-schema', $file) || - prefix_is('./tests/HTMLPurifier/StringHashParser', $file) - ) { - $no_nl = true; - $line = '--# %s'; - } else { - $line = ' %s'; - } - break; - case 'Doxyfile': - $line = '# %s'; - break; - default: - throw new Exception('Unknown file: ' . $file); - } - - echo "$file\n"; - $contents = file_get_contents($file); - - $regex = '~' . str_replace('%s', 'vim: .+', preg_quote($line, '~')) . '~m'; - $contents = preg_replace($regex, '', $contents); - - $contents = rtrim($contents); - - if (strpos($contents, "\r\n") !== false) $nl = "\r\n"; - elseif (strpos($contents, "\n") !== false) $nl = "\n"; - elseif (strpos($contents, "\r") !== false) $nl = "\r"; - else $nl = PHP_EOL; - - if (!$no_nl) $contents .= $nl; - $contents .= $nl . str_replace('%s', $vimline, $line) . $nl; - - file_put_contents($file, $contents); - -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/common.php b/lib/htmlpurifier/maintenance/common.php deleted file mode 100644 index 888c7daf9..000000000 --- a/lib/htmlpurifier/maintenance/common.php +++ /dev/null @@ -1,22 +0,0 @@ -<?php - -function assertCli() { - if (php_sapi_name() != 'cli' && !getenv('PHP_IS_CLI')) { - echo 'Script cannot be called from web-browser (if you are indeed calling via cli, -set environment variable PHP_IS_CLI to work around this).'; - exit(1); - } -} - -function prefix_is($comp, $subject) { - return strncmp($comp, $subject, strlen($comp)) === 0; -} - -function postfix_is($comp, $subject) { - return strlen($subject) < $comp ? false : substr($subject, -strlen($comp)) === $comp; -} - -// Load useful stuff like FSTools -require_once dirname(__FILE__) . '/../extras/HTMLPurifierExtras.auto.php'; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/compile-doxygen.sh b/lib/htmlpurifier/maintenance/compile-doxygen.sh deleted file mode 100755 index ecd1127fd..000000000 --- a/lib/htmlpurifier/maintenance/compile-doxygen.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -cd .. -mkdir docs/doxygen -rm -Rf docs/doxygen/* -doxygen 1>docs/doxygen/info.log 2>docs/doxygen/errors.log -if [ "$?" != 0 ]; then - cat docs/doxygen/errors.log - exit -fi -cd docs -tar czf doxygen.tgz doxygen diff --git a/lib/htmlpurifier/maintenance/config-scanner.php b/lib/htmlpurifier/maintenance/config-scanner.php deleted file mode 100644 index 2b4efa3f8..000000000 --- a/lib/htmlpurifier/maintenance/config-scanner.php +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -require_once '../library/HTMLPurifier.auto.php'; -assertCli(); - -if (version_compare(PHP_VERSION, '5.2.2', '<')) { - echo "This script requires PHP 5.2.2 or later, for tokenizer line numbers."; - exit(1); -} - -/** - * @file - * Scans HTML Purifier source code for $config tokens and records the - * directive being used; configdoc can use this info later. - * - * Currently, this just dumps all the info onto the console. Eventually, it - * will create an XML file that our XSLT transform can use. - */ - -$FS = new FSTools(); -chdir(dirname(__FILE__) . '/../library/'); -$raw_files = $FS->globr('.', '*.php'); -$files = array(); -foreach ($raw_files as $file) { - $file = substr($file, 2); // rm leading './' - if (strncmp('standalone/', $file, 11) === 0) continue; // rm generated files - if (substr_count($file, '.') > 1) continue; // rm meta files - $files[] = $file; -} - -/** - * Moves the $i cursor to the next non-whitespace token - */ -function consumeWhitespace($tokens, &$i) { - do {$i++;} while (is_array($tokens[$i]) && $tokens[$i][0] === T_WHITESPACE); -} - -/** - * Tests whether or not a token is a particular type. There are three run-cases: - * - ($token, $expect_token): tests if the token is $expect_token type; - * - ($token, $expect_value): tests if the token is the string $expect_value; - * - ($token, $expect_token, $expect_value): tests if token is $expect_token type, and - * its string representation is $expect_value - */ -function testToken($token, $value_or_token, $value = null) { - if (is_null($value)) { - if (is_int($value_or_token)) return is_array($token) && $token[0] === $value_or_token; - else return $token === $value_or_token; - } else { - return is_array($token) && $token[0] === $value_or_token && $token[1] === $value; - } -} - -$counter = 0; -$full_counter = 0; -$tracker = array(); - -foreach ($files as $file) { - $tokens = token_get_all(file_get_contents($file)); - $file = str_replace('\\', '/', $file); - for ($i = 0, $c = count($tokens); $i < $c; $i++) { - $ok = false; - // Match $config - if (!$ok && testToken($tokens[$i], T_VARIABLE, '$config')) $ok = true; - // Match $this->config - while (!$ok && testToken($tokens[$i], T_VARIABLE, '$this')) { - consumeWhitespace($tokens, $i); - if (!testToken($tokens[$i], T_OBJECT_OPERATOR)) break; - consumeWhitespace($tokens, $i); - if (testToken($tokens[$i], T_STRING, 'config')) $ok = true; - break; - } - if (!$ok) continue; - - $ok = false; - for($i++; $i < $c; $i++) { - if ($tokens[$i] === ',' || $tokens[$i] === ')' || $tokens[$i] === ';') { - break; - } - if (is_string($tokens[$i])) continue; - if ($tokens[$i][0] === T_OBJECT_OPERATOR) { - $ok = true; - break; - } - } - if (!$ok) continue; - - $line = $tokens[$i][2]; - - consumeWhitespace($tokens, $i); - if (!testToken($tokens[$i], T_STRING, 'get')) continue; - - consumeWhitespace($tokens, $i); - if (!testToken($tokens[$i], '(')) continue; - - $full_counter++; - - $matched = false; - do { - - // What we currently don't match are batch retrievals, and - // wildcard retrievals. This data might be useful in the future, - // which is why we have a do {} while loop that doesn't actually - // do anything. - - consumeWhitespace($tokens, $i); - if (!testToken($tokens[$i], T_CONSTANT_ENCAPSED_STRING)) continue; - $id = substr($tokens[$i][1], 1, -1); - - $counter++; - $matched = true; - - if (!isset($tracker[$id])) $tracker[$id] = array(); - if (!isset($tracker[$id][$file])) $tracker[$id][$file] = array(); - $tracker[$id][$file][] = $line; - - } while (0); - - //echo "$file:$line uses $namespace.$directive\n"; - } -} - -echo "\n$counter/$full_counter instances of \$config or \$this->config found in source code.\n"; - -echo "Generating XML... "; - -$xw = new XMLWriter(); -$xw->openURI('../configdoc/usage.xml'); -$xw->setIndent(true); -$xw->startDocument('1.0', 'UTF-8'); -$xw->startElement('usage'); -foreach ($tracker as $id => $files) { - $xw->startElement('directive'); - $xw->writeAttribute('id', $id); - foreach ($files as $file => $lines) { - $xw->startElement('file'); - $xw->writeAttribute('name', $file); - foreach ($lines as $line) { - $xw->writeElement('line', $line); - } - $xw->endElement(); - } - $xw->endElement(); -} -$xw->endElement(); -$xw->flush(); - -echo "done!\n"; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/flush-definition-cache.php b/lib/htmlpurifier/maintenance/flush-definition-cache.php deleted file mode 100755 index 138badb65..000000000 --- a/lib/htmlpurifier/maintenance/flush-definition-cache.php +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Flushes the definition serial cache. This file should be - * called if changes to any subclasses of HTMLPurifier_Definition - * or related classes (such as HTMLPurifier_HTMLModule) are made. This - * may also be necessary if you've modified a customized version. - * - * @param Accepts one argument, cache type to flush; otherwise flushes all - * the caches. - */ - -echo "Flushing cache... \n"; - -require_once(dirname(__FILE__) . '/../library/HTMLPurifier.auto.php'); - -$config = HTMLPurifier_Config::createDefault(); - -$names = array('HTML', 'CSS', 'URI', 'Test'); -if (isset($argv[1])) { - if (in_array($argv[1], $names)) { - $names = array($argv[1]); - } else { - throw new Exception("Cache parameter {$argv[1]} is not a valid cache"); - } -} - -foreach ($names as $name) { - echo " - Flushing $name\n"; - $cache = new HTMLPurifier_DefinitionCache_Serializer($name); - $cache->flush($config); -} - -echo "Cache flushed successfully.\n"; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/flush.php b/lib/htmlpurifier/maintenance/flush.php deleted file mode 100644 index 6dada93f4..000000000 --- a/lib/htmlpurifier/maintenance/flush.php +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Runs all generation/flush cache scripts to ensure that somewhat volatile - * generated files are up-to-date. - */ - -function e($cmd) { - echo "\$ $cmd\n"; - passthru($cmd, $status); - echo "\n"; - if ($status) exit($status); -} - -$php = empty($_SERVER['argv'][1]) ? 'php' : $_SERVER['argv'][1]; - -e($php . ' generate-includes.php'); -e($php . ' generate-schema-cache.php'); -e($php . ' flush-definition-cache.php'); -e($php . ' generate-standalone.php'); -e($php . ' config-scanner.php'); - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/generate-entity-file.php b/lib/htmlpurifier/maintenance/generate-entity-file.php deleted file mode 100755 index dee8e61e7..000000000 --- a/lib/htmlpurifier/maintenance/generate-entity-file.php +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Parses *.ent files into an entity lookup table, and then serializes and - * writes the whole kaboodle to a file. The resulting file is cached so - * that this script does not need to be run. This script should rarely, - * if ever, be run, since HTML's entities are fairly immutable. - */ - -// here's where the entity files are located, assuming working directory -// is the same as the location of this PHP file. Needs trailing slash. -$entity_dir = '../docs/entities/'; - -// defines the output file for the serialized content. -$output_file = '../library/HTMLPurifier/EntityLookup/entities.ser'; - -// courtesy of a PHP manual comment -function unichr($dec) { - if ($dec < 128) { - $utf = chr($dec); - } else if ($dec < 2048) { - $utf = chr(192 + (($dec - ($dec % 64)) / 64)); - $utf .= chr(128 + ($dec % 64)); - } else { - $utf = chr(224 + (($dec - ($dec % 4096)) / 4096)); - $utf .= chr(128 + ((($dec % 4096) - ($dec % 64)) / 64)); - $utf .= chr(128 + ($dec % 64)); - } - return $utf; -} - -if ( !is_dir($entity_dir) ) exit("Fatal Error: Can't find entity directory.\n"); -if ( file_exists($output_file) ) exit("Fatal Error: output file already exists.\n"); - -$dh = @opendir($entity_dir); -if ( !$dh ) exit("Fatal Error: Cannot read entity directory.\n"); - -$entity_files = array(); -while (($file = readdir($dh)) !== false) { - if (@$file[0] === '.') continue; - if (substr(strrchr($file, "."), 1) !== 'ent') continue; - $entity_files[] = $file; -} -closedir($dh); - -if ( !$entity_files ) exit("Fatal Error: No entity files to parse.\n"); - -$entity_table = array(); -$regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"&#(?:38;#)?([0-9]+);">/'; - -foreach ( $entity_files as $file ) { - $contents = file_get_contents($entity_dir . $file); - $matches = array(); - preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER); - foreach ($matches as $match) { - $entity_table[$match[1]] = unichr($match[2]); - } -} - -$output = serialize($entity_table); - -$fh = fopen($output_file, 'w'); -fwrite($fh, $output); -fclose($fh); - -echo "Completed successfully."; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/generate-includes.php b/lib/htmlpurifier/maintenance/generate-includes.php deleted file mode 100644 index 498f49b91..000000000 --- a/lib/htmlpurifier/maintenance/generate-includes.php +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -require_once '../tests/path2class.func.php'; -require_once '../library/HTMLPurifier/Bootstrap.php'; -assertCli(); - -/** - * @file - * Generates an include stub for users who do not want to use the autoloader. - * When new files are added to HTML Purifier's main codebase, this file should - * be called. - */ - -chdir(dirname(__FILE__) . '/../library/'); -$FS = new FSTools(); - -$exclude_dirs = array( - 'HTMLPurifier/Language/', - 'HTMLPurifier/ConfigSchema/', - 'HTMLPurifier/Filter/', - 'HTMLPurifier/Printer/', - /* These should be excluded, but need to have ConfigSchema support first - - */ -); -$exclude_files = array( - 'HTMLPurifier/Lexer/PEARSax3.php', - 'HTMLPurifier/Lexer/PH5P.php', - 'HTMLPurifier/Printer.php', -); - -// Determine what files need to be included: -echo 'Scanning for files... '; -$raw_files = $FS->globr('.', '*.php'); -if (!$raw_files) throw new Exception('Did not find any PHP source files'); -$files = array(); -foreach ($raw_files as $file) { - $file = substr($file, 2); // rm leading './' - if (strncmp('standalone/', $file, 11) === 0) continue; // rm generated files - if (substr_count($file, '.') > 1) continue; // rm meta files - $ok = true; - foreach ($exclude_dirs as $dir) { - if (strncmp($dir, $file, strlen($dir)) === 0) { - $ok = false; - break; - } - } - if (!$ok) continue; // rm excluded directories - if (in_array($file, $exclude_files)) continue; // rm excluded files - $files[] = $file; -} -echo "done!\n"; - -// Reorder list so that dependencies are included first: - -/** - * Returns a lookup array of dependencies for a file. - * - * @note This function expects that format $name extends $parent on one line - * - * @param $file - * File to check dependencies of. - * @return - * Lookup array of files the file is dependent on, sorted accordingly. - */ -function get_dependency_lookup($file) { - static $cache = array(); - if (isset($cache[$file])) return $cache[$file]; - if (!file_exists($file)) { - echo "File doesn't exist: $file\n"; - return array(); - } - $fh = fopen($file, 'r'); - $deps = array(); - while (!feof($fh)) { - $line = fgets($fh); - if (strncmp('class', $line, 5) === 0) { - // The implementation here is fragile and will break if we attempt - // to use interfaces. Beware! - $arr = explode(' extends ', trim($line, ' {'."\n\r"), 2); - if (count($arr) < 2) break; - $parent = $arr[1]; - $dep_file = HTMLPurifier_Bootstrap::getPath($parent); - if (!$dep_file) break; - $deps[$dep_file] = true; - break; - } - } - fclose($fh); - foreach (array_keys($deps) as $file) { - // Extra dependencies must come *before* base dependencies - $deps = get_dependency_lookup($file) + $deps; - } - $cache[$file] = $deps; - return $deps; -} - -/** - * Sorts files based on dependencies. This function is lazy and will not - * group files with dependencies together; it will merely ensure that a file - * is never included before its dependencies are. - * - * @param $files - * Files array to sort. - * @return - * Sorted array ($files is not modified by reference!) - */ -function dep_sort($files) { - $ret = array(); - $cache = array(); - foreach ($files as $file) { - if (isset($cache[$file])) continue; - $deps = get_dependency_lookup($file); - foreach (array_keys($deps) as $dep) { - if (!isset($cache[$dep])) { - $ret[] = $dep; - $cache[$dep] = true; - } - } - $cache[$file] = true; - $ret[] = $file; - } - return $ret; -} - -$files = dep_sort($files); - -// Build the actual include stub: - -$version = trim(file_get_contents('../VERSION')); - -// stub -$php = "<?php - -/** - * @file - * This file was auto-generated by generate-includes.php and includes all of - * the core files required by HTML Purifier. Use this if performance is a - * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS - * FILE, changes will be overwritten the next time the script is run. - * - * @version $version - * - * @warning - * You must *not* include any other HTML Purifier files before this file, - * because 'require' not 'require_once' is used. - * - * @warning - * This file requires that the include path contains the HTML Purifier - * library directory; this is not auto-set. - */ - -"; - -foreach ($files as $file) { - $php .= "require '$file';" . PHP_EOL; -} - -echo "Writing HTMLPurifier.includes.php... "; -file_put_contents('HTMLPurifier.includes.php', $php); -echo "done!\n"; - -$php = "<?php - -/** - * @file - * This file was auto-generated by generate-includes.php and includes all of - * the core files required by HTML Purifier. This is a convenience stub that - * includes all files using dirname(__FILE__) and require_once. PLEASE DO NOT - * EDIT THIS FILE, changes will be overwritten the next time the script is run. - * - * Changes to include_path are not necessary. - */ - -\$__dir = dirname(__FILE__); - -"; - -foreach ($files as $file) { - $php .= "require_once \$__dir . '/$file';" . PHP_EOL; -} - -echo "Writing HTMLPurifier.safe-includes.php... "; -file_put_contents('HTMLPurifier.safe-includes.php', $php); -echo "done!\n"; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/generate-ph5p-patch.php b/lib/htmlpurifier/maintenance/generate-ph5p-patch.php deleted file mode 100644 index c92a7d211..000000000 --- a/lib/htmlpurifier/maintenance/generate-ph5p-patch.php +++ /dev/null @@ -1,22 +0,0 @@ -<?php - -/** - * @file - * This file compares our version of PH5P with Jero's original version, and - * generates a patch of the differences. This script should be run whenever - * library/HTMLPurifier/Lexer/PH5P.php is modified. - */ - -$orig = realpath(dirname(__FILE__) . '/PH5P.php'); -$new = realpath(dirname(__FILE__) . '/../library/HTMLPurifier/Lexer/PH5P.php'); -$newt = dirname(__FILE__) . '/PH5P.new.php'; // temporary file - -// minor text-processing of new file to get into same format as original -$new_src = file_get_contents($new); -$new_src = '<?php' . PHP_EOL . substr($new_src, strpos($new_src, 'class HTML5 {')); - -file_put_contents($newt, $new_src); -shell_exec("diff -u \"$orig\" \"$newt\" > PH5P.patch"); -unlink($newt); - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/generate-schema-cache.php b/lib/htmlpurifier/maintenance/generate-schema-cache.php deleted file mode 100644 index 339ff12da..000000000 --- a/lib/htmlpurifier/maintenance/generate-schema-cache.php +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/php -<?php - -require_once dirname(__FILE__) . '/common.php'; -require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php'; -assertCli(); - -/** - * @file - * Generates a schema cache file, saving it to - * library/HTMLPurifier/ConfigSchema/schema.ser. - * - * This should be run when new configuration options are added to - * HTML Purifier. A cached version is available via the repository - * so this does not normally have to be regenerated. - * - * If you have a directory containing custom configuration schema files, - * you can simple add a path to that directory as a parameter to - * this, and they will get included. - */ - -$target = dirname(__FILE__) . '/../library/HTMLPurifier/ConfigSchema/schema.ser'; - -$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder(); -$interchange = new HTMLPurifier_ConfigSchema_Interchange(); - -$builder->buildDir($interchange); - -$loader = dirname(__FILE__) . '/../config-schema.php'; -if (file_exists($loader)) include $loader; -foreach ($_SERVER['argv'] as $i => $dir) { - if ($i === 0) continue; - $builder->buildDir($interchange, realpath($dir)); -} - -$interchange->validate(); - -$schema_builder = new HTMLPurifier_ConfigSchema_Builder_ConfigSchema(); -$schema = $schema_builder->build($interchange); - -echo "Saving schema... "; -file_put_contents($target, serialize($schema)); -echo "done!\n"; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/generate-standalone.php b/lib/htmlpurifier/maintenance/generate-standalone.php deleted file mode 100755 index e67f07124..000000000 --- a/lib/htmlpurifier/maintenance/generate-standalone.php +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Compiles all of HTML Purifier's library files into one big file - * named HTMLPurifier.standalone.php. This is usually called during the - * release process. - */ - -/** - * Global hash that tracks already loaded includes - */ -$GLOBALS['loaded'] = array(); - -/** - * Custom FSTools for this script that overloads some behavior - * @warning The overloading of copy() is not necessarily global for - * this script. Watch out! - */ -class MergeLibraryFSTools extends FSTools -{ - function copyable($entry) { - // Skip hidden files - if ($entry[0] == '.') { - return false; - } - return true; - } - function copy($source, $dest) { - copy_and_remove_includes($source, $dest); - } -} -$FS = new MergeLibraryFSTools(); - -/** - * Replaces the includes inside PHP source code with the corresponding - * source. - * @param string $text PHP source code to replace includes from - */ -function replace_includes($text) { - // also remove vim modelines - return preg_replace_callback( - "/require(?:_once)? ['\"]([^'\"]+)['\"];/", - 'replace_includes_callback', - $text - ); -} - -/** - * Removes leading PHP tags from included files. Assumes that there is - * no trailing tag. Also removes vim modelines. - * @note This is safe for files that have internal <?php - * @param string $text Text to have leading PHP tag from - */ -function remove_php_tags($text) { - $text = preg_replace('#// vim:.+#', '', $text); - return substr($text, 5); -} - -/** - * Copies the contents of a directory to the standalone directory - * @param string $dir Directory to copy - */ -function make_dir_standalone($dir) { - global $FS; - return $FS->copyr($dir, 'standalone/' . $dir); -} - -/** - * Copies the contents of a file to the standalone directory - * @param string $file File to copy - */ -function make_file_standalone($file) { - global $FS; - $FS->mkdirr('standalone/' . dirname($file)); - copy_and_remove_includes($file, 'standalone/' . $file); - return true; -} - -/** - * Copies a file to another location recursively, if it is a PHP file - * remove includes - * @param string $file Original file - * @param string $sfile New location of file - */ -function copy_and_remove_includes($file, $sfile) { - $contents = file_get_contents($file); - if (strrchr($file, '.') === '.php') $contents = replace_includes($contents); - return file_put_contents($sfile, $contents); -} - -/** - * @param $matches preg_replace_callback matches array, where index 1 - * is the filename to include - */ -function replace_includes_callback($matches) { - $file = $matches[1]; - $preserve = array( - // PEAR (external) - 'XML/HTMLSax3.php' => 1 - ); - if (isset($preserve[$file])) { - return $matches[0]; - } - if (isset($GLOBALS['loaded'][$file])) return ''; - $GLOBALS['loaded'][$file] = true; - return replace_includes(remove_php_tags(file_get_contents($file))); -} - -echo 'Generating includes file... '; -shell_exec('php generate-includes.php'); -echo "done!\n"; - -chdir(dirname(__FILE__) . '/../library/'); - -echo 'Creating full file...'; -$contents = replace_includes(file_get_contents('HTMLPurifier.includes.php')); -$contents = str_replace( - // Note that bootstrap is now inside the standalone file - "define('HTMLPURIFIER_PREFIX', realpath(dirname(__FILE__) . '/..'));", - "define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone'); - set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());", - $contents -); -file_put_contents('HTMLPurifier.standalone.php', $contents); -echo ' done!' . PHP_EOL; - -echo 'Creating standalone directory...'; -$FS->rmdirr('standalone'); // ensure a clean copy - -// data files -$FS->mkdirr('standalone/HTMLPurifier/DefinitionCache/Serializer'); -make_file_standalone('HTMLPurifier/EntityLookup/entities.ser'); -make_file_standalone('HTMLPurifier/ConfigSchema/schema.ser'); - -// non-standard inclusion setup -make_dir_standalone('HTMLPurifier/ConfigSchema'); -make_dir_standalone('HTMLPurifier/Language'); -make_dir_standalone('HTMLPurifier/Filter'); -make_dir_standalone('HTMLPurifier/Printer'); -make_file_standalone('HTMLPurifier/Printer.php'); -make_file_standalone('HTMLPurifier/Lexer/PH5P.php'); - -echo ' done!' . PHP_EOL; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/merge-library.php b/lib/htmlpurifier/maintenance/merge-library.php deleted file mode 100755 index de2eecdc0..000000000 --- a/lib/htmlpurifier/maintenance/merge-library.php +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/php -<?php - -/** - * @file - * Deprecated in favor of generate-standalone.php. - */ - -require dirname(__FILE__) . '/generate-standalone.php'; - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/old-extract-schema.php b/lib/htmlpurifier/maintenance/old-extract-schema.php deleted file mode 100644 index bd55feaa6..000000000 --- a/lib/htmlpurifier/maintenance/old-extract-schema.php +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -echo "Please do not run this script. It is here for historical purposes only."; -exit; - -/** - * @file - * Extracts all definitions inside a configuration schema - * (HTMLPurifier_ConfigSchema) and exports them as plain text files. - * - * @todo Extract version numbers. - */ - -define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected -require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php'; - -// We need includes to ensure all HTMLPurifier_ConfigSchema calls are -// performed. -require_once 'HTMLPurifier.includes.php'; - -// Also, these extra files will be necessary. -require_once 'HTMLPurifier/Filter/ExtractStyleBlocks.php'; - -/** - * Takes a hash and saves its contents to library/HTMLPurifier/ConfigSchema/ - */ -function saveHash($hash) { - if ($hash === false) return; - $dir = realpath(dirname(__FILE__) . '/../library/HTMLPurifier/ConfigSchema'); - $name = $hash['ID'] . '.txt'; - $file = $dir . '/' . $name; - if (file_exists($file)) { - trigger_error("File already exists; skipped $name"); - return; - } - $file = new FSTools_File($file); - $file->open('w'); - $multiline = false; - foreach ($hash as $key => $value) { - $multiline = $multiline || (strpos($value, "\n") !== false); - if ($multiline) { - $file->put("--$key--" . PHP_EOL); - $file->put(str_replace("\n", PHP_EOL, $value) . PHP_EOL); - } else { - if ($key == 'ID') { - $file->put("$value" . PHP_EOL); - } else { - $file->put("$key: $value" . PHP_EOL); - } - } - } - $file->close(); -} - -$schema = HTMLPurifier_ConfigSchema::instance(); -$adapter = new HTMLPurifier_ConfigSchema_StringHashReverseAdapter($schema); - -foreach ($schema->info as $ns => $ns_array) { - saveHash($adapter->get($ns)); - foreach ($ns_array as $dir => $x) { - saveHash($adapter->get($ns, $dir)); - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/old-remove-require-once.php b/lib/htmlpurifier/maintenance/old-remove-require-once.php deleted file mode 100644 index f47c7d0f1..000000000 --- a/lib/htmlpurifier/maintenance/old-remove-require-once.php +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -echo "Please do not run this script. It is here for historical purposes only."; -exit; - -/** - * @file - * Removes leading includes from files. - * - * @note - * This does not remove inline includes; those must be handled manually. - */ - -chdir(dirname(__FILE__) . '/../tests/HTMLPurifier'); -$FS = new FSTools(); - -$files = $FS->globr('.', '*.php'); -foreach ($files as $file) { - if (substr_count(basename($file), '.') > 1) continue; - $old_code = file_get_contents($file); - $new_code = preg_replace("#^require_once .+[\n\r]*#m", '', $old_code); - if ($old_code !== $new_code) { - file_put_contents($file, $new_code); - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/old-remove-schema-def.php b/lib/htmlpurifier/maintenance/old-remove-schema-def.php deleted file mode 100644 index 5ae031973..000000000 --- a/lib/htmlpurifier/maintenance/old-remove-schema-def.php +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -echo "Please do not run this script. It is here for historical purposes only."; -exit; - -/** - * @file - * Removes ConfigSchema function calls from source files. - */ - -chdir(dirname(__FILE__) . '/../library/'); -$FS = new FSTools(); - -$files = $FS->globr('.', '*.php'); -foreach ($files as $file) { - if (substr_count(basename($file), '.') > 1) continue; - $old_code = file_get_contents($file); - $new_code = preg_replace("#^HTMLPurifier_ConfigSchema::.+?\);[\n\r]*#ms", '', $old_code); - if ($old_code !== $new_code) { - file_put_contents($file, $new_code); - } - if (preg_match('#^\s+HTMLPurifier_ConfigSchema::#m', $new_code)) { - echo "Indented ConfigSchema call in $file\n"; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/regenerate-docs.sh b/lib/htmlpurifier/maintenance/regenerate-docs.sh deleted file mode 100755 index 6f4d720ff..000000000 --- a/lib/htmlpurifier/maintenance/regenerate-docs.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e -./compile-doxygen.sh -cd ../docs -scp doxygen.tgz htmlpurifier.org:/home/ezyang/htmlpurifier.org -ssh htmlpurifier.org "cd /home/ezyang/htmlpurifier.org && ./reload-docs.sh" diff --git a/lib/htmlpurifier/maintenance/remove-trailing-whitespace.php b/lib/htmlpurifier/maintenance/remove-trailing-whitespace.php deleted file mode 100644 index 857870546..000000000 --- a/lib/htmlpurifier/maintenance/remove-trailing-whitespace.php +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Removes trailing whitespace from files. - */ - -chdir(dirname(__FILE__) . '/..'); -$FS = new FSTools(); - -$files = $FS->globr('.', '{,.}*', GLOB_BRACE); -foreach ($files as $file) { - if ( - !is_file($file) || - prefix_is('./.git', $file) || - prefix_is('./docs/doxygen', $file) || - postfix_is('.ser', $file) || - postfix_is('.tgz', $file) || - postfix_is('.patch', $file) || - postfix_is('.dtd', $file) || - postfix_is('.ent', $file) || - $file == './library/HTMLPurifier/Lexer/PH5P.php' || - $file == './maintenance/PH5P.php' - ) continue; - $contents = file_get_contents($file); - $result = preg_replace('/^(.*?)[ \t]+(\r?)$/m', '\1\2', $contents, -1, $count); - if (!$count) continue; - echo "$file\n"; - file_put_contents($file, $result); -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/rename-config.php b/lib/htmlpurifier/maintenance/rename-config.php deleted file mode 100644 index 6e59e2a79..000000000 --- a/lib/htmlpurifier/maintenance/rename-config.php +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -require_once '../library/HTMLPurifier.auto.php'; -assertCli(); - -/** - * @file - * Renames a configuration directive. This involves renaming the file, - * adding an alias, and then regenerating the cache. You still have to - * manually go through and fix any calls to the directive. - * @warning This script doesn't handle multi-stringhash files. - */ - -$argv = $_SERVER['argv']; -if (count($argv) < 3) { - echo "Usage: {$argv[0]} OldName NewName\n"; - exit(1); -} - -chdir('../library/HTMLPurifier/ConfigSchema/schema'); - -$old = $argv[1]; -$new = $argv[2]; - -if (!file_exists("$old.txt")) { - echo "Cannot move undefined configuration directive $old\n"; - exit(1); -} - -if ($old === $new) { - echo "Attempting to move to self, aborting\n"; - exit(1); -} - -if (file_exists("$new.txt")) { - echo "Cannot move to already defined directive $new\n"; - exit(1); -} - -$file = "$old.txt"; -$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder(); -$interchange = new HTMLPurifier_ConfigSchema_Interchange(); -$builder->buildFile($interchange, $file); -$contents = file_get_contents($file); - -if (strpos($contents, "\r\n") !== false) { - $nl = "\r\n"; -} elseif (strpos($contents, "\r") !== false) { - $nl = "\r"; -} else { - $nl = "\n"; -} - -// replace name with new name -$contents = str_replace($old, $new, $contents); - -if ($interchange->directives[$old]->aliases) { - $pos_alias = strpos($contents, 'ALIASES:'); - $pos_ins = strpos($contents, $nl, $pos_alias); - if ($pos_ins === false) $pos_ins = strlen($contents); - $contents = - substr($contents, 0, $pos_ins) . ", $old" . substr($contents, $pos_ins); - file_put_contents($file, $contents); -} else { - $lines = explode($nl, $contents); - $insert = false; - foreach ($lines as $n => $line) { - if (strncmp($line, '--', 2) === 0) { - $insert = $n; - break; - } - } - if (!$insert) { - $lines[] = "ALIASES: $old"; - } else { - array_splice($lines, $insert, 0, "ALIASES: $old"); - } - file_put_contents($file, implode($nl, $lines)); -} - -rename("$old.txt", "$new.txt") || exit(1); diff --git a/lib/htmlpurifier/maintenance/update-config.php b/lib/htmlpurifier/maintenance/update-config.php deleted file mode 100644 index 2d8a7a9c1..000000000 --- a/lib/htmlpurifier/maintenance/update-config.php +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Converts all instances of $config->set and $config->get to the new - * format, as described by docs/dev-config-bcbreaks.txt - */ - -$FS = new FSTools(); -chdir(dirname(__FILE__) . '/..'); -$raw_files = $FS->globr('.', '*.php'); -foreach ($raw_files as $file) { - $file = substr($file, 2); // rm leading './' - if (strpos($file, 'library/standalone/') === 0) continue; - if (strpos($file, 'maintenance/update-config.php') === 0) continue; - if (strpos($file, 'test-settings.php') === 0) continue; - if (substr_count($file, '.') > 1) continue; // rm meta files - // process the file - $contents = file_get_contents($file); - $contents = preg_replace( - "#config->(set|get)\('(.+?)', '(.+?)'#", - "config->\\1('\\2.\\3'", - $contents - ); - if ($contents === '') continue; - file_put_contents($file, $contents); -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/maintenance/update-freshmeat.php b/lib/htmlpurifier/maintenance/update-freshmeat.php deleted file mode 100644 index 5295c0430..000000000 --- a/lib/htmlpurifier/maintenance/update-freshmeat.php +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/php -<?php - -chdir(dirname(__FILE__)); -require_once 'common.php'; -assertCli(); - -/** - * @file - * Updates Freshmeat's HTML Purifier with the latest information via XML RPC. - */ - -class XmlRpc_Freshmeat -{ - - const URL = 'http://freshmeat.net/xmlrpc/'; - - public $chatty = false; - - public $encodeOptions = array( - 'encoding' => 'utf-8', - ); - - /** - * This array defines shortcut method signatures for dealing with simple - * XML RPC methods. More complex ones (publish_release) should use the named parameter - * syntax. - */ - public $signatures = array( - 'login' => array('username', 'password'), - 'fetch_branch_list' => array('project_name'), - 'fetch_release' => array('project_name', 'branch_name', 'version'), - 'withdraw_release' => array('project_name', 'branch_name', 'version'), - ); - - protected $sid = null; - - /** - * @param $username Username to login with - * @param $password Password to login with - */ - public function __construct($username = null, $password = null) { - if ($username && $password) { - $this->login($username, $password); - } - } - - /** - * Performs a raw XML RPC call to self::URL - */ - protected function call($method, $params) { - $request = xmlrpc_encode_request($method, $params, $this->encodeOptions); - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, self::URL); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($ch, CURLOPT_TIMEOUT, 1); - curl_setopt($ch, CURLOPT_HTTPHEADER, array( - 'Content-type: text/xml', - 'Content-length: ' . strlen($request) - )); - curl_setopt($ch, CURLOPT_POSTFIELDS, $request); - $data = curl_exec($ch); - if ($errno = curl_errno($ch)) { - throw new Exception("Curl error [$errno]: " . curl_error($ch)); - } else { - curl_close($ch); - return xmlrpc_decode($data); - } - } - - /** - * Performs an XML RPC call to Freshmeat. - * @param $name Name of method to call, can be methodName or method_name - * @param $args Arguments of call, in form array('key1', 'val1', 'key2' ...) - */ - public function __call($name, $args) { - $method = $this->camelToUnderscore($name); - $params = array(); - if ($this->sid) $params['SID'] = $this->sid; - if (isset($this->signatures[$method])) { - for ($i = 0, $c = count($this->signatures[$method]); $i < $c; $i++) { - $params[$this->signatures[$method][$i]] = $args[$i]; - } - } else { - for ($i = 0, $c = count($args); $i + 1 < $c; $i += 2) { - $params[$args[$i]] = $args[$i + 1]; - } - } - $result = $this->call($method, $params); - switch ($method) { - case 'login': - $this->sid = $result['SID']; - break; - case 'logout': - $this->sid = null; - break; - } - if ($this->chatty) print_r($result); - return $result; - } - - /** - * Munge methodName to method_name - */ - private function camelToUnderscore($name) { - $method = ''; - for ($i = 0, $c = strlen($name); $i < $c; $i++) { - $v = $name[$i]; - if (ctype_lower($v)) $method .= $v; - else $method .= '_' . strtolower($v); - } - return $method; - } - - /** - * Automatically logout at end of scope - */ - public function __destruct() { - if ($this->sid) $this->logout(); - } - -} - -$rpc = new XmlRpc_Freshmeat($argv[1], $argv[2]); -$rpc->chatty = true; - -$project = 'htmlpurifier'; -$branch = 'Default'; -$version = file_get_contents('../VERSION'); - -$result = $rpc->fetchRelease($project, $branch, $version); -if (!isset($result['faultCode'])) { - echo "Freshmeat release already exists.\n"; - exit(0); -} - -$changes = strtr(file_get_contents('../WHATSNEW'), array("\r" => '', "\n" => ' ')); -$focus = (int) trim(file_get_contents('../FOCUS')); - -if (strlen($changes) > 600) { - echo "WHATSNEW entry is too long.\n"; - exit(1); -} - -$rpc->publishRelease( - 'project_name', $project, - 'branch_name', $branch, - 'version', $version, - 'changes', $changes, - 'release_focus', $focus, - 'url_tgz', "http://htmlpurifier.org/releases/htmlpurifier-$version.tar.gz", - 'url_zip', "http://htmlpurifier.org/releases/htmlpurifier-$version.zip", - 'url_changelog', "http://htmlpurifier.org/svnroot/htmlpurifier/tags/$version/NEWS" -); - -// vim: et sw=4 sts=4 |