diff options
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/Token')
6 files changed, 153 insertions, 0 deletions
<?php

// ---------------------------------------------------------------------------
// lib/htmlpurifier/library/HTMLPurifier/Token/Comment.php
// ---------------------------------------------------------------------------

/**
 * Concrete comment token class. Generally will be ignored.
 */
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
    public $data; /**< Character data within comment. */
    public $is_whitespace = true;

    /**
     * Transparent constructor: stores its arguments on the token unchanged.
     *
     * @param $data String comment data.
     * @param $line Stored as-is in $this->line (presumably the source line
     *              of the token; confirm against the lexers).
     * @param $col  Stored as-is in $this->col (presumably the source column
     *              of the token; confirm against the lexers).
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->data = $data;
        $this->line = $line;
        $this->col  = $col;
    }
}

// ---------------------------------------------------------------------------
// lib/htmlpurifier/library/HTMLPurifier/Token/Empty.php
// ---------------------------------------------------------------------------

/**
 * Concrete empty token class.
 */
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{

}

// ---------------------------------------------------------------------------
// lib/htmlpurifier/library/HTMLPurifier/Token/End.php
// ---------------------------------------------------------------------------

/**
 * Concrete end token class.
 *
 * @warning This class accepts attributes even though end tags cannot. This
 * is for optimization reasons, as under normal circumstances, the Lexers
 * do not pass attributes.
 */
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
    /**
     * Token that started this node. Added by MakeWellFormed. Please
     * do not edit this!
     */
    public $start;
}

// ---------------------------------------------------------------------------
// lib/htmlpurifier/library/HTMLPurifier/Token/Start.php
// ---------------------------------------------------------------------------

/**
 * Concrete start token class.
 */
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{

}

// ---------------------------------------------------------------------------
// lib/htmlpurifier/library/HTMLPurifier/Token/Tag.php
// ---------------------------------------------------------------------------

/**
 * Base class of a tag token (start, end or empty), and its behavior.
 *
 * @note Conceptually abstract, but not declared with the abstract keyword,
 *       so it remains directly instantiable.
 */
class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
{
    /**
     * Static bool marker that indicates the class is a tag.
     *
     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
     * without having to use a function call <tt>is_a()</tt>.
     */
    public $is_tag = true;

    /**
     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
     *
     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
     * be lower-casing them, but these tokens cater to HTML tags, which are
     * insensitive.
     */
    public $name;

    /**
     * Associative array of the tag's attributes.
     */
    public $attr = array();

    /**
     * Non-overloaded constructor, which lower-cases the passed tag name and
     * the attribute keys.
     *
     * When two keys differ only by case, the value already stored under the
     * lower-case key wins, unless that value is null (isset() treats null as
     * absent), in which case the other value replaces it.
     *
     * @param $name  String name.
     * @param $attr  Associative array of attributes.
     * @param $line  Stored as-is in $this->line.
     * @param $col   Stored as-is in $this->col.
     * @param $armor Stored as-is in $this->armor; its meaning is defined
     *               outside this class.
     */
    public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array())
    {
        $this->name = ctype_lower($name) ? $name : strtolower($name);

        foreach ($attr as $raw_key => $value) {
            // PHP stores numeric-string array keys as integers. Work on the
            // string form so that ctype_lower() never sees an int (it would
            // treat -128..255 as an ASCII code and is deprecated for
            // non-strings since PHP 8.1), and so that the identity check
            // below compares string to string.
            $key = (string) $raw_key;

            // normalization only necessary when key is not lowercase
            if (ctype_lower($key)) {
                continue;
            }

            $new_key = strtolower($key);
            if (!isset($attr[$new_key])) {
                $attr[$new_key] = $attr[$key];
            }
            // Only drop the original entry when lower-casing really produced
            // a different key; otherwise we would delete the attribute.
            if ($new_key !== $key) {
                unset($attr[$key]);
            }
        }

        $this->attr  = $attr;
        $this->line  = $line;
        $this->col   = $col;
        $this->armor = $armor;
    }
}

// ---------------------------------------------------------------------------
// lib/htmlpurifier/library/HTMLPurifier/Token/Text.php
// ---------------------------------------------------------------------------

/**
 * Concrete text token class.
 *
 * Text tokens comprise of regular parsed character data (PCDATA) and raw
 * character data (from the CDATA sections). Internally, their
 * data is parsed with all entities expanded. Surprisingly, the text token
 * does have a "tag name" called #PCDATA, which is how the DTD represents it
 * in permissible child nodes.
 */
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{

    public $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. */
    public $data; /**< Parsed character data of text. */
    public $is_whitespace; /**< Bool indicating if node is whitespace. */

    /**
     * Constructor, accepts data and determines if it is whitespace.
     *
     * Whitespace detection is delegated to ctype_space(), so an empty
     * string is not flagged as whitespace.
     *
     * @param $data String parsed character data.
     * @param $line Stored as-is in $this->line.
     * @param $col  Stored as-is in $this->col.
     */
    public function __construct($data, $line = null, $col = null)
    {
        $this->data = $data;
        $this->is_whitespace = ctype_space($data);
        $this->line = $line;
        $this->col  = $col;
    }

}

// vim: et sw=4 sts=4