aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/Token
diff options
context:
space:
mode:
Diffstat (limited to 'library/HTMLPurifier/Token')
-rw-r--r-- library/HTMLPurifier/Token/Comment.php | 22
-rw-r--r-- library/HTMLPurifier/Token/Empty.php | 11
-rw-r--r-- library/HTMLPurifier/Token/End.php | 19
-rw-r--r-- library/HTMLPurifier/Token/Start.php | 11
-rw-r--r-- library/HTMLPurifier/Token/Tag.php | 56
-rw-r--r-- library/HTMLPurifier/Token/Text.php | 33
6 files changed, 152 insertions, 0 deletions
diff --git a/library/HTMLPurifier/Token/Comment.php b/library/HTMLPurifier/Token/Comment.php
new file mode 100644
index 000000000..dc6bdcabb
--- /dev/null
+++ b/library/HTMLPurifier/Token/Comment.php
@@ -0,0 +1,22 @@
+<?php
+
+/**
+ * Concrete token class for a comment. Generally will be ignored.
+ */
+class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
+{
+    public $data; /**< Character data within comment. */
+    public $is_whitespace = true; /**< Defaults to true; never reassigned here. */
+    /**
+     * Transparent constructor: every argument is stored exactly as passed.
+     * @param $data String comment data.
+     * @param $line Value for $this->line; $col likewise for $this->col.
+     */
+    public function __construct($data, $line = null, $col = null) {
+        $this->data = $data;
+        $this->line = $line;
+        $this->col = $col;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Empty.php b/library/HTMLPurifier/Token/Empty.php
new file mode 100644
index 000000000..2a82b47ad
--- /dev/null
+++ b/library/HTMLPurifier/Token/Empty.php
@@ -0,0 +1,11 @@
+<?php
+
+/**
+ * Concrete token class for an empty tag (one of: start, end, empty).
+ */
+class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
+{
+ // No members of its own: it differs from HTMLPurifier_Token_Tag only by type.
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/End.php b/library/HTMLPurifier/Token/End.php
new file mode 100644
index 000000000..353e79daf
--- /dev/null
+++ b/library/HTMLPurifier/Token/End.php
@@ -0,0 +1,19 @@
+<?php
+
+/**
+ * Concrete end token class; adds only the $start property to the tag base.
+ *
+ * @warning This class accepts attributes even though end tags cannot. This
+ * is for optimization reasons, as under normal circumstances, the Lexers
+ * do not pass attributes.
+ */
+class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
+{
+ /**
+ * Token that started this node. Added by MakeWellFormed; this class
+ * never assigns it, so it starts out null. Please do not edit this!
+ */
+ public $start;
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Start.php b/library/HTMLPurifier/Token/Start.php
new file mode 100644
index 000000000..e0e14fc62
--- /dev/null
+++ b/library/HTMLPurifier/Token/Start.php
@@ -0,0 +1,11 @@
+<?php
+
+/**
+ * Concrete token class for a start tag (one of: start, end, empty).
+ */
+class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
+{
+ // No members of its own: it differs from HTMLPurifier_Token_Tag only by type.
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Tag.php b/library/HTMLPurifier/Token/Tag.php
new file mode 100644
index 000000000..798be028e
--- /dev/null
+++ b/library/HTMLPurifier/Token/Tag.php
@@ -0,0 +1,56 @@
+<?php
+
+/**
+ * Abstract class of a tag token (start, end or empty), and its behavior.
+ */
+class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
+{
+    /**
+     * Bool marker (a per-instance property) that indicates the class is a tag.
+     *
+     * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
+     * without having to use a function call <tt>is_a()</tt>.
+     */
+    public $is_tag = true;
+
+    /**
+     * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
+     *
+     * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
+     *       be lower-casing them, but these tokens cater to HTML tags, which
+     *       are insensitive.
+     */
+    public $name;
+
+    /**
+     * Associative array of the tag's attributes; keys are lower-cased on construction.
+     */
+    public $attr = array();
+
+    /**
+     * Non-overloaded constructor; lower-cases the tag name and attribute keys.
+     * @param $name String name.
+     * @param $attr Associative array of attributes.
+     * @param $line Value for $this->line; $col likewise for $this->col.
+     */
+    public function __construct($name, $attr = array(), $line = null, $col = null) {
+        $this->name = ctype_lower($name) ? $name : strtolower($name);
+        foreach ($attr as $key => $value) {
+            // only non-lowercase keys need work; keys like '1' arrive as ints, so cast
+            if (!ctype_lower((string) $key)) {
+                $new_key = strtolower((string) $key);
+                if (!isset($attr[$new_key])) {
+                    $attr[$new_key] = $attr[$key];
+                }
+                if ($new_key !== (string) $key) {
+                    unset($attr[$key]);
+                }
+            }
+        }
+        $this->attr = $attr;
+        $this->line = $line;
+        $this->col = $col;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Text.php b/library/HTMLPurifier/Token/Text.php
new file mode 100644
index 000000000..82efd823d
--- /dev/null
+++ b/library/HTMLPurifier/Token/Text.php
@@ -0,0 +1,33 @@
+<?php
+
+/**
+ * Concrete text token class.
+ *
+ * Text tokens consist of regular parsed character data (PCDATA) and raw
+ * character data (from the CDATA sections). Internally, their
+ * data is parsed with all entities expanded. Surprisingly, the text token
+ * does have a "tag name" called #PCDATA, which is how the DTD represents it
+ * in permissible child nodes.
+ */
+class HTMLPurifier_Token_Text extends HTMLPurifier_Token
+{
+    public $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. */
+    public $data; /**< Parsed character data of text. */
+    public $is_whitespace; /**< Bool indicating if node is whitespace. */
+
+    /**
+     * Constructor; records the data and whether it is entirely whitespace.
+     * An empty string is not flagged as whitespace (ctype_space('') is false).
+     * @param $data String parsed character data.
+     * @param $line Value for $this->line; $col likewise for $this->col.
+     */
+    public function __construct($data, $line = null, $col = null) {
+        $whitespace_only = ctype_space($data);
+        $this->data = $data;
+        $this->is_whitespace = $whitespace_only;
+        $this->line = $line;
+        $this->col = $col;
+    }
+}
+
+// vim: et sw=4 sts=4