aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
diff options
context:
space:
mode:
Diffstat (limited to 'library/HTMLPurifier/AttrDef/HTML/Nmtokens.php')
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Nmtokens.php52
1 files changed, 52 insertions, 0 deletions
diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
new file mode 100644
index 000000000..aa34120bd
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@@ -0,0 +1,52 @@
+<?php
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Validates an attribute value as a whitespace-separated token list.
+     *
+     * The value is trimmed, broken into tokens by split(), passed through
+     * filter(), and the surviving tokens are re-joined with single spaces.
+     *
+     * @param string $string  Raw attribute value.
+     * @param mixed  $config  Forwarded untouched to split() and filter().
+     * @param mixed  $context Forwarded untouched to split() and filter().
+     * @return string|false Normalized token list, or false when the trimmed
+     *                      value is falsy or no token survives.
+     */
+    public function validate($string, $config, $context)
+    {
+        $trimmed = trim($string);
+
+        // Bail out early: both '' and '0' evaluate to false in PHP and
+        // neither is accepted as a token list.
+        if (!$trimmed) {
+            return false;
+        }
+
+        $accepted = $this->filter(
+            $this->split($trimmed, $config, $context),
+            $config,
+            $context
+        );
+
+        if (!empty($accepted)) {
+            return implode(' ', $accepted);
+        }
+        return false;
+    }
+
+    /**
+     * Extracts the individual tokens from a space separated list.
+     *
+     * A token starts with "--", or with an optional "-" followed by an ASCII
+     * letter or underscore, and continues with ASCII letters, digits,
+     * underscores or hyphens. It must be delimited by whitespace or by the
+     * string boundaries on both sides, so a chunk containing any other
+     * character is dropped as a whole rather than partially matched.
+     *
+     * @param string $string  Token list to split.
+     * @param mixed  $config  Not used by this implementation.
+     * @param mixed  $context Not used by this implementation.
+     * @return array Matched tokens, in order of appearance.
+     */
+    protected function split($string, $config, $context)
+    {
+        // OPTIMIZABLE: a single preg_match capturing every subpattern
+        // could be used to rebuild the value directly.
+        //
+        // Codepoints U+00A1 and above, as well as escapes, are deliberately
+        // not supported: they are awkward to express as a regexp, would
+        // complicate optimization, and are practically never seen.
+        $leftBoundary = '/(?:(?<=\s)|\A)';   // preceded by space or string start
+        $tokenBody = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
+        $rightBoundary = '(?:(?=\s)|\z)/';   // followed by space or string end
+
+        preg_match_all($leftBoundary . $tokenBody . $rightBoundary, $string, $found);
+
+        // Index 1 holds the contents of the only capturing group.
+        return $found[1];
+    }
+
+    /**
+     * Hook for discarding tokens according to arbitrary criteria.
+     *
+     * This base implementation keeps every token; it exists as a template
+     * method so the removal rule can be supplied by overriding it.
+     *
+     * @note A more functional design would use array_filter with a
+     *       callback; this class intentionally does not.
+     *
+     * @param array $tokens  Tokens produced by split().
+     * @param mixed $config  Not used by this implementation.
+     * @param mixed $context Not used by this implementation.
+     * @return array The tokens to keep (here: all of them, unchanged).
+     */
+    protected function filter($tokens, $config, $context)
+    {
+        return $tokens;
+    }
+
+}
+
+// vim: et sw=4 sts=4