diff options
Diffstat (limited to 'library/HTMLPurifier/AttrDef/HTML/Nmtokens.php')
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/Nmtokens.php | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php new file mode 100644 index 000000000..aa34120bd --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php @@ -0,0 +1,52 @@ +<?php + +/** + * Validates contents based on NMTOKENS attribute type. + */ +class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef +{ + + public function validate($string, $config, $context) { + + $string = trim($string); + + // early abort: '' and '0' (strings that convert to false) are invalid + if (!$string) return false; + + $tokens = $this->split($string, $config, $context); + $tokens = $this->filter($tokens, $config, $context); + if (empty($tokens)) return false; + return implode(' ', $tokens); + + } + + /** + * Splits a space separated list of tokens into its constituent parts. + */ + protected function split($string, $config, $context) { + // OPTIMIZABLE! + // do the preg_match, capture all subpatterns for reformulation + + // we don't support U+00A1 and up codepoints or + // escaping because I don't know how to do that with regexps + // and plus it would complicate optimization efforts (you never + // see that anyway). + $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start + '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. + '(?:(?=\s)|\z)/'; // look ahead for space or string end + preg_match_all($pattern, $string, $matches); + return $matches[1]; + } + + /** + * Template method for removing certain tokens based on arbitrary criteria. + * @note If we wanted to be really functional, we'd do an array_filter + * with a callback. But... we're not. + */ + protected function filter($tokens, $config, $context) { + return $tokens; + } + +} + +// vim: et sw=4 sts=4 |