diff options
Diffstat (limited to 'library/HTMLPurifier/AttrDef/HTML')
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/Bool.php | 44 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/Class.php | 45 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/Color.php | 48 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/FrameTarget.php | 28 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/ID.php | 80 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/Length.php | 49 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/LinkTypes.php | 69 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/MultiLength.php | 51 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/Nmtokens.php | 60 | ||||
-rw-r--r-- | library/HTMLPurifier/AttrDef/HTML/Pixels.php | 65 | ||||
10 files changed, 539 insertions, 0 deletions
diff --git a/library/HTMLPurifier/AttrDef/HTML/Bool.php b/library/HTMLPurifier/AttrDef/HTML/Bool.php
new file mode 100644
index 000000000..e06987eb8
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Bool.php
@@ -0,0 +1,44 @@
+<?php
+
+/**
+ * Validates a boolean attribute
+ */
+class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
+{
+
+    /** Attribute name; returned by validate() as the canonical value. */
+    protected $name;
+
+    /** Always true here; nothing in this class reads it. */
+    public $minimized = true;
+
+    /**
+     * @param $name Name of the attribute (defaults to false)
+     */
+    public function __construct($name = false) {$this->name = $name;}
+
+    /**
+     * Replaces any non-empty value with the attribute name.
+     * @param $string Raw attribute value
+     * @param $config Not used by this validator
+     * @param $context Not used by this validator
+     * @return The attribute name, or false if the value is empty
+     */
+    public function validate($string, $config, $context) {
+        // Only a truly empty value is rejected. empty() would also treat
+        // the string '0' as empty and drop attributes such as disabled="0".
+        if ((string) $string === '') return false;
+        return $this->name;
+    }
+
+    /**
+     * Factory: builds a validator bound to the given attribute name.
+     * @param $string Name of attribute
+     */
+    public function make($string) {
+        return new HTMLPurifier_AttrDef_HTML_Bool($string);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Class.php b/library/HTMLPurifier/AttrDef/HTML/Class.php
new file mode 100644
index 000000000..370068d97
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Class.php
@@ -0,0 +1,45 @@
+<?php
+
+/**
+ * Implements special behavior for class attribute (normally NMTOKENS)
+ */
+class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
+{
+    /**
+     * Splits the class attribute into tokens. For the XHTML 1.1 and
+     * XHTML 2.0 doctypes the parent's NMTOKENS pattern is used; for any
+     * other doctype the value is simply split on runs of whitespace.
+     */
+    protected function split($string, $config, $context) {
+        // really, this twiddle should be lazy loaded
+        $name = $config->getDefinition('HTML')->doctype->name;
+        if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
+            return parent::split($string, $config, $context);
+        } else {
+            return preg_split('/\s+/', $string);
+        }
+    }
+
+    /**
+     * Keeps a token only if %Attr.AllowedClasses is null or lists it,
+     * %Attr.ForbiddenClasses does not list it, and it has not already
+     * been kept. Order of first appearance is preserved.
+     */
+    protected function filter($tokens, $config, $context) {
+        $allowed = $config->get('Attr.AllowedClasses');
+        $forbidden = $config->get('Attr.ForbiddenClasses');
+        $ret = array();
+        foreach ($tokens as $token) {
+            if (
+                ($allowed === null || isset($allowed[$token])) &&
+                !isset($forbidden[$token]) &&
+                // We need this O(n) check because of PHP's array
+                // implementation that casts -0 to 0.
+                !in_array($token, $ret, true)
+            ) {
+                $ret[] = $token;
+            }
+        }
+        return $ret;
+    }
+}
diff --git a/library/HTMLPurifier/AttrDef/HTML/Color.php b/library/HTMLPurifier/AttrDef/HTML/Color.php
new file mode 100644
index 000000000..d01e20454
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Color.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Validates a color according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts a keyword from %Core.ColorKeywords or a 3- or 6-digit hex
+     * color, with or without the leading '#'.
+     * @param $string Raw attribute value
+     * @param $config Configuration; %Core.ColorKeywords is read from it
+     * @param $context Not used by this validator
+     * @return The keyword's mapped value, a '#'-prefixed six-digit hex
+     *         string, or false if the value is not a color
+     */
+    public function validate($string, $config, $context) {
+
+        // Read the keywords on every call. Caching them in a function
+        // static would pin the first configuration's keywords for every
+        // later call, even when a different $config is passed in.
+        $colors = $config->get('Core.ColorKeywords');
+
+        $string = trim($string);
+
+        if ($string === '') return false;
+        if (isset($colors[$string])) return $colors[$string];
+        // HTML color names are case-insensitive; retry in lowercase.
+        // NOTE(review): assumes the keyword table uses lowercase keys.
+        $lower = strtolower($string);
+        if (isset($colors[$lower])) return $colors[$lower];
+        if ($string[0] === '#') $hex = substr($string, 1);
+        else $hex = $string;
+
+        $length = strlen($hex);
+        if ($length !== 3 && $length !== 6) return false;
+        if (!ctype_xdigit($hex)) return false;
+        // expand shorthand: abc becomes aabbcc
+        if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];
+
+        return "#$hex";
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
new file mode 100644
index 000000000..ae6ea7c01
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Special-case enum attribute definition that lazy loads allowed frame targets
+ */
+class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
+{
+
+    public $valid_values = false; // uninitialized value
+    protected $case_sensitive = false;
+
+    /** Takes no arguments; the allowed values are loaded in validate(). */
+    public function __construct() {}
+
+    /**
+     * Loads %Attr.AllowedFrameTargets on first use, then delegates to the
+     * parent validator.
+     * @note The values are cached on this instance, so a later call with
+     *       a different $config still uses the first configuration's list.
+     */
+    public function validate($string, $config, $context) {
+        if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets');
+        return parent::validate($string, $config, $context);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php
new file mode 100644
index 000000000..81d03762d
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/ID.php
@@ -0,0 +1,80 @@
+<?php
+
+/**
+ * Validates the HTML attribute ID.
+ * @warning Even though this is the id processor, it
+ *          will ignore the directive Attr:IDBlacklist, since it will only
+ *          go according to the ID accumulator. Since the accumulator is
+ *          automatically generated, it will have already absorbed the
+ *          blacklist. If you're hacking around, make sure you use load()!
+ */
+
+class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
+{
+
+    // ref functionality disabled, since we also have to verify
+    // whether or not the ID it refers to exists
+
+    /**
+     * Trims the ID, applies the configured prefix, rejects duplicates and
+     * blacklisted or malformed IDs, and registers accepted IDs with the
+     * ID accumulator found in the context.
+     * @param $id Raw attribute value
+     * @param $config Configuration; reads %Attr.EnableID, %Attr.IDPrefix,
+     *        %Attr.IDPrefixLocal and %Attr.IDBlacklistRegexp
+     * @param $context Context; must provide 'IDAccumulator'
+     * @return The (possibly prefixed) ID, or false if it was rejected
+     */
+    public function validate($id, $config, $context) {
+
+        if (!$config->get('Attr.EnableID')) return false;
+
+        $id = trim($id); // trim it first
+
+        if ($id === '') return false;
+
+        $prefix = $config->get('Attr.IDPrefix');
+        if ($prefix !== '') {
+            $prefix .= $config->get('Attr.IDPrefixLocal');
+            // prevent re-appending the prefix
+            if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
+        } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
+            trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
+                '%Attr.IDPrefix is set', E_USER_WARNING);
+        }
+
+        //if (!$this->ref) {
+            $id_accumulator =& $context->get('IDAccumulator');
+            if (isset($id_accumulator->ids[$id])) return false;
+        //}
+
+        // we purposely avoid using regex, hopefully this is faster
+
+        if (ctype_alpha($id)) {
+            $result = true;
+        } else {
+            // $id is never empty here (checked above, and the prefix is
+            // only ever prepended), so $id[0] needs no error suppression
+            if (!ctype_alpha($id[0])) return false;
+            $trim = trim( // primitive style of regexps, I suppose
+                $id,
+                'A..Za..z0..9:-._'
+            );
+            $result = ($trim === '');
+        }
+
+        $regexp = $config->get('Attr.IDBlacklistRegexp');
+        if ($regexp && preg_match($regexp, $id)) {
+            return false;
+        }
+
+        if (/*!$this->ref && */$result) $id_accumulator->add($id);
+
+        // return the normalized ID if it is well-formed, false otherwise
+        return $result ? $id : false;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Length.php b/library/HTMLPurifier/AttrDef/HTML/Length.php
new file mode 100644
index 000000000..a242f9c23
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Length.php
@@ -0,0 +1,49 @@
+<?php
+
+/**
+ * Validates the HTML type length (not to be confused with CSS's length).
+ *
+ * This accepts integer pixels or percentages as lengths for certain
+ * HTML attributes.
+ */
+
+class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
+{
+
+    /**
+     * Tries the value as pixels first, then as a percentage.
+     * @param $string Raw attribute value
+     * @param $config Passed through to the pixel validator
+     * @param $context Passed through to the pixel validator
+     * @return A pixel count, a percentage clamped to 0%..100%, or false
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+        if ($string === '') return false;
+
+        $parent_result = parent::validate($string, $config, $context);
+        if ($parent_result !== false) return $parent_result;
+
+        $length = strlen($string);
+        $last_char = $string[$length - 1];
+
+        if ($last_char !== '%') return false;
+
+        $points = substr($string, 0, $length - 1);
+
+        if (!is_numeric($points)) return false;
+
+        // fractional percentages are truncated, not rounded
+        $points = (int) $points;
+
+        if ($points < 0) return '0%';
+        if ($points > 100) return '100%';
+
+        return ((string) $points) . '%';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
new file mode 100644
index 000000000..76d25ed08
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
@@ -0,0 +1,69 @@
+<?php
+
+/**
+ * Validates a rel/rev link attribute against a directive of allowed values
+ * @note We cannot use Enum because link types allow multiple
+ *       values.
+ * @note Assumes link types are ASCII text
+ */
+class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
+{
+
+    /** Name config attribute to pull. */
+    protected $name;
+
+    /**
+     * @param $name Attribute being validated, either 'rel' or 'rev';
+     *        any other value raises an E_USER_ERROR
+     */
+    public function __construct($name) {
+        $configLookup = array(
+            'rel' => 'AllowedRel',
+            'rev' => 'AllowedRev'
+        );
+        if (!isset($configLookup[$name])) {
+            trigger_error('Unrecognized attribute name for link '.
+                'relationship.', E_USER_ERROR);
+            return;
+        }
+        $this->name = $configLookup[$name];
+    }
+
+    /**
+     * Keeps the link types listed in the matching %Attr.AllowedRel or
+     * %Attr.AllowedRev directive, lowercased and without duplicates.
+     * @param $string Raw attribute value
+     * @param $config Configuration holding the allowed link types
+     * @param $context Not used by this validator
+     * @return Space-separated allowed link types, or false if none remain
+     */
+    public function validate($string, $config, $context) {
+
+        // construction failed (unrecognized attribute name) but a custom
+        // error handler let execution continue: there is nothing to allow
+        if ($this->name === null) return false;
+
+        $allowed = $config->get('Attr.' . $this->name);
+        if (empty($allowed)) return false;
+
+        $string = $this->parseCDATA($string);
+        $parts = explode(' ', $string);
+
+        // lookup to prevent duplicates
+        $ret_lookup = array();
+        foreach ($parts as $part) {
+            $part = strtolower(trim($part));
+            if (!isset($allowed[$part])) continue;
+            $ret_lookup[$part] = true;
+        }
+
+        if (empty($ret_lookup)) return false;
+        $string = implode(' ', array_keys($ret_lookup));
+
+        return $string;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
new file mode 100644
index 000000000..c72fc76e4
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
@@ -0,0 +1,51 @@
+<?php
+
+/**
+ * Validates a MultiLength as defined by the HTML spec.
+ *
+ * A multilength is either an integer (pixel count), a percentage, or
+ * a relative number.
+ */
+class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
+{
+
+    /**
+     * Tries the value as a length (pixels or percentage) first, then as a
+     * relative length such as "3*".
+     * @param $string Raw attribute value
+     * @param $config Passed through to the length validator
+     * @param $context Passed through to the length validator
+     * @return The normalized multilength, or false if it is invalid
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+        if ($string === '') return false;
+
+        $parent_result = parent::validate($string, $config, $context);
+        if ($parent_result !== false) return $parent_result;
+
+        $length = strlen($string);
+        $last_char = $string[$length - 1];
+
+        if ($last_char !== '*') return false;
+
+        $int = substr($string, 0, $length - 1);
+
+        // a bare "*" is already in its shortest form
+        if ($int == '') return '*';
+        if (!is_numeric($int)) return false;
+
+        $int = (int) $int;
+
+        if ($int < 0) return false;
+        if ($int == 0) return '0';
+        // "1*" is shortened to "*"
+        if ($int == 1) return '*';
+        return ((string) $int) . '*';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
new file mode 100644
index 000000000..aa34120bd
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@@ -0,0 +1,60 @@
+<?php
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Splits the value into tokens, filters them, and joins the
+     * survivors with single spaces.
+     * @param $string Raw attribute value
+     * @param $config Passed through to split() and filter()
+     * @param $context Passed through to split() and filter()
+     * @return The normalized token list, or false if no token survives
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+
+        // early abort: '' and '0' (strings that convert to false) are invalid
+        if (!$string) return false;
+
+        $tokens = $this->split($string, $config, $context);
+        $tokens = $this->filter($tokens, $config, $context);
+        if (empty($tokens)) return false;
+        return implode(' ', $tokens);
+
+    }
+
+    /**
+     * Splits a space separated list of tokens into its constituent parts.
+     */
+    protected function split($string, $config, $context) {
+        // OPTIMIZABLE!
+        // do the preg_match, capture all subpatterns for reformulation
+
+        // we don't support U+00A1 and up codepoints or
+        // escaping because I don't know how to do that with regexps
+        // and plus it would complicate optimization efforts (you never
+        // see that anyway).
+        $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
+                   '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
+                   '(?:(?=\s)|\z)/'; // look ahead for space or string end
+        preg_match_all($pattern, $string, $matches);
+        return $matches[1];
+    }
+
+    /**
+     * Template method for removing certain tokens based on arbitrary criteria.
+     * @note If we wanted to be really functional, we'd do an array_filter
+     *       with a callback. But... we're not.
+     */
+    protected function filter($tokens, $config, $context) {
+        return $tokens;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Pixels.php b/library/HTMLPurifier/AttrDef/HTML/Pixels.php
new file mode 100644
index 000000000..4cb2c1b85
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Pixels.php
@@ -0,0 +1,65 @@
+<?php
+
+/**
+ * Validates an integer representation of pixels according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
+{
+
+    /** Largest value validate() will return, or null for no limit. */
+    protected $max;
+
+    /**
+     * @param $max Upper bound for accepted values, or null for no limit
+     */
+    public function __construct($max = null) {
+        $this->max = $max;
+    }
+
+    /**
+     * Normalizes a pixel count: strips a trailing "px", truncates to an
+     * integer, raises negatives to 0 and caps the result at the maximum.
+     * @param $string Raw attribute value
+     * @param $config Not used by this validator
+     * @param $context Not used by this validator
+     * @return The normalized pixel count as a string, or false if the
+     *         value is not numeric
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+        if ($string === '0') return $string;
+        if ($string === '') return false;
+        $length = strlen($string);
+        if (substr($string, $length - 2) == 'px') {
+            $string = substr($string, 0, $length - 2);
+        }
+        if (!is_numeric($string)) return false;
+        $int = (int) $string;
+
+        if ($int < 0) return '0';
+
+        // upper-bound value, extremely high values can
+        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
+        // WARNING, above link WILL crash you if you're using Windows
+
+        if ($this->max !== null && $int > $this->max) return (string) $this->max;
+
+        return (string) $int;
+
+    }
+
+    /**
+     * Factory: builds a validator of the same class as this one.
+     * @param $string Maximum value as a string; '' means no limit
+     */
+    public function make($string) {
+        if ($string === '') $max = null;
+        else $max = (int) $string;
+        $class = get_class($this);
+        return new $class($max);
+    }
+
+}
+
+// vim: et sw=4 sts=4