From ffb1997902facb36b78a7cfa522f41f2b3d71cda Mon Sep 17 00:00:00 2001 From: Mike Macgirvin Date: Wed, 8 Sep 2010 20:14:17 -0700 Subject: mistpark 2.0 infrasturcture lands --- library/HTMLPurifier/AttrDef/HTML/Bool.php | 28 +++++++++ library/HTMLPurifier/AttrDef/HTML/Class.php | 34 +++++++++++ library/HTMLPurifier/AttrDef/HTML/Color.php | 32 +++++++++++ library/HTMLPurifier/AttrDef/HTML/FrameTarget.php | 21 +++++++ library/HTMLPurifier/AttrDef/HTML/ID.php | 70 +++++++++++++++++++++++ library/HTMLPurifier/AttrDef/HTML/Length.php | 41 +++++++++++++ library/HTMLPurifier/AttrDef/HTML/LinkTypes.php | 53 +++++++++++++++++ library/HTMLPurifier/AttrDef/HTML/MultiLength.php | 41 +++++++++++++ library/HTMLPurifier/AttrDef/HTML/Nmtokens.php | 52 +++++++++++++++++ library/HTMLPurifier/AttrDef/HTML/Pixels.php | 48 ++++++++++++++++ 10 files changed, 420 insertions(+) create mode 100644 library/HTMLPurifier/AttrDef/HTML/Bool.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/Class.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/Color.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/FrameTarget.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/ID.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/Length.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/LinkTypes.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/MultiLength.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/Nmtokens.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/Pixels.php (limited to 'library/HTMLPurifier/AttrDef/HTML') diff --git a/library/HTMLPurifier/AttrDef/HTML/Bool.php b/library/HTMLPurifier/AttrDef/HTML/Bool.php new file mode 100644 index 000000000..e06987eb8 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Bool.php @@ -0,0 +1,28 @@ +name = $name;} + + public function validate($string, $config, $context) { + if (empty($string)) return false; + return $this->name; + } + + /** + * @param $string Name of attribute + */ + public function make($string) { + return new HTMLPurifier_AttrDef_HTML_Bool($string); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Class.php b/library/HTMLPurifier/AttrDef/HTML/Class.php new file mode 100644 index 000000000..370068d97 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Class.php @@ -0,0 +1,34 @@ +getDefinition('HTML')->doctype->name; + if ($name == "XHTML 1.1" || $name == "XHTML 2.0") { + return parent::split($string, $config, $context); + } else { + return preg_split('/\s+/', $string); + } + } + protected function filter($tokens, $config, $context) { + $allowed = $config->get('Attr.AllowedClasses'); + $forbidden = $config->get('Attr.ForbiddenClasses'); + $ret = array(); + foreach ($tokens as $token) { + if ( + ($allowed === null || isset($allowed[$token])) && + !isset($forbidden[$token]) && + // We need this O(n) check because of PHP's array + // implementation that casts -0 to 0. + !in_array($token, $ret, true) + ) { + $ret[] = $token; + } + } + return $ret; + } +} diff --git a/library/HTMLPurifier/AttrDef/HTML/Color.php b/library/HTMLPurifier/AttrDef/HTML/Color.php new file mode 100644 index 000000000..d01e20454 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Color.php @@ -0,0 +1,32 @@ +get('Core.ColorKeywords'); + + $string = trim($string); + + if (empty($string)) return false; + if (isset($colors[$string])) return $colors[$string]; + if ($string[0] === '#') $hex = substr($string, 1); + else $hex = $string; + + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; + if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; + + return "#$hex"; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php new file mode 100644 index 000000000..ae6ea7c01 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php @@ -0,0 +1,21 @@ +valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets'); + return parent::validate($string, $config, $context); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php new file mode 100644 index 000000000..81d03762d --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -0,0 +1,70 @@ +get('Attr.EnableID')) return false; + + $id = trim($id); // trim it first + + if ($id === '') return false; + + $prefix = $config->get('Attr.IDPrefix'); + if ($prefix !== '') { + $prefix .= $config->get('Attr.IDPrefixLocal'); + // prevent re-appending the prefix + if (strpos($id, $prefix) !== 0) $id = $prefix . $id; + } elseif ($config->get('Attr.IDPrefixLocal') !== '') { + trigger_error('%Attr.IDPrefixLocal cannot be used unless '. + '%Attr.IDPrefix is set', E_USER_WARNING); + } + + //if (!$this->ref) { + $id_accumulator =& $context->get('IDAccumulator'); + if (isset($id_accumulator->ids[$id])) return false; + //} + + // we purposely avoid using regex, hopefully this is faster + + if (ctype_alpha($id)) { + $result = true; + } else { + if (!ctype_alpha(@$id[0])) return false; + $trim = trim( // primitive style of regexps, I suppose + $id, + 'A..Za..z0..9:-._' + ); + $result = ($trim === ''); + } + + $regexp = $config->get('Attr.IDBlacklistRegexp'); + if ($regexp && preg_match($regexp, $id)) { + return false; + } + + if (/*!$this->ref && */$result) $id_accumulator->add($id); + + // if no change was made to the ID, return the result + // else, return the new id if stripping whitespace made it + // valid, or return false. + return $result ? $id : false; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Length.php b/library/HTMLPurifier/AttrDef/HTML/Length.php new file mode 100644 index 000000000..a242f9c23 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Length.php @@ -0,0 +1,41 @@ + 100) return '100%'; + + return ((string) $points) . '%'; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php new file mode 100644 index 000000000..76d25ed08 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php @@ -0,0 +1,53 @@ + 'AllowedRel', + 'rev' => 'AllowedRev' + ); + if (!isset($configLookup[$name])) { + trigger_error('Unrecognized attribute name for link '. + 'relationship.', E_USER_ERROR); + return; + } + $this->name = $configLookup[$name]; + } + + public function validate($string, $config, $context) { + + $allowed = $config->get('Attr.' . $this->name); + if (empty($allowed)) return false; + + $string = $this->parseCDATA($string); + $parts = explode(' ', $string); + + // lookup to prevent duplicates + $ret_lookup = array(); + foreach ($parts as $part) { + $part = strtolower(trim($part)); + if (!isset($allowed[$part])) continue; + $ret_lookup[$part] = true; + } + + if (empty($ret_lookup)) return false; + $string = implode(' ', array_keys($ret_lookup)); + + return $string; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php new file mode 100644 index 000000000..c72fc76e4 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php @@ -0,0 +1,41 @@ +split($string, $config, $context); + $tokens = $this->filter($tokens, $config, $context); + if (empty($tokens)) return false; + return implode(' ', $tokens); + + } + + /** + * Splits a space separated list of tokens into its constituent parts. + */ + protected function split($string, $config, $context) { + // OPTIMIZABLE! + // do the preg_match, capture all subpatterns for reformulation + + // we don't support U+00A1 and up codepoints or + // escaping because I don't know how to do that with regexps + // and plus it would complicate optimization efforts (you never + // see that anyway). + $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start + '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. + '(?:(?=\s)|\z)/'; // look ahead for space or string end + preg_match_all($pattern, $string, $matches); + return $matches[1]; + } + + /** + * Template method for removing certain tokens based on arbitrary criteria. + * @note If we wanted to be really functional, we'd do an array_filter + * with a callback. But... we're not. + */ + protected function filter($tokens, $config, $context) { + return $tokens; + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Pixels.php b/library/HTMLPurifier/AttrDef/HTML/Pixels.php new file mode 100644 index 000000000..4cb2c1b85 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Pixels.php @@ -0,0 +1,48 @@ +max = $max; + } + + public function validate($string, $config, $context) { + + $string = trim($string); + if ($string === '0') return $string; + if ($string === '') return false; + $length = strlen($string); + if (substr($string, $length - 2) == 'px') { + $string = substr($string, 0, $length - 2); + } + if (!is_numeric($string)) return false; + $int = (int) $string; + + if ($int < 0) return '0'; + + // upper-bound value, extremely high values can + // crash operating systems, see + // WARNING, above link WILL crash you if you're using Windows + + if ($this->max !== null && $int > $this->max) return (string) $this->max; + + return (string) $int; + + } + + public function make($string) { + if ($string === '') $max = null; + else $max = (int) $string; + $class = get_class($this); + return new $class($max); + } + +} + +// vim: et sw=4 sts=4 -- cgit v1.2.3