aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/AttrDef
diff options
context:
space:
mode:
authorMike Macgirvin <mike@macgirvin.com>2010-09-08 20:14:17 -0700
committerMike Macgirvin <mike@macgirvin.com>2010-09-08 20:14:17 -0700
commitffb1997902facb36b78a7cfa522f41f2b3d71cda (patch)
treee9fe47acf26c5fd2c742677f2610b60d3008eb26 /library/HTMLPurifier/AttrDef
parentb49858b038a0a05bbe7685929e88071d0e125538 (diff)
downloadvolse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.tar.gz
volse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.tar.bz2
volse-hubzilla-ffb1997902facb36b78a7cfa522f41f2b3d71cda.zip
mistpark 2.0 infrasturcture lands
Diffstat (limited to 'library/HTMLPurifier/AttrDef')
-rw-r--r--library/HTMLPurifier/AttrDef/CSS.php87
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/AlphaValue.php21
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Background.php87
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php133
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Border.php43
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Color.php78
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Composite.php38
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php28
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Filter.php54
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Font.php149
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/FontFamily.php72
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php40
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Length.php47
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/ListStyle.php78
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Multiple.php58
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Number.php69
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/Percentage.php40
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/TextDecoration.php38
-rw-r--r--library/HTMLPurifier/AttrDef/CSS/URI.php52
-rw-r--r--library/HTMLPurifier/AttrDef/Enum.php65
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Bool.php28
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Class.php34
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Color.php32
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/FrameTarget.php21
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/ID.php70
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Length.php41
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/LinkTypes.php53
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/MultiLength.php41
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Nmtokens.php52
-rw-r--r--library/HTMLPurifier/AttrDef/HTML/Pixels.php48
-rw-r--r--library/HTMLPurifier/AttrDef/Integer.php73
-rw-r--r--library/HTMLPurifier/AttrDef/Lang.php73
-rw-r--r--library/HTMLPurifier/AttrDef/Switch.php34
-rw-r--r--library/HTMLPurifier/AttrDef/Text.php15
-rw-r--r--library/HTMLPurifier/AttrDef/URI.php77
-rw-r--r--library/HTMLPurifier/AttrDef/URI/Email.php17
-rw-r--r--library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php21
-rw-r--r--library/HTMLPurifier/AttrDef/URI/Host.php62
-rw-r--r--library/HTMLPurifier/AttrDef/URI/IPv4.php39
-rw-r--r--library/HTMLPurifier/AttrDef/URI/IPv6.php99
40 files changed, 2207 insertions, 0 deletions
diff --git a/library/HTMLPurifier/AttrDef/CSS.php b/library/HTMLPurifier/AttrDef/CSS.php
new file mode 100644
index 000000000..953e70675
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS.php
@@ -0,0 +1,87 @@
+<?php
+
+/**
+ * Validates the HTML attribute style, otherwise known as CSS.
+ * @note We don't implement the whole CSS specification, so it might be
+ * difficult to reuse this component in the context of validating
+ * actual stylesheet declarations.
+ * @note If we were really serious about validating the CSS, we would
+ * tokenize the styles and then parse the tokens. Obviously, we
+ * are not doing that. Doing that could seriously harm performance,
+ * but would make these components a lot more viable for a CSS
+ * filtering solution.
+ */
+class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Validates the contents of a style attribute, declaration by declaration.
+ * @param $css Raw style attribute value
+ * @param $config Configuration object; supplies the CSS definition
+ * @param $context Context object; 'CurrentCSSProperty' is registered on
+ * it for the duration of the call
+ * @return Rebuilt declaration string ("prop:value;" pairs), or false when
+ * no declaration survives validation
+ */
+ public function validate($css, $config, $context) {
+
+ $css = $this->parseCDATA($css);
+
+ $definition = $config->getCSSDefinition();
+
+ // we're going to break the spec and explode by semicolons.
+ // This is because semicolon rarely appears in escaped form
+ // Doing this is generally flaky but fast
+ // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSS_URI
+ // for details
+
+ $declarations = explode(';', $css);
+ $propvalues = array();
+
+ /**
+ * Name of the current CSS property being validated.
+ */
+ $property = false;
+ // NOTE(review): presumably registered by reference so that property
+ // validators can see the name assigned in the loop below - confirm
+ // against HTMLPurifier_Context::register()
+ $context->register('CurrentCSSProperty', $property);
+
+ foreach ($declarations as $declaration) {
+ // skip empty segments (e.g. the one after a trailing semicolon)
+ if (!$declaration) continue;
+ // strpos() yields false (no colon) or 0 (colon first, i.e. empty
+ // property name); both are falsy and both are skipped
+ if (!strpos($declaration, ':')) continue;
+ list($property, $value) = explode(':', $declaration, 2);
+ $property = trim($property);
+ $value = trim($value);
+ $ok = false;
+ // single-pass do/while used only so that break can leave early:
+ // try the name as written, then its lowercased form
+ do {
+ if (isset($definition->info[$property])) {
+ $ok = true;
+ break;
+ }
+ // already lowercase, so lowercasing cannot produce a new match
+ if (ctype_lower($property)) break;
+ $property = strtolower($property);
+ if (isset($definition->info[$property])) {
+ $ok = true;
+ break;
+ }
+ } while(0);
+ if (!$ok) continue;
+ // inefficient call, since the validator will do this again
+ if (strtolower(trim($value)) !== 'inherit') {
+ // inherit works for everything (but only on the base property)
+ $result = $definition->info[$property]->validate(
+ $value, $config, $context );
+ } else {
+ $result = 'inherit';
+ }
+ if ($result === false) continue;
+ // keyed by property name: a later duplicate overwrites an earlier one
+ $propvalues[$property] = $result;
+ }
+
+ $context->destroy('CurrentCSSProperty');
+
+ // procedure does not write the new CSS simultaneously, so it's
+ // slightly inefficient, but it's the only way of getting rid of
+ // duplicates. Perhaps config to optimize it, but not now.
+
+ $new_declarations = '';
+ foreach ($propvalues as $prop => $value) {
+ $new_declarations .= "$prop:$value;";
+ }
+
+ // an empty result is reported as false rather than as ''
+ return $new_declarations ? $new_declarations : false;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php b/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
new file mode 100644
index 000000000..292c040d4
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
@@ -0,0 +1,21 @@
+<?php
+
+class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
+{
+
+    /**
+     * Configures the parent number validator with its first argument set
+     * to false; out-of-range values are clamped in validate() instead of
+     * being rejected up front.
+     */
+    public function __construct() {
+        parent::__construct(false); // opacity is non-negative, but we will clamp it
+    }
+
+    /**
+     * Validates a number and clamps it into the range 0..1.
+     * @param $number Candidate alpha value
+     * @param $config Configuration object, passed through to the parent
+     * @param $context Context object, passed through to the parent
+     * @return '0' or '1' when clamped, the parent's result when already in
+     *         range, or false when the parent rejects the input
+     */
+    public function validate($number, $config, $context) {
+        $validated = parent::validate($number, $config, $context);
+        if ($validated === false) {
+            return false;
+        }
+        $magnitude = (float) $validated;
+        if ($magnitude > 1.0) {
+            return '1';
+        }
+        if ($magnitude < 0.0) {
+            return '0';
+        }
+        return $validated;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Background.php b/library/HTMLPurifier/AttrDef/CSS/Background.php
new file mode 100644
index 000000000..3a3d20cd6
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Background.php
@@ -0,0 +1,87 @@
+<?php
+
+/**
+ * Validates shorthand CSS property background.
+ * @warning Does not support url tokens that have internal spaces.
+ */
+class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Local copy of component validators.
+ * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
+ */
+ protected $info;
+
+ /**
+ * Copies the five background-* component validators out of the CSS
+ * definition.
+ * @param $config Configuration object supplying the CSS definition
+ */
+ public function __construct($config) {
+ $def = $config->getCSSDefinition();
+ $this->info['background-color'] = $def->info['background-color'];
+ $this->info['background-image'] = $def->info['background-image'];
+ $this->info['background-repeat'] = $def->info['background-repeat'];
+ $this->info['background-attachment'] = $def->info['background-attachment'];
+ $this->info['background-position'] = $def->info['background-position'];
+ }
+
+ /**
+ * Validates a background shorthand value.
+ * @param $string Raw shorthand value
+ * @param $config Configuration object, passed to component validators
+ * @param $context Context object, passed to component validators
+ * @return Space-separated validated components in the order color, image,
+ * repeat, attachment, position; false if nothing was caught
+ */
+ public function validate($string, $config, $context) {
+
+ // regular pre-processing
+ $string = $this->parseCDATA($string);
+ if ($string === '') return false;
+
+ // munge rgb() decl if necessary
+ // NOTE(review): mungeRgb() is defined outside this file; presumably it
+ // removes the spaces inside rgb(...) so the explode below keeps the
+ // literal in one piece - confirm
+ $string = $this->mungeRgb($string);
+
+ // assumes URI doesn't have spaces in it
+ $bits = explode(' ', strtolower($string)); // bits to process
+
+ // false means "not caught yet"; key order is also the output order
+ $caught = array();
+ $caught['color'] = false;
+ $caught['image'] = false;
+ $caught['repeat'] = false;
+ $caught['attachment'] = false;
+ $caught['position'] = false;
+
+ $i = 0; // number of catches
+ $none = false; // assigned but never read in this method
+
+ foreach ($bits as $bit) {
+ if ($bit === '') continue;
+ // offer the bit to each component in turn; the first taker wins
+ foreach ($caught as $key => $status) {
+ if ($key != 'position') {
+ if ($status !== false) continue;
+ $r = $this->info['background-' . $key]->validate($bit, $config, $context);
+ } else {
+ // position is the last key and accepts every leftover bit
+ // unvalidated; the collected string is validated once below
+ $r = $bit;
+ }
+ if ($r === false) continue;
+ if ($key == 'position') {
+ if ($caught[$key] === false) $caught[$key] = '';
+ $caught[$key] .= $r . ' ';
+ } else {
+ $caught[$key] = $r;
+ }
+ $i++;
+ break;
+ }
+ }
+
+ if (!$i) return false;
+ if ($caught['position'] !== false) {
+ // may turn the collected candidates into false, in which case the
+ // loop below drops the position component
+ $caught['position'] = $this->info['background-position']->
+ validate($caught['position'], $config, $context);
+ }
+
+ $ret = array();
+ foreach ($caught as $value) {
+ if ($value === false) continue;
+ $ret[] = $value;
+ }
+
+ if (empty($ret)) return false;
+ return implode(' ', $ret);
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
new file mode 100644
index 000000000..fae82eaec
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
@@ -0,0 +1,133 @@
+<?php
+
+/* W3C says:
+ [ // adjective and number must be in correct order, even if
+ // you could switch them without introducing ambiguity.
+ // some browsers support that syntax
+ [
+ <percentage> | <length> | left | center | right
+ ]
+ [
+ <percentage> | <length> | top | center | bottom
+ ]?
+ ] |
+ [ // this signifies that the vertical and horizontal adjectives
+ // can be arbitrarily ordered, however, there can only be two,
+ // one of each, or none at all
+ [
+ left | center | right
+ ] ||
+ [
+ top | center | bottom
+ ]
+ ]
+ top, left = 0%
+ center, (none) = 50%
+ bottom, right = 100%
+*/
+
+/* QuirksMode says:
+ keyword + length/percentage must be ordered correctly, as per W3C
+
+ Internet Explorer and Opera, however, support arbitrary ordering. We
+ should fix it up.
+
+ Minor issue though, not strictly necessary.
+*/
+
+// control freaks may appreciate the ability to convert these to
+// percentages or something, but it's not necessary
+
+/**
+ * Validates the value of background-position.
+ */
+class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
+{
+
+ // validators for the two kinds of measure accepted besides keywords
+ protected $length;
+ protected $percentage;
+
+ /**
+ * Instantiates the length and percentage validators used by validate().
+ */
+ public function __construct() {
+ $this->length = new HTMLPurifier_AttrDef_CSS_Length();
+ $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
+ }
+
+ /**
+ * Validates a background-position value.
+ * @param $string Raw value
+ * @param $config Configuration object, passed to the measure validators
+ * @param $context Context object, passed to the measure validators
+ * @return At most two space-separated components (horizontal first, then
+ * vertical), or false if nothing was recognised
+ */
+ public function validate($string, $config, $context) {
+ $string = $this->parseCDATA($string);
+ $bits = explode(' ', $string);
+
+ $keywords = array();
+ $keywords['h'] = false; // left, right
+ $keywords['v'] = false; // top, bottom
+ $keywords['ch'] = false; // center (first word)
+ $keywords['cv'] = false; // center (second word)
+ $measures = array();
+
+ // number of recognised keywords/measures so far
+ $i = 0;
+
+ $lookup = array(
+ 'top' => 'v',
+ 'bottom' => 'v',
+ 'left' => 'h',
+ 'right' => 'h',
+ 'center' => 'c'
+ );
+
+ foreach ($bits as $bit) {
+ if ($bit === '') continue;
+
+ // test for keyword
+ $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
+ if (isset($lookup[$lbit])) {
+ $status = $lookup[$lbit];
+ if ($status == 'c') {
+ // center counts as horizontal only if nothing was
+ // recognised before it, otherwise as vertical
+ if ($i == 0) {
+ $status = 'ch';
+ } else {
+ $status = 'cv';
+ }
+ }
+ // a repeated keyword of the same kind overwrites the earlier one
+ $keywords[$status] = $lbit;
+ $i++;
+ }
+
+ // note: there is no continue after a keyword match, so the same
+ // bit is still offered to both measure validators below
+
+ // test for length
+ $r = $this->length->validate($bit, $config, $context);
+ if ($r !== false) {
+ $measures[] = $r;
+ $i++;
+ }
+
+ // test for percentage
+ $r = $this->percentage->validate($bit, $config, $context);
+ if ($r !== false) {
+ $measures[] = $r;
+ $i++;
+ }
+
+ }
+
+ if (!$i) return false; // no valid values were caught
+
+ $ret = array();
+
+ // first keyword
+ if ($keywords['h']) $ret[] = $keywords['h'];
+ elseif ($keywords['ch']) {
+ $ret[] = $keywords['ch'];
+ $keywords['cv'] = false; // prevent re-use: center = center center
+ }
+ elseif (count($measures)) $ret[] = array_shift($measures);
+
+ // second component: vertical keyword, else vertical center, else the
+ // next unused measure
+ if ($keywords['v']) $ret[] = $keywords['v'];
+ elseif ($keywords['cv']) $ret[] = $keywords['cv'];
+ elseif (count($measures)) $ret[] = array_shift($measures);
+
+ if (empty($ret)) return false;
+ return implode(' ', $ret);
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Border.php b/library/HTMLPurifier/AttrDef/CSS/Border.php
new file mode 100644
index 000000000..42a1d1b4a
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Border.php
@@ -0,0 +1,43 @@
+<?php
+
+/**
+ * Validates the border property as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of properties this property is shorthand for.
+     */
+    protected $info = array();
+
+    /**
+     * Copies the width, style and color validators out of the CSS
+     * definition.
+     * @param $config Configuration object supplying the CSS definition
+     */
+    public function __construct($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['border-width'] = $def->info['border-width'];
+        $this->info['border-style'] = $def->info['border-style'];
+        $this->info['border-top-color'] = $def->info['border-top-color'];
+    }
+
+    /**
+     * Validates a border shorthand value.
+     * @param $string Raw shorthand value
+     * @param $config Configuration object, passed to component validators
+     * @param $context Context object, passed to component validators
+     * @return Space-separated validated components in input order, or
+     *         false when no component validates
+     */
+    public function validate($string, $config, $context) {
+        $string = $this->parseCDATA($string);
+        $string = $this->mungeRgb($string);
+        $bits = explode(' ', $string);
+        $done = array(); // segments we've finished
+        $ret = ''; // return value
+        foreach ($bits as $bit) {
+            // consecutive spaces yield empty bits; skip them as the other
+            // shorthand validators do
+            if ($bit === '') continue;
+            foreach ($this->info as $propname => $validator) {
+                // each component may be supplied at most once
+                if (isset($done[$propname])) continue;
+                $r = $validator->validate($bit, $config, $context);
+                if ($r !== false) {
+                    $ret .= $r . ' ';
+                    $done[$propname] = true;
+                    break;
+                }
+            }
+        }
+        $ret = rtrim($ret);
+        // report "nothing valid" as false, not as an empty string, so that
+        // callers testing === false drop the declaration
+        if ($ret === '') return false;
+        return $ret;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Color.php b/library/HTMLPurifier/AttrDef/CSS/Color.php
new file mode 100644
index 000000000..07f95a671
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Color.php
@@ -0,0 +1,78 @@
+<?php
+
+/**
+ * Validates Color as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Validates a CSS color: a keyword, an rgb() literal or a hex triplet.
+     * @param $color Raw color value
+     * @param $config Configuration object; 'Core.ColorKeywords' is read
+     *        from it on the first call and cached statically
+     * @param $context Context object (unused)
+     * @return The mapped keyword value, a normalised "rgb(r,g,b)" literal,
+     *         a hex color with leading '#', or false when invalid
+     */
+    public function validate($color, $config, $context) {
+
+        static $colors = null;
+        if ($colors === null) $colors = $config->get('Core.ColorKeywords');
+
+        $color = trim($color);
+        if ($color === '') return false;
+
+        $lower = strtolower($color);
+        if (isset($colors[$lower])) return $colors[$lower];
+
+        // The literal must START with "rgb(": the slice below assumes the
+        // prefix occupies offsets 0-3, so a match elsewhere in the string
+        // would be cut at the wrong place and garbage would be accepted.
+        if (strpos($color, 'rgb(') === 0) {
+            // rgb literal handling
+            $length = strlen($color);
+            // the first closing paren must also be the last character
+            if (strpos($color, ')') !== $length - 1) return false;
+            $triad = substr($color, 4, $length - 4 - 1);
+            $parts = explode(',', $triad);
+            if (count($parts) !== 3) return false;
+            $type = false; // to ensure that they're all the same type
+            $new_parts = array();
+            foreach ($parts as $part) {
+                $part = trim($part);
+                if ($part === '') return false;
+                $length = strlen($part);
+                if ($part[$length - 1] === '%') {
+                    // handle percents
+                    if (!$type) {
+                        $type = 'percentage';
+                    } elseif ($type !== 'percentage') {
+                        return false;
+                    }
+                    $num = (float) substr($part, 0, $length - 1);
+                    // clamp to 0%..100%
+                    if ($num < 0) $num = 0;
+                    if ($num > 100) $num = 100;
+                    $new_parts[] = "$num%";
+                } else {
+                    // handle integers
+                    if (!$type) {
+                        $type = 'integer';
+                    } elseif ($type !== 'integer') {
+                        return false;
+                    }
+                    $num = (int) $part;
+                    // clamp to 0..255
+                    if ($num < 0) $num = 0;
+                    if ($num > 255) $num = 255;
+                    $new_parts[] = (string) $num;
+                }
+            }
+            $new_triad = implode(',', $new_parts);
+            $color = "rgb($new_triad)";
+        } else {
+            // hexadecimal handling
+            if ($color[0] === '#') {
+                $hex = substr($color, 1);
+            } else {
+                // tolerate a missing hash mark and add it to the output
+                $hex = $color;
+                $color = '#' . $color;
+            }
+            $length = strlen($hex);
+            if ($length !== 3 && $length !== 6) return false;
+            if (!ctype_xdigit($hex)) return false;
+        }
+
+        return $color;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Composite.php b/library/HTMLPurifier/AttrDef/CSS/Composite.php
new file mode 100644
index 000000000..de1289cba
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Composite.php
@@ -0,0 +1,38 @@
+<?php
+
+/**
+ * Allows multiple validators to attempt to validate attribute.
+ *
+ * Composite is just what it sounds like: a composite of many validators.
+ * This means that multiple HTMLPurifier_AttrDef objects will have a whack
+ * at the string. If one of them passes, that's what is returned. This is
+ * especially useful for CSS values, which often are a choice between
+ * an enumerated set of predefined values or a flexible data type.
+ */
+class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * List of HTMLPurifier_AttrDef objects that may process strings
+     * @todo Make protected
+     */
+    public $defs;
+
+    /**
+     * @param $defs List of HTMLPurifier_AttrDef objects
+     */
+    public function __construct($defs) {
+        $this->defs = $defs;
+    }
+
+    /**
+     * Offers the string to each validator in list order.
+     * @param $string Value to validate
+     * @param $config Configuration object, passed to each validator
+     * @param $context Context object, passed to each validator
+     * @return Result of the first validator that does not return false,
+     *         or false when every validator rejects the string
+     */
+    public function validate($string, $config, $context) {
+        foreach ($this->defs as $candidate) {
+            $outcome = $candidate->validate($string, $config, $context);
+            if ($outcome === false) {
+                continue;
+            }
+            return $outcome;
+        }
+        return false;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php b/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
new file mode 100644
index 000000000..6599c5b2d
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Decorator which enables CSS properties to be disabled for specific elements.
+ */
+class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
+{
+    public $def, $element;
+
+    /**
+     * @param $def Definition to wrap
+     * @param $element Element to deny
+     */
+    public function __construct($def, $element) {
+        $this->def = $def;
+        $this->element = $element;
+    }
+
+    /**
+     * Rejects the value when the context's CurrentToken is set and its
+     * name equals $this->element; otherwise defers to the wrapped
+     * definition.
+     * @param $string Value to validate
+     * @param $config Configuration object, passed to the wrapped definition
+     * @param $context Context object queried for 'CurrentToken'
+     * @return false for the denied element, else the wrapped result
+     */
+    public function validate($string, $config, $context) {
+        $current = $context->get('CurrentToken', true);
+        $denied = $current && $current->name == $this->element;
+        return $denied
+            ? false
+            : $this->def->validate($string, $config, $context);
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Filter.php b/library/HTMLPurifier/AttrDef/CSS/Filter.php
new file mode 100644
index 000000000..147894b86
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Filter.php
@@ -0,0 +1,54 @@
+<?php
+
+/**
+ * Microsoft's proprietary filter: CSS property
+ * @note Currently supports the alpha filter. In the future, this will
+ * probably need an extensible framework
+ */
+class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Integer validator used for the opacity parameter.
+     */
+    protected $intValidator;
+
+    public function __construct() {
+        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
+    }
+
+    /**
+     * Validates a filter value; only "none" and the alpha filter with an
+     * opacity parameter are supported.
+     * @param $value Raw filter value
+     * @param $config Configuration object, passed to the integer validator
+     * @param $context Context object, passed to the integer validator
+     * @return 'none', a rebuilt "function(opacity=N)" string with N clamped
+     *         to 0..100 (the parameter list may be empty), or false when
+     *         the function name is not an accepted alpha spelling
+     */
+    public function validate($value, $config, $context) {
+        $value = $this->parseCDATA($value);
+        if ($value === 'none') return $value;
+        // if we looped this we could support multiple filters
+        $function_length = strcspn($value, '(');
+        $function = trim(substr($value, 0, $function_length));
+        if ($function !== 'alpha' &&
+            $function !== 'Alpha' &&
+            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
+            ) return false;
+        // parameters are whatever lies between '(' and the next ')'
+        $cursor = $function_length + 1;
+        $parameters_length = strcspn($value, ')', $cursor);
+        $parameters = substr($value, $cursor, $parameters_length);
+        $params = explode(',', $parameters);
+        $ret_params = array();
+        $lookup = array();
+        foreach ($params as $param) {
+            $pair = explode('=', $param);
+            // A parameter without '=' (e.g. the empty one in "alpha()")
+            // has no value; skip it rather than destructuring a
+            // one-element array, which raises an undefined-offset error.
+            if (count($pair) < 2) continue;
+            // only the first two segments are used, as before
+            list($key, $raw) = $pair;
+            $key = trim($key);
+            $raw = trim($raw);
+            if (isset($lookup[$key])) continue;
+            if ($key !== 'opacity') continue;
+            $opacity = $this->intValidator->validate($raw, $config, $context);
+            if ($opacity === false) continue;
+            $int = (int) $opacity;
+            // clamp to 0..100
+            if ($int > 100) $opacity = '100';
+            if ($int < 0) $opacity = '0';
+            $ret_params[] = "$key=$opacity";
+            $lookup[$key] = true;
+        }
+        $ret_parameters = implode(',', $ret_params);
+        $ret_function = "$function($ret_parameters)";
+        return $ret_function;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Font.php b/library/HTMLPurifier/AttrDef/CSS/Font.php
new file mode 100644
index 000000000..699ee0b70
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Font.php
@@ -0,0 +1,149 @@
+<?php
+
+/**
+ * Validates shorthand CSS property font.
+ */
+class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Local copy of component validators.
+ *
+ * @note If we moved specific CSS property definitions to their own
+ * classes instead of having them be assembled at run time by
+ * CSSDefinition, this wouldn't be necessary. We'd instantiate
+ * our own copies.
+ */
+ protected $info = array();
+
+ /**
+ * Copies the six component validators out of the CSS definition.
+ * @param $config Configuration object supplying the CSS definition
+ */
+ public function __construct($config) {
+ $def = $config->getCSSDefinition();
+ $this->info['font-style'] = $def->info['font-style'];
+ $this->info['font-variant'] = $def->info['font-variant'];
+ $this->info['font-weight'] = $def->info['font-weight'];
+ $this->info['font-size'] = $def->info['font-size'];
+ $this->info['line-height'] = $def->info['line-height'];
+ $this->info['font-family'] = $def->info['font-family'];
+ }
+
+ /**
+ * Validates a font shorthand value.
+ *
+ * Works as a three-stage scan over the space-separated bits: optional
+ * style/variant/weight (stage 0), font-size with optional /line-height
+ * (stage 1), then everything remaining as the font-family (stage 2).
+ *
+ * @param $string Raw shorthand value
+ * @param $config Configuration object, passed to component validators
+ * @param $context Context object, passed to component validators
+ * @return Lowercased system font keyword, the rebuilt shorthand, or false
+ * when the size or family is missing or invalid
+ */
+ public function validate($string, $config, $context) {
+
+ static $system_fonts = array(
+ 'caption' => true,
+ 'icon' => true,
+ 'menu' => true,
+ 'message-box' => true,
+ 'small-caption' => true,
+ 'status-bar' => true
+ );
+
+ // regular pre-processing
+ $string = $this->parseCDATA($string);
+ if ($string === '') return false;
+
+ // check if it's one of the keywords
+ $lowercase_string = strtolower($string);
+ if (isset($system_fonts[$lowercase_string])) {
+ return $lowercase_string;
+ }
+
+ $bits = explode(' ', $string); // bits to process
+ $stage = 0; // this indicates what we're looking for
+ $caught = array(); // which stage 0 properties have we caught?
+ $stage_1 = array('font-style', 'font-variant', 'font-weight');
+ $final = ''; // output
+
+ for ($i = 0, $size = count($bits); $i < $size; $i++) {
+ if ($bits[$i] === '') continue;
+ switch ($stage) {
+
+ // attempting to catch font-style, font-variant or font-weight
+ case 0:
+ foreach ($stage_1 as $validator_name) {
+ if (isset($caught[$validator_name])) continue;
+ $r = $this->info[$validator_name]->validate(
+ $bits[$i], $config, $context);
+ if ($r !== false) {
+ $final .= $r . ' ';
+ $caught[$validator_name] = true;
+ // leaves the foreach only, not the switch
+ break;
+ }
+ }
+ // all three caught, continue on
+ if (count($caught) >= 3) $stage = 1;
+ // bit consumed by stage 0: leave the switch, go to next bit
+ if ($r !== false) break;
+ // otherwise deliberately fall through: the bit that was not a
+ // style/variant/weight is tried as the font-size right away
+
+ // attempting to catch font-size and perhaps line-height
+ case 1:
+ $found_slash = false;
+ if (strpos($bits[$i], '/') !== false) {
+ list($font_size, $line_height) =
+ explode('/', $bits[$i]);
+ if ($line_height === '') {
+ // ooh, there's a space after the slash!
+ $line_height = false;
+ $found_slash = true;
+ }
+ } else {
+ $font_size = $bits[$i];
+ $line_height = false;
+ }
+ $r = $this->info['font-size']->validate(
+ $font_size, $config, $context);
+ if ($r !== false) {
+ $final .= $r;
+ // attempt to catch line-height
+ if ($line_height === false) {
+ // we need to scroll forward
+ for ($j = $i + 1; $j < $size; $j++) {
+ if ($bits[$j] === '') continue;
+ if ($bits[$j] === '/') {
+ // a second slash is a syntax error
+ if ($found_slash) {
+ return false;
+ } else {
+ $found_slash = true;
+ continue;
+ }
+ }
+ // first non-empty, non-slash bit: only treated as
+ // the line-height if a slash was seen (see below)
+ $line_height = $bits[$j];
+ break;
+ }
+ } else {
+ // slash already found
+ $found_slash = true;
+ $j = $i;
+ }
+ if ($found_slash) {
+ // consume the bits scanned for the line-height; without
+ // a slash $i stays put and the peeked bit is left for
+ // the font-family stage
+ $i = $j;
+ $r = $this->info['line-height']->validate(
+ $line_height, $config, $context);
+ // an invalid line-height is dropped, not fatal
+ if ($r !== false) {
+ $final .= '/' . $r;
+ }
+ }
+ $final .= ' ';
+ $stage = 2;
+ break;
+ }
+ // font-size is mandatory
+ return false;
+
+ // attempting to catch font-family
+ case 2:
+ // everything from here to the end is the family list
+ $font_family =
+ implode(' ', array_slice($bits, $i, $size - $i));
+ $r = $this->info['font-family']->validate(
+ $font_family, $config, $context);
+ if ($r !== false) {
+ $final .= $r . ' ';
+ // processing completed successfully
+ return rtrim($final);
+ }
+ return false;
+ }
+ }
+ // ran out of bits before reaching a font-family
+ return false;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
new file mode 100644
index 000000000..42c2054c2
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -0,0 +1,72 @@
+<?php
+
+/**
+ * Validates a font family list according to CSS spec
+ * @todo whitelisting allowed fonts would be nice
+ */
+class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Validates a comma-separated font family list.
+ * @param $string Raw font-family value
+ * @param $config Configuration object (unused)
+ * @param $context Context object (unused)
+ * @return Comma-and-space separated list in which generic and purely
+ * alphanumeric names are bare and all others are double-quoted;
+ * false when no font survives
+ */
+ public function validate($string, $config, $context) {
+ static $generic_names = array(
+ 'serif' => true,
+ 'sans-serif' => true,
+ 'monospace' => true,
+ 'fantasy' => true,
+ 'cursive' => true
+ );
+
+ // assume that no font names contain commas in them
+ $fonts = explode(',', $string);
+ $final = '';
+ foreach($fonts as $font) {
+ $font = trim($font);
+ if ($font === '') continue;
+ // match a generic name
+ if (isset($generic_names[$font])) {
+ $final .= $font . ', ';
+ continue;
+ }
+ // match a quoted name
+ if ($font[0] === '"' || $font[0] === "'") {
+ $length = strlen($font);
+ // two characters or fewer: at most a pair of quotes, no name
+ if ($length <= 2) continue;
+ $quote = $font[0];
+ // closing quote must match the opening one
+ if ($font[$length - 1] !== $quote) continue;
+ $font = substr($font, 1, $length - 2);
+ }
+
+ // NOTE(review): expandCSSEscape() is defined outside this file;
+ // presumably it decodes CSS backslash escapes - confirm
+ $font = $this->expandCSSEscape($font);
+
+ // $font is a pure representation of the font name
+
+ if (ctype_alnum($font) && $font !== '') {
+ // very simple font, allow it in unharmed
+ $final .= $font . ', ';
+ continue;
+ }
+
+ // bugger out on whitespace. form feed (0C) really
+ // shouldn't show up regardless
+ $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
+
+ // These ugly transforms don't pose a security
+ // risk (as \\ and \" might). We could try to be clever and
+ // use single-quote wrapping when there is a double quote
+ // present, but I have chosen not to implement that.
+ // (warning: this code relies on the selection of quotation
+ // mark below)
+ // backslashes are escaped first so that the backslash introduced
+ // by the quote escape on the next line is not escaped again
+ $font = str_replace('\\', '\\5C ', $font);
+ $font = str_replace('"', '\\22 ', $font);
+
+ // complicated font, requires quoting
+ $final .= "\"$font\", "; // note that this will later get turned into &quot;
+ }
+ // strip the trailing separator characters
+ $final = rtrim($final, ', ');
+ if ($final === '') return false;
+ return $final;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php b/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php
new file mode 100644
index 000000000..4e6b35e5a
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php
@@ -0,0 +1,40 @@
+<?php
+
+/**
+ * Decorator which enables !important to be used in CSS values.
+ */
+class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
+{
+    public $def, $allow;
+
+    /**
+     * @param $def Definition to wrap
+     * @param $allow Whether or not to allow !important
+     */
+    public function __construct($def, $allow = false) {
+        $this->def = $def;
+        $this->allow = $allow;
+    }
+
+    /**
+     * Intercepts and removes !important if necessary
+     * @param $string Value to validate, possibly ending in "!important"
+     *        or "! important"
+     * @param $config Configuration object, passed to the wrapped definition
+     * @param $context Context object, passed to the wrapped definition
+     * @return The wrapped definition's result, with " !important"
+     *         re-appended when it was present and is allowed; false when
+     *         the wrapped definition rejects the value
+     */
+    public function validate($string, $config, $context) {
+        // test for ! and important tokens
+        $string = trim($string);
+        $is_important = false;
+        // :TODO: optimization: test directly for !important and ! important
+        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
+            $temp = rtrim(substr($string, 0, -9));
+            // use a temp, because we might want to restore important
+            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
+                $string = rtrim(substr($temp, 0, -1));
+                $is_important = true;
+            }
+        }
+        $string = $this->def->validate($string, $config, $context);
+        // Propagate rejection unchanged: appending to false would turn it
+        // into the string ' !important' and make an invalid value look
+        // valid to the caller.
+        if ($string === false) return false;
+        if ($this->allow && $is_important) $string .= ' !important';
+        return $string;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Length.php b/library/HTMLPurifier/AttrDef/CSS/Length.php
new file mode 100644
index 000000000..a07ec5813
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Length.php
@@ -0,0 +1,47 @@
+<?php
+
+/**
+ * Represents a Length as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Optional lower and upper bounds; null means unbounded.
+     */
+    protected $min, $max;
+
+    /**
+     * @param HTMLPurifier_Length $min Minimum length, or null for no bound. String is also acceptable.
+     * @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
+     */
+    public function __construct($min = null, $max = null) {
+        $this->min = null;
+        if ($min !== null) {
+            $this->min = HTMLPurifier_Length::make($min);
+        }
+        $this->max = null;
+        if ($max !== null) {
+            $this->max = HTMLPurifier_Length::make($max);
+        }
+    }
+
+    /**
+     * Validates a CSS length against the configured bounds.
+     * @param $string Raw length value
+     * @param $config Configuration object (unused)
+     * @param $context Context object (unused)
+     * @return '0' for a bare zero, the length's string form when valid and
+     *         within bounds, or false otherwise (including when it cannot
+     *         be compared with a bound)
+     */
+    public function validate($string, $config, $context) {
+        $string = $this->parseCDATA($string);
+
+        // Fast paths: a bare zero is fine; any other input of at most one
+        // character is rejected without parsing
+        if ($string === '0') {
+            return '0';
+        }
+        if (strlen($string) <= 1) {
+            return false;
+        }
+
+        $parsed = HTMLPurifier_Length::make($string);
+        if (!$parsed->isValid()) {
+            return false;
+        }
+
+        if ($this->min) {
+            $order = $parsed->compareTo($this->min);
+            // false: not comparable; negative: below the minimum
+            if ($order === false || $order < 0) {
+                return false;
+            }
+        }
+        if ($this->max) {
+            $order = $parsed->compareTo($this->max);
+            // false: not comparable; positive: above the maximum
+            if ($order === false || $order > 0) {
+                return false;
+            }
+        }
+
+        return $parsed->toString();
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/ListStyle.php b/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
new file mode 100644
index 000000000..4406868c0
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
@@ -0,0 +1,78 @@
+<?php
+
+/**
+ * Validates shorthand CSS property list-style.
+ * @warning Does not support url tokens that have internal spaces.
+ */
+class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of component validators.
+     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
+     */
+    protected $info;
+
+    /**
+     * Copies the three list-style-* component validators out of the CSS
+     * definition.
+     * @param $config Configuration object supplying the CSS definition
+     */
+    public function __construct($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['list-style-type']     = $def->info['list-style-type'];
+        $this->info['list-style-position'] = $def->info['list-style-position'];
+        $this->info['list-style-image']    = $def->info['list-style-image'];
+    }
+
+    /**
+     * Validates a list-style shorthand value.
+     * @param $string Raw shorthand value
+     * @param $config Configuration object, passed to component validators
+     * @param $context Context object, passed to component validators
+     * @return Space-separated validated components in the order type,
+     *         image, position; false if nothing was caught
+     */
+    public function validate($string, $config, $context) {
+
+        // regular pre-processing
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        // assumes URI doesn't have spaces in it
+        $bits = explode(' ', strtolower($string)); // bits to process
+
+        // false means "not caught yet"
+        $caught = array();
+        $caught['type']     = false;
+        $caught['position'] = false;
+        $caught['image']    = false;
+
+        $i = 0; // number of catches
+        $none = false; // whether a 'none' has already been seen
+
+        foreach ($bits as $bit) {
+            // All three components are caught, so no further bit can be
+            // accepted. Stop scanning but still build the result below;
+            // a bare return here would hand null to the caller.
+            if ($i >= 3) break;
+            if ($bit === '') continue;
+            foreach ($caught as $key => $status) {
+                if ($status !== false) continue;
+                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
+                if ($r === false) continue;
+                if ($r === 'none') {
+                    // only the first 'none' counts, and never as the image
+                    if ($none) continue;
+                    else $none = true;
+                    if ($key == 'image') continue;
+                }
+                $caught[$key] = $r;
+                $i++;
+                break;
+            }
+        }
+
+        if (!$i) return false;
+
+        $ret = array();
+
+        // construct type
+        if ($caught['type']) $ret[] = $caught['type'];
+
+        // construct image
+        if ($caught['image']) $ret[] = $caught['image'];
+
+        // construct position
+        if ($caught['position']) $ret[] = $caught['position'];
+
+        if (empty($ret)) return false;
+        return implode(' ', $ret);
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Multiple.php b/library/HTMLPurifier/AttrDef/CSS/Multiple.php
new file mode 100644
index 000000000..4d62a40d7
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Multiple.php
@@ -0,0 +1,58 @@
+<?php
+
+/**
+ * Framework class for strings that involve multiple values.
+ *
+ * Certain CSS properties such as border-width and margin allow multiple
+ * lengths to be specified. This class can take a vanilla border-width
+ * definition and multiply it, usually into a max of four.
+ *
+ * @note Even though the CSS specification isn't clear about it, inherit
+ * can only be used alone: it will never manifest as part of a multi
+ * shorthand declaration. Thus, this class does not allow inherit.
+ */
+class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Validator applied to each space-separated component.
+     * @todo Make protected
+     */
+    public $single;
+
+    /**
+     * Upper bound on the number of accepted components.
+     * @todo Make protected
+     */
+    public $max;
+
+    /**
+     * @param $single HTMLPurifier_AttrDef to multiply
+     * @param $max Max number of values allowed (usually four)
+     */
+    public function __construct($single, $max = 4) {
+        $this->max = $max;
+        $this->single = $single;
+    }
+
+    /**
+     * Runs every space-separated piece through $single and keeps the
+     * accepted results, stopping once $max of them have been collected.
+     * @param $string Raw property value
+     * @param $config Configuration object, forwarded to $single
+     * @param $context Context object, forwarded to $single
+     * @return Accepted results separated by single spaces, or false when
+     *         the input is empty or nothing was accepted
+     */
+    public function validate($string, $config, $context) {
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        $accepted = 0;
+        $output = '';
+        // parseCDATA replaced \r, \t and \n, so a plain space is the only separator
+        foreach (explode(' ', $string) as $piece) {
+            if (!($accepted < $this->max)) break;
+            if (ctype_space($piece)) continue;
+            $validated = $this->single->validate($piece, $config, $context);
+            if ($validated === false) continue;
+            $output .= $validated . ' ';
+            $accepted++;
+        }
+
+        return ($output === '') ? false : rtrim($output);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Number.php b/library/HTMLPurifier/AttrDef/CSS/Number.php
new file mode 100644
index 000000000..3f99e12ec
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Number.php
@@ -0,0 +1,69 @@
+<?php
+
+/**
+ * Validates a number as defined by the CSS spec.
+ */
+class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Bool indicating whether a leading minus sign causes rejection.
+ */
+ protected $non_negative = false;
+
+ /**
+ * @param $non_negative Bool indicating whether negatives are forbidden
+ */
+ public function __construct($non_negative = false) {
+ $this->non_negative = $non_negative;
+ }
+
+ /**
+ * Validates a number and returns it in a normalised form: a leading
+ * plus sign is dropped, leading zeros of the integer part and trailing
+ * zeros of the fractional part are stripped, and a value whose
+ * remaining digits are all zeros collapses to '0'.
+ * @param $number String to validate
+ * @param $config Not used by this method
+ * @param $context Not used by this method
+ * @return Normalised number string, or false if the input is not a number
+ * @warning Some contexts do not pass $config, $context. These
+ * variables should not be used without checking HTMLPurifier_Length
+ */
+ public function validate($number, $config, $context) {
+
+ $number = $this->parseCDATA($number);
+
+ if ($number === '') return false;
+ if ($number === '0') return '0';
+
+ $sign = '';
+ switch ($number[0]) {
+ case '-':
+ if ($this->non_negative) return false;
+ $sign = '-';
+ // intentional fall-through: both signs are removed from $number
+ case '+':
+ $number = substr($number, 1);
+ }
+
+ // plain integer: only leading zeros need to go
+ if (ctype_digit($number)) {
+ $number = ltrim($number, '0');
+ return $number ? $sign . $number : '0';
+ }
+
+ // Period is the only non-numeric character allowed
+ if (strpos($number, '.') === false) return false;
+
+ list($left, $right) = explode('.', $number, 2);
+
+ // a lone '.' is not a number; '.5' and '5.' are both accepted below
+ if ($left === '' && $right === '') return false;
+ if ($left !== '' && !ctype_digit($left)) return false;
+
+ $left = ltrim($left, '0');
+ $right = rtrim($right, '0');
+
+ if ($right === '') {
+ // nothing significant after the period: emit an integer
+ return $left ? $sign . $left : '0';
+ } elseif (!ctype_digit($right)) {
+ return false;
+ }
+
+ return $sign . $left . '.' . $right;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/Percentage.php b/library/HTMLPurifier/AttrDef/CSS/Percentage.php
new file mode 100644
index 000000000..c34b8fc3c
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/Percentage.php
@@ -0,0 +1,40 @@
+<?php
+
+/**
+ * Validates a Percentage as defined by the CSS spec.
+ */
+class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * HTMLPurifier_AttrDef_CSS_Number instance that validates the numeric part.
+     */
+    protected $number_def;
+
+    /**
+     * @param $non_negative Bool indicating whether to forbid negative values
+     */
+    public function __construct($non_negative = false) {
+        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
+    }
+
+    /**
+     * Accepts a number immediately followed by a percent sign.
+     * @param $string Raw value
+     * @param $config Configuration object, forwarded to the number validator
+     * @param $context Context object, forwarded to the number validator
+     * @return The number as returned by the number validator with '%'
+     *         appended, or false
+     */
+    public function validate($string, $config, $context) {
+
+        $value = $this->parseCDATA($string);
+        $size = strlen($value);
+
+        // there has to be at least one character in front of the percent sign
+        if ($size < 2) return false;
+        if ($value[$size - 1] !== '%') return false;
+
+        $validated = $this->number_def->validate(
+            substr($value, 0, $size - 1), $config, $context
+        );
+
+        return ($validated === false) ? false : $validated . '%';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php b/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
new file mode 100644
index 000000000..772c922d8
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
@@ -0,0 +1,38 @@
+<?php
+
+/**
+ * Validates the value for the CSS property text-decoration
+ * @note This class could be generalized into a version that acts sort of
+ * like Enum except you can compound the allowed values.
+ */
+class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts the keyword 'none' on its own, or any space-separated
+     * combination of line-through, overline and underline; other words
+     * are dropped.
+     * @param $string Raw property value
+     * @param $config Not used by this method
+     * @param $context Not used by this method
+     * @return Lowercased accepted keywords separated by spaces, or false
+     *         when no keyword survives
+     */
+    public function validate($string, $config, $context) {
+
+        static $allowed_values = array(
+            'line-through' => true,
+            'overline' => true,
+            'underline' => true,
+        );
+
+        $string = strtolower($this->parseCDATA($string));
+
+        if ($string === 'none') return $string;
+
+        $kept = array();
+        foreach (explode(' ', $string) as $word) {
+            if (isset($allowed_values[$word])) {
+                $kept[] = $word;
+            }
+        }
+
+        return $kept ? implode(' ', $kept) : false;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/CSS/URI.php b/library/HTMLPurifier/AttrDef/CSS/URI.php
new file mode 100644
index 000000000..1df17dc25
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/CSS/URI.php
@@ -0,0 +1,52 @@
+<?php
+
+/**
+ * Validates a URI in CSS syntax, which uses url('http://example.com')
+ * @note While theoretically speaking a URI in a CSS document could
+ * be non-embedded, as of CSS2 there is no such usage so we're
+ * generalizing it. This may need to be changed in the future.
+ * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
+ * the separator, you cannot put a literal semicolon in
+ * in the URI. Try percent encoding it, in that case.
+ */
+class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
+{
+
+    public function __construct() {
+        parent::__construct(true); // always embedded
+    }
+
+    /**
+     * Extracts the URI from a CSS url(...) token, validates it with the
+     * parent URI validator and re-wraps it as url("...").
+     * @param $uri_string Raw CSS value, expected to look like url(...),
+     *                    optionally with single or double quotes inside
+     * @param $config Configuration object, forwarded to the parent validator
+     * @param $context Context object, forwarded to the parent validator
+     * @return Normalised url("...") string, or false if the token is
+     *         malformed or the URI is rejected
+     */
+    public function validate($uri_string, $config, $context) {
+        // parse the URI out of the string and then pass it onto
+        // the parent object
+
+        $uri_string = $this->parseCDATA($uri_string);
+        if (strpos($uri_string, 'url(') !== 0) return false;
+        $uri_string = substr($uri_string, 4);
+        // A bare "url(" leaves nothing behind (substr() yields '' or, on
+        // older PHP versions, false); without this guard the closing
+        // parenthesis check below would index offset -1.
+        if ($uri_string === false || $uri_string === '') return false;
+        $new_length = strlen($uri_string) - 1;
+        if ($uri_string[$new_length] != ')') return false;
+        $uri = trim(substr($uri_string, 0, $new_length));
+
+        if ($uri !== '' && ($uri[0] == "'" || $uri[0] == '"')) {
+            $quote = $uri[0];
+            $new_length = strlen($uri) - 1;
+            // a lone quote character is an unterminated string, not an
+            // empty quoted one, so it must not match itself as the closer
+            if ($new_length < 1 || $uri[$new_length] !== $quote) return false;
+            $uri = substr($uri, 1, $new_length - 1);
+        }
+
+        $uri = $this->expandCSSEscape($uri);
+
+        $result = parent::validate($uri, $config, $context);
+
+        if ($result === false) return false;
+
+        // extra sanity check; should have been done by URI
+        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
+
+        return "url(\"$result\")";
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/Enum.php b/library/HTMLPurifier/AttrDef/Enum.php
new file mode 100644
index 000000000..5d603ebcc
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/Enum.php
@@ -0,0 +1,65 @@
+<?php
+
+// Enum = Enumerated
+/**
+ * Validates a keyword against a list of valid values.
+ * @warning The case-insensitive compare of this function uses PHP's
+ * built-in strtolower and ctype_lower functions, which may
+ * cause problems with international comparisons
+ */
+class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Lookup table whose keys are the accepted values.
+     * @todo Make protected
+     */
+    public $valid_values = array();
+
+    /**
+     * Whether values are matched case sensitively.
+     * @note In general this is always case insensitive.
+     */
+    protected $case_sensitive = false; // values according to W3C spec
+
+    /**
+     * @param $valid_values List of valid values
+     * @param $case_sensitive Bool indicating whether or not case sensitive
+     */
+    public function __construct(
+        $valid_values = array(), $case_sensitive = false
+    ) {
+        $this->case_sensitive = $case_sensitive;
+        $this->valid_values = array_flip($valid_values);
+    }
+
+    /**
+     * Checks a trimmed (and, unless case sensitive, lowercased) value
+     * against the lookup table.
+     * @param $string Value to check
+     * @param $config Not used by this method
+     * @param $context Not used by this method
+     * @return The normalised value if it is a key of $valid_values, else false
+     */
+    public function validate($string, $config, $context) {
+        $candidate = trim($string);
+        // we may want to do full case-insensitive libraries
+        if (!$this->case_sensitive && !ctype_lower($candidate)) {
+            $candidate = strtolower($candidate);
+        }
+        if (!isset($this->valid_values[$candidate])) return false;
+        return $candidate;
+    }
+
+    /**
+     * @param $string In form of comma-delimited list of case-insensitive
+     *      valid values. Example: "foo,bar,baz". Prepend "s:" to make
+     *      case sensitive
+     */
+    public function make($string) {
+        $sensitive = (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':');
+        if ($sensitive) {
+            // drop the "s:" marker before splitting
+            $string = substr($string, 2);
+        }
+        return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Bool.php b/library/HTMLPurifier/AttrDef/HTML/Bool.php
new file mode 100644
index 000000000..e06987eb8
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Bool.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Validates a boolean attribute
+ */
+class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
+{
+
+    /** Value handed back by validate() for any non-empty input. */
+    protected $name;
+    public $minimized = true;
+
+    /**
+     * @param $name Name of attribute
+     */
+    public function __construct($name = false) {
+        $this->name = $name;
+    }
+
+    /**
+     * @param $string Attribute value; anything PHP's empty() treats as empty is rejected
+     * @param $config Not used by this method
+     * @param $context Not used by this method
+     * @return The configured name, or false for an empty value
+     */
+    public function validate($string, $config, $context) {
+        return empty($string) ? false : $this->name;
+    }
+
+    /**
+     * @param $string Name of attribute
+     */
+    public function make($string) {
+        $definition = new HTMLPurifier_AttrDef_HTML_Bool($string);
+        return $definition;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Class.php b/library/HTMLPurifier/AttrDef/HTML/Class.php
new file mode 100644
index 000000000..370068d97
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Class.php
@@ -0,0 +1,34 @@
+<?php
+
+/**
+ * Implements special behavior for class attribute (normally NMTOKENS)
+ */
+class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
+{
+    /**
+     * Uses the parent's tokenizer for the XHTML 1.1 and XHTML 2.0 doctypes
+     * and a plain whitespace split for every other doctype.
+     */
+    protected function split($string, $config, $context) {
+        // really, this twiddle should be lazy loaded
+        $doctype = $config->getDefinition('HTML')->doctype->name;
+        $use_parent = ($doctype == "XHTML 1.1" || $doctype == "XHTML 2.0");
+        if (!$use_parent) {
+            return preg_split('/\s+/', $string);
+        }
+        return parent::split($string, $config, $context);
+    }
+    /**
+     * Keeps the tokens permitted by Attr.AllowedClasses (null permits
+     * everything), drops those listed in Attr.ForbiddenClasses and
+     * removes duplicates while preserving order.
+     */
+    protected function filter($tokens, $config, $context) {
+        $allowed = $config->get('Attr.AllowedClasses');
+        $forbidden = $config->get('Attr.ForbiddenClasses');
+        $ret = array();
+        foreach ($tokens as $token) {
+            if ($allowed !== null && !isset($allowed[$token])) continue;
+            if (isset($forbidden[$token])) continue;
+            // We need this O(n) check because of PHP's array
+            // implementation that casts -0 to 0.
+            if (in_array($token, $ret, true)) continue;
+            $ret[] = $token;
+        }
+        return $ret;
+    }
+}
diff --git a/library/HTMLPurifier/AttrDef/HTML/Color.php b/library/HTMLPurifier/AttrDef/HTML/Color.php
new file mode 100644
index 000000000..d01e20454
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Color.php
@@ -0,0 +1,32 @@
+<?php
+
+/**
+ * Validates a color according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts a keyword found in Core.ColorKeywords, or a three- or
+     * six-digit hexadecimal colour with or without a leading '#'.
+     * @param $string Raw attribute value
+     * @param $config Configuration object; read once for the keyword table
+     * @param $context Not used by this method
+     * @return The keyword's mapped value, or '#' followed by six hex
+     *         digits, or false
+     */
+    public function validate($string, $config, $context) {
+
+        // keyword table is fetched on first use and kept for later calls
+        static $colors = null;
+        if ($colors === null) {
+            $colors = $config->get('Core.ColorKeywords');
+        }
+
+        $string = trim($string);
+
+        if (empty($string)) return false;
+        if (isset($colors[$string])) return $colors[$string];
+
+        $hex = ($string[0] === '#') ? substr($string, 1) : $string;
+
+        $digits = strlen($hex);
+        if (!($digits === 3 || $digits === 6) || !ctype_xdigit($hex)) return false;
+
+        if ($digits === 3) {
+            // expand shorthand: each digit is doubled
+            $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
+        }
+
+        return '#' . $hex;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
new file mode 100644
index 000000000..ae6ea7c01
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Special-case enum attribute definition that lazy loads allowed frame targets
+ */
+class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
+{
+
+    /** false until the first validate() call fills it from the config */
+    public $valid_values = false; // uninitialized value
+    protected $case_sensitive = false;
+
+    /** Deliberately empty: the parent constructor is not invoked. */
+    public function __construct() {}
+
+    /**
+     * Loads Attr.AllowedFrameTargets on first use, then defers to the
+     * parent enum check.
+     */
+    public function validate($string, $config, $context) {
+        $pending = ($this->valid_values === false);
+        if ($pending) {
+            $this->valid_values = $config->get('Attr.AllowedFrameTargets');
+        }
+        return parent::validate($string, $config, $context);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php
new file mode 100644
index 000000000..81d03762d
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/ID.php
@@ -0,0 +1,70 @@
+<?php
+
+/**
+ * Validates the HTML attribute ID.
+ * @warning Even though this is the id processor, it
+ * will ignore the directive Attr:IDBlacklist, since it will only
+ * go according to the ID accumulator. Since the accumulator is
+ * automatically generated, it will have already absorbed the
+ * blacklist. If you're hacking around, make sure you use load()!
+ */
+
+class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
+{
+
+ // ref functionality disabled, since we also have to verify
+ // whether or not the ID it refers to exists
+
+ /**
+ * Validates an ID: applies the configured prefix, rejects IDs already
+ * present in the accumulator, checks the character set and the
+ * blacklist regexp, and records accepted IDs in the accumulator.
+ * @param $id Raw attribute value
+ * @param $config Configuration object (Attr.EnableID, Attr.IDPrefix,
+ * Attr.IDPrefixLocal, Attr.IDBlacklistRegexp are read)
+ * @param $context Context object holding 'IDAccumulator'
+ * @return The (possibly prefixed) ID, or false
+ */
+ public function validate($id, $config, $context) {
+
+ if (!$config->get('Attr.EnableID')) return false;
+
+ $id = trim($id); // trim it first
+
+ if ($id === '') return false;
+
+ $prefix = $config->get('Attr.IDPrefix');
+ if ($prefix !== '') {
+ $prefix .= $config->get('Attr.IDPrefixLocal');
+ // prevent re-appending the prefix
+ if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
+ } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
+ trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
+ '%Attr.IDPrefix is set', E_USER_WARNING);
+ }
+
+ //if (!$this->ref) {
+ // an ID that has already been recorded is rejected as a duplicate
+ $id_accumulator =& $context->get('IDAccumulator');
+ if (isset($id_accumulator->ids[$id])) return false;
+ //}
+
+ // we purposely avoid using regex, hopefully this is faster
+
+ if (ctype_alpha($id)) {
+ $result = true;
+ } else {
+ // first character must be a letter
+ if (!ctype_alpha(@$id[0])) return false;
+ // trimming every permitted character from both ends leaves an
+ // empty string only if the ID contains nothing but permitted characters
+ $trim = trim( // primitive style of regexps, I suppose
+ $id,
+ 'A..Za..z0..9:-._'
+ );
+ $result = ($trim === '');
+ }
+
+ $regexp = $config->get('Attr.IDBlacklistRegexp');
+ if ($regexp && preg_match($regexp, $id)) {
+ return false;
+ }
+
+ if (/*!$this->ref && */$result) $id_accumulator->add($id);
+
+ // if no change was made to the ID, return the result
+ // else, return the new id if stripping whitespace made it
+ // valid, or return false.
+ return $result ? $id : false;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Length.php b/library/HTMLPurifier/AttrDef/HTML/Length.php
new file mode 100644
index 000000000..a242f9c23
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Length.php
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * Validates the HTML type length (not to be confused with CSS's length).
+ *
+ * This accepts integer pixels or percentages as lengths for certain
+ * HTML attributes.
+ */
+
+class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
+{
+
+    /**
+     * Tries the parent pixel validator first; if that rejects the value,
+     * accepts a numeric value followed by '%' and clamps it to 0-100.
+     * @param $string Raw attribute value
+     * @param $config Configuration object, forwarded to the parent
+     * @param $context Context object, forwarded to the parent
+     * @return Pixel string from the parent, a percentage string, or false
+     */
+    public function validate($string, $config, $context) {
+
+        $value = trim($string);
+        if ($value === '') return false;
+
+        $pixels = parent::validate($value, $config, $context);
+        if ($pixels !== false) return $pixels;
+
+        $last = strlen($value) - 1;
+        if ($value[$last] !== '%') return false;
+
+        $number = substr($value, 0, $last);
+        if (!is_numeric($number)) return false;
+
+        // integer part only, clamped to the range 0..100
+        $percent = max(0, min(100, (int) $number));
+
+        return $percent . '%';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
new file mode 100644
index 000000000..76d25ed08
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
@@ -0,0 +1,53 @@
+<?php
+
+/**
+ * Validates a rel/rev link attribute against a directive of allowed values
+ * @note We cannot use Enum because link types allow multiple
+ * values.
+ * @note Assumes link types are ASCII text
+ */
+class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
+{
+
+    /** Name config attribute to pull. */
+    protected $name;
+
+    /**
+     * @param $name Attribute this validator serves: 'rel' or 'rev'. Any
+     *              other name raises E_USER_ERROR and leaves $name unset.
+     */
+    public function __construct($name) {
+        $configLookup = array(
+            'rel' => 'AllowedRel',
+            'rev' => 'AllowedRev'
+        );
+        if (isset($configLookup[$name])) {
+            $this->name = $configLookup[$name];
+            return;
+        }
+        trigger_error('Unrecognized attribute name for link '.
+            'relationship.', E_USER_ERROR);
+    }
+
+    /**
+     * Keeps the link types that appear in the Attr.AllowedRel /
+     * Attr.AllowedRev lookup, lowercased and without duplicates.
+     * @param $string Raw attribute value (space-separated link types)
+     * @param $config Configuration object
+     * @param $context Not used by this method
+     * @return Space-separated accepted link types, or false if none remain
+     */
+    public function validate($string, $config, $context) {
+
+        $allowed = $config->get('Attr.' . $this->name);
+        if (empty($allowed)) return false;
+
+        // keys double as a lookup to prevent duplicates
+        $seen = array();
+        foreach (explode(' ', $this->parseCDATA($string)) as $piece) {
+            $type = strtolower(trim($piece));
+            if (isset($allowed[$type])) {
+                $seen[$type] = true;
+            }
+        }
+
+        if (empty($seen)) return false;
+
+        return implode(' ', array_keys($seen));
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
new file mode 100644
index 000000000..c72fc76e4
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * Validates a MultiLength as defined by the HTML spec.
+ *
+ * A multilength is either a integer (pixel count), a percentage, or
+ * a relative number.
+ */
+class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
+{
+
+    /**
+     * Tries the parent length validator first; if that rejects the value,
+     * accepts a relative length: an optional number followed by '*'.
+     * @param $string Raw attribute value
+     * @param $config Configuration object, forwarded to the parent
+     * @param $context Context object, forwarded to the parent
+     * @return Parent's result, a normalised relative length ('*', 'N*',
+     *         or '0' for a zero multiplier), or false
+     */
+    public function validate($string, $config, $context) {
+
+        $value = trim($string);
+        if ($value === '') return false;
+
+        $plain = parent::validate($value, $config, $context);
+        if ($plain !== false) return $plain;
+
+        $last = strlen($value) - 1;
+        if ($value[$last] !== '*') return false;
+
+        $multiplier = substr($value, 0, $last);
+
+        // a bare asterisk stands on its own
+        if ($multiplier == '') return '*';
+        if (!is_numeric($multiplier)) return false;
+
+        $count = (int) $multiplier;
+
+        if ($count < 0) return false;
+        if ($count <= 1) {
+            // zero loses the asterisk, one loses the number
+            return ($count == 0) ? '0' : '*';
+        }
+        return $count . '*';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
new file mode 100644
index 000000000..aa34120bd
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@@ -0,0 +1,52 @@
+<?php
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Splits the value into tokens, filters them, and joins the survivors
+ * with single spaces.
+ * @return Space-separated token string, or false if the value is empty
+ * or no token survives
+ */
+ public function validate($string, $config, $context) {
+
+ $string = trim($string);
+
+ // early abort: '' and '0' (strings that convert to false) are invalid
+ if (!$string) return false;
+
+ $tokens = $this->split($string, $config, $context);
+ $tokens = $this->filter($tokens, $config, $context);
+ if (empty($tokens)) return false;
+ return implode(' ', $tokens);
+
+ }
+
+ /**
+ * Splits a space separated list of tokens into its constituent parts.
+ * Only whitespace-delimited runs matching the pattern below are
+ * returned; anything else in the string is silently skipped.
+ */
+ protected function split($string, $config, $context) {
+ // OPTIMIZABLE!
+ // do the preg_match, capture all subpatterns for reformulation
+
+ // we don't support U+00A1 and up codepoints or
+ // escaping because I don't know how to do that with regexps
+ // and plus it would complicate optimization efforts (you never
+ // see that anyway).
+ $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
+ '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
+ '(?:(?=\s)|\z)/'; // look ahead for space or string end
+ preg_match_all($pattern, $string, $matches);
+ // group 1 holds the tokens themselves
+ return $matches[1];
+ }
+
+ /**
+ * Template method for removing certain tokens based on arbitrary criteria.
+ * This base implementation returns the tokens unchanged.
+ * @note If we wanted to be really functional, we'd do an array_filter
+ * with a callback. But... we're not.
+ */
+ protected function filter($tokens, $config, $context) {
+ return $tokens;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/HTML/Pixels.php b/library/HTMLPurifier/AttrDef/HTML/Pixels.php
new file mode 100644
index 000000000..4cb2c1b85
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/HTML/Pixels.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Validates an integer representation of pixels according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
+{
+
+    /** Largest value validate() will return, or null for no upper bound. */
+    protected $max;
+
+    /**
+     * @param $max Upper bound for the pixel count, or null for none
+     */
+    public function __construct($max = null) {
+        $this->max = $max;
+    }
+
+    /**
+     * Accepts a numeric value with an optional trailing 'px' and returns
+     * its integer part, raised to 0 if negative and capped at $max.
+     * @param $string Raw attribute value
+     * @param $config Not used by this method
+     * @param $context Not used by this method
+     * @return Integer pixel count as a string, or false
+     */
+    public function validate($string, $config, $context) {
+
+        $value = trim($string);
+        if ($value === '0') return $value;
+        if ($value === '') return false;
+
+        $size = strlen($value);
+        $has_unit = (substr($value, $size - 2) == 'px');
+        if ($has_unit) {
+            $value = substr($value, 0, $size - 2);
+        }
+        if (!is_numeric($value)) return false;
+
+        $pixels = (int) $value;
+        if ($pixels < 0) return '0';
+
+        // upper-bound value, extremely high values can
+        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
+        // WARNING, above link WILL crash you if you're using Windows
+        $over_limit = ($this->max !== null && $pixels > $this->max);
+
+        return (string) ($over_limit ? $this->max : $pixels);
+
+    }
+
+    /**
+     * @param $string Upper bound as a string; '' means no bound
+     * @return New instance of the same class as $this
+     */
+    public function make($string) {
+        $max = ($string === '') ? null : (int) $string;
+        $class = get_class($this);
+        return new $class($max);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/Integer.php b/library/HTMLPurifier/AttrDef/Integer.php
new file mode 100644
index 000000000..d59738d2a
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/Integer.php
@@ -0,0 +1,73 @@
+<?php
+
+/**
+ * Validates an integer.
+ * @note While this class was modeled off the CSS definition, no currently
+ * allowed CSS uses this type. The properties that do are: widows,
+ * orphans, z-index, counter-increment, counter-reset. Some of the
+ * HTML attributes, however, find use for a non-negative version of this.
+ */
+class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Bool indicating whether or not negative values are allowed
+ */
+ protected $negative = true;
+
+ /**
+ * Bool indicating whether or not zero is allowed
+ */
+ protected $zero = true;
+
+ /**
+ * Bool indicating whether or not positive values are allowed
+ */
+ protected $positive = true;
+
+ /**
+ * @param $negative Bool indicating whether or not negative values are allowed
+ * @param $zero Bool indicating whether or not zero is allowed
+ * @param $positive Bool indicating whether or not positive values are allowed
+ */
+ public function __construct(
+ $negative = true, $zero = true, $positive = true
+ ) {
+ $this->negative = $negative;
+ $this->zero = $zero;
+ $this->positive = $positive;
+ }
+
+ /**
+ * Validates an optionally signed run of digits against the configured
+ * sign restrictions.
+ * @param $integer Raw value
+ * @param $config Not used by this method
+ * @param $context Not used by this method
+ * @return The integer as a string (a leading plus is removed and '-0'
+ * becomes '0'), or false
+ */
+ public function validate($integer, $config, $context) {
+
+ $integer = $this->parseCDATA($integer);
+ if ($integer === '') return false;
+
+ // we could possibly simply typecast it to integer, but there are
+ // certain fringe cases that must not return an integer.
+
+ // clip leading sign
+ // (a sign that is not permitted stays in $digits and therefore
+ // fails the ctype_digit() test below)
+ if ( $this->negative && $integer[0] === '-' ) {
+ $digits = substr($integer, 1);
+ if ($digits === '0') $integer = '0'; // rm minus sign for zero
+ } elseif( $this->positive && $integer[0] === '+' ) {
+ $digits = $integer = substr($integer, 1); // rm unnecessary plus
+ } else {
+ $digits = $integer;
+ }
+
+ // test if it's numeric
+ if (!ctype_digit($digits)) return false;
+
+ // perform scope tests
+ if (!$this->zero && $integer == 0) return false;
+ if (!$this->positive && $integer > 0) return false;
+ if (!$this->negative && $integer < 0) return false;
+
+ return $integer;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/Lang.php b/library/HTMLPurifier/AttrDef/Lang.php
new file mode 100644
index 000000000..10e6da56d
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/Lang.php
@@ -0,0 +1,73 @@
+<?php
+
+/**
+ * Validates the HTML attribute lang, effectively a language code.
+ * @note Built according to RFC 3066, which obsoleted RFC 1766
+ */
+class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Validates a hyphen-separated language code subtag by subtag. An
+ * invalid primary subtag rejects the whole value; an invalid later
+ * subtag truncates the result at the last valid subtag.
+ * @param $string Raw attribute value
+ * @param $config Not used by this method
+ * @param $context Not used by this method
+ * @return Language code with the validated subtags lowercased (a
+ * one-character primary subtag is returned as given), or false
+ */
+ public function validate($string, $config, $context) {
+
+ $string = trim($string);
+ if (!$string) return false;
+
+ $subtags = explode('-', $string);
+ $num_subtags = count($subtags);
+
+ if ($num_subtags == 0) return false; // sanity check
+
+ // process primary subtag : $subtags[0]
+ $length = strlen($subtags[0]);
+ switch ($length) {
+ case 0:
+ return false;
+ case 1:
+ // the only one-character primary subtags accepted are 'x' and 'i'
+ if (! ($subtags[0] == 'x' || $subtags[0] == 'i') ) {
+ return false;
+ }
+ break;
+ case 2:
+ case 3:
+ // two or three letters, normalised to lowercase
+ if (! ctype_alpha($subtags[0]) ) {
+ return false;
+ } elseif (! ctype_lower($subtags[0]) ) {
+ $subtags[0] = strtolower($subtags[0]);
+ }
+ break;
+ default:
+ return false;
+ }
+
+ $new_string = $subtags[0];
+ if ($num_subtags == 1) return $new_string;
+
+ // process second subtag : $subtags[1]
+ // (1-8 alphanumeric characters; a single character must be 'x')
+ $length = strlen($subtags[1]);
+ if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
+ return $new_string;
+ }
+ if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
+
+ $new_string .= '-' . $subtags[1];
+ if ($num_subtags == 2) return $new_string;
+
+ // process all other subtags, index 2 and up
+ // (1-8 alphanumeric characters each; stop at the first invalid one)
+ for ($i = 2; $i < $num_subtags; $i++) {
+ $length = strlen($subtags[$i]);
+ if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) {
+ return $new_string;
+ }
+ if (!ctype_lower($subtags[$i])) {
+ $subtags[$i] = strtolower($subtags[$i]);
+ }
+ $new_string .= '-' . $subtags[$i];
+ }
+
+ return $new_string;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/Switch.php b/library/HTMLPurifier/AttrDef/Switch.php
new file mode 100644
index 000000000..c9e3ed193
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/Switch.php
@@ -0,0 +1,34 @@
+<?php
+
+/**
+ * Decorator that, depending on a token, switches between two definitions.
+ */
+class HTMLPurifier_AttrDef_Switch
+{
+
+    protected $tag;
+    protected $withTag, $withoutTag;
+
+    /**
+     * @param string $tag Tag name to switch upon
+     * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
+     * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
+     */
+    public function __construct($tag, $with_tag, $without_tag) {
+        $this->withoutTag = $without_tag;
+        $this->withTag = $with_tag;
+        $this->tag = $tag;
+    }
+
+    /**
+     * Delegates to one of the two wrapped definitions depending on
+     * whether the context's 'CurrentToken' is named like the switch tag.
+     * @return Whatever the selected definition's validate() returns
+     */
+    public function validate($string, $config, $context) {
+        $token = $context->get('CurrentToken', true);
+        $matches = ($token && $token->name === $this->tag);
+        $chosen = $matches ? $this->withTag : $this->withoutTag;
+        return $chosen->validate($string, $config, $context);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/Text.php b/library/HTMLPurifier/AttrDef/Text.php
new file mode 100644
index 000000000..c6216cc53
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/Text.php
@@ -0,0 +1,15 @@
+<?php
+
+/**
+ * Validates arbitrary text according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts any text; the only processing is the inherited CDATA
+     * normalisation.
+     * @param $string Raw attribute value
+     * @param $config Not used by this method
+     * @param $context Not used by this method
+     * @return Result of parseCDATA() on the input
+     */
+    public function validate($string, $config, $context) {
+        $normalized = $this->parseCDATA($string);
+        return $normalized;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php
new file mode 100644
index 000000000..01a6d83e9
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/URI.php
@@ -0,0 +1,77 @@
+<?php
+
+/**
+ * Validates a URI as defined by RFC 3986.
+ * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
+ */
+class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
+{
+
+ /** HTMLPurifier_URIParser instance used to parse the raw string */
+ protected $parser;
+ /** Bool: value registered in the context as 'EmbeddedURI' during validation */
+ protected $embedsResource;
+
+ /**
+ * @param $embeds_resource Does the URI here result in an extra HTTP request?
+ */
+ public function __construct($embeds_resource = false) {
+ $this->parser = new HTMLPurifier_URIParser();
+ $this->embedsResource = (bool) $embeds_resource;
+ }
+
+ /**
+ * @param $string Cast to bool and used as the embeds-resource flag
+ */
+ public function make($string) {
+ $embeds = (bool) $string;
+ return new HTMLPurifier_AttrDef_URI($embeds);
+ }
+
+ /**
+ * Parses the URI and runs it through generic validation, the URI
+ * definition's filters, scheme-specific validation and the post
+ * filters; any failing stage rejects the URI.
+ * @param $uri Raw URI string
+ * @param $config Configuration object (URI.Disable turns validation off entirely)
+ * @param $context Context object; 'EmbeddedURI' is registered for the
+ * duration of the checks and destroyed afterwards
+ * @return String form of the validated URI, or false
+ */
+ public function validate($uri, $config, $context) {
+
+ if ($config->get('URI.Disable')) return false;
+
+ $uri = $this->parseCDATA($uri);
+
+ // parse the URI
+ $uri = $this->parser->parse($uri);
+ if ($uri === false) return false;
+
+ // add embedded flag to context for validators
+ $context->register('EmbeddedURI', $this->embedsResource);
+
+ // single-pass loop: "break" jumps to the cleanup below with $ok still false
+ $ok = false;
+ do {
+
+ // generic validation
+ $result = $uri->validate($config, $context);
+ if (!$result) break;
+
+ // chained filtering
+ $uri_def = $config->getDefinition('URI');
+ $result = $uri_def->filter($uri, $config, $context);
+ if (!$result) break;
+
+ // scheme-specific validation
+ $scheme_obj = $uri->getSchemeObj($config, $context);
+ if (!$scheme_obj) break;
+ if ($this->embedsResource && !$scheme_obj->browsable) break;
+ $result = $scheme_obj->validate($uri, $config, $context);
+ if (!$result) break;
+
+ // Post chained filtering
+ $result = $uri_def->postFilter($uri, $config, $context);
+ if (!$result) break;
+
+ // survived gauntlet
+ $ok = true;
+
+ } while (false);
+
+ // always remove the flag again, whether or not validation succeeded
+ $context->destroy('EmbeddedURI');
+ if (!$ok) return false;
+
+ // back to string
+ return $uri->toString();
+
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/URI/Email.php b/library/HTMLPurifier/AttrDef/URI/Email.php
new file mode 100644
index 000000000..bfee9d166
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/URI/Email.php
@@ -0,0 +1,17 @@
+<?php
+
+/**
+ * Abstract base for email address validators.
+ */
+abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Unpacks a mailbox into its display-name and address
+ * @note Currently a stub: the body is empty, so nothing is returned.
+ */
+ function unpack($string) {
+ // needs to be implemented
+ }
+
+}
+
+// sub-implementations
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php b/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
new file mode 100644
index 000000000..94c715ab4
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Primitive email validation class based on the regexp found at
+ * http://www.regular-expressions.info/email.html
+ */
+class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
+{
+
+    /**
+     * Matches the trimmed input against a simple address pattern.
+     * @param $string Raw address
+     * @param $config Not used by this method
+     * @param $context Not used by this method
+     * @return The trimmed address if it matches, else false
+     */
+    public function validate($string, $config, $context) {
+        // no support for named mailboxes i.e. "Bob <bob@example.com>"
+        // that needs more percent encoding to be done
+        if ($string == '') return false;
+        $candidate = trim($string);
+        if (!preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $candidate)) {
+            return false;
+        }
+        return $candidate;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/URI/Host.php b/library/HTMLPurifier/AttrDef/URI/Host.php
new file mode 100644
index 000000000..2156c10c6
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/URI/Host.php
@@ -0,0 +1,62 @@
+<?php
+
+/**
+ * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
+ */
+class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
+ */
+ protected $ipv4;
+
+ /**
+ * Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
+ */
+ protected $ipv6;
+
+ public function __construct() {
+ $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
+ $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
+ }
+
+ public function validate($string, $config, $context) {
+ $length = strlen($string);
+ if ($string === '') return '';
+ if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
+ //IPv6
+ $ip = substr($string, 1, $length - 2);
+ $valid = $this->ipv6->validate($ip, $config, $context);
+ if ($valid === false) return false;
+ return '['. $valid . ']';
+ }
+
+ // need to do checks on unusual encodings too
+ $ipv4 = $this->ipv4->validate($string, $config, $context);
+ if ($ipv4 !== false) return $ipv4;
+
+ // A regular domain name.
+
+ // This breaks I18N domain names, but we don't have proper IRI support,
+ // so force users to insert Punycode. If there's complaining we'll
+ // try to fix things into an international friendly form.
+
+ // The productions describing this are:
+ $a = '[a-z]'; // alpha
+ $an = '[a-z0-9]'; // alphanum
+ $and = '[a-z0-9-]'; // alphanum | "-"
+ // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+ $domainlabel = "$an($and*$an)?";
+ // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
+ $toplabel = "$a($and*$an)?";
+ // hostname = *( domainlabel "." ) toplabel [ "." ]
+ $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
+ if (!$match) return false;
+
+ return $string;
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/URI/IPv4.php b/library/HTMLPurifier/AttrDef/URI/IPv4.php
new file mode 100644
index 000000000..ec4cf591b
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/URI/IPv4.php
@@ -0,0 +1,39 @@
+<?php
+
+/**
+ * Validates an IPv4 address
+ * @author Feyd @ forums.devnetwork.net (public domain)
+ */
+class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
+{
+
+ /**
+ * IPv4 regex, protected so that IPv6 can reuse it
+ */
+ protected $ip4;
+
+ public function validate($aIP, $config, $context) {
+
+ if (!$this->ip4) $this->_loadRegex();
+
+ if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
+ {
+ return $aIP;
+ }
+
+ return false;
+
+ }
+
+ /**
+ * Lazy load function to prevent regex from being stuffed in
+ * cache.
+ */
+ protected function _loadRegex() {
+ $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
+ $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/AttrDef/URI/IPv6.php b/library/HTMLPurifier/AttrDef/URI/IPv6.php
new file mode 100644
index 000000000..9454e9be5
--- /dev/null
+++ b/library/HTMLPurifier/AttrDef/URI/IPv6.php
@@ -0,0 +1,99 @@
+<?php
+
+/**
+ * Validates an IPv6 address.
+ * @author Feyd @ forums.devnetwork.net (public domain)
+ * @note This function requires brackets to have been removed from address
+ * in URI.
+ */
+class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
+{
+
+ public function validate($aIP, $config, $context) {
+
+ if (!$this->ip4) $this->_loadRegex();
+
+ $original = $aIP;
+
+ $hex = '[0-9a-fA-F]';
+ $blk = '(?:' . $hex . '{1,4})';
+ $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128
+
+ // prefix check
+ if (strpos($aIP, '/') !== false)
+ {
+ if (preg_match('#' . $pre . '$#s', $aIP, $find))
+ {
+ $aIP = substr($aIP, 0, 0-strlen($find[0]));
+ unset($find);
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ // IPv4-compatiblity check
+ if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
+ {
+ $aIP = substr($aIP, 0, 0-strlen($find[0]));
+ $ip = explode('.', $find[0]);
+ $ip = array_map('dechex', $ip);
+ $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
+ unset($find, $ip);
+ }
+
+ // compression check
+ $aIP = explode('::', $aIP);
+ $c = count($aIP);
+ if ($c > 2)
+ {
+ return false;
+ }
+ elseif ($c == 2)
+ {
+ list($first, $second) = $aIP;
+ $first = explode(':', $first);
+ $second = explode(':', $second);
+
+ if (count($first) + count($second) > 8)
+ {
+ return false;
+ }
+
+ while(count($first) < 8)
+ {
+ array_push($first, '0');
+ }
+
+ array_splice($first, 8 - count($second), 8, $second);
+ $aIP = $first;
+ unset($first,$second);
+ }
+ else
+ {
+ $aIP = explode(':', $aIP[0]);
+ }
+ $c = count($aIP);
+
+ if ($c != 8)
+ {
+ return false;
+ }
+
+ // All the pieces should be 16-bit hex strings. Are they?
+ foreach ($aIP as $piece)
+ {
+ if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
+ {
+ return false;
+ }
+ }
+
+ return $original;
+
+ }
+
+}
+
+// vim: et sw=4 sts=4