some important stuff we'll need

author: friendica <info@friendica.com> 2012-05-12 17:57:41 -0700
committer: friendica <info@friendica.com> 2012-07-18 20:40:31 +1000
commit: 7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a (patch)
tree: a9c3d91209cff770bb4b613b1b95e61a7bbc5a2b /lib/htmlpurifier/library/HTMLPurifier/AttrDef
parent: cd727cb26b78a1dade09d510b071446898477356 (diff)
download: volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.gz
volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.bz2
volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.zip
42 files changed, 2442 insertions, 0 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php
new file mode 100644
index 000000000..953e70675
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php
@@ -0,0 +1,87 @@
+<?php
+
+/**
+ * Validator for the HTML style attribute, i.e. an inline list of CSS
+ * declarations.
+ * @note Only part of the CSS specification is implemented, so reusing
+ *       this class to validate real stylesheet declarations may be
+ *       difficult.
+ * @note The input is not tokenized; declarations are taken apart with
+ *       plain string functions.  Tokenizing and parsing would make the
+ *       component more viable as a CSS filter, but could seriously
+ *       harm performance.
+ */
+class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Validates each declaration and reassembles the attribute value.
+     * @param $css Raw contents of the style attribute
+     * @param $config Configuration object; supplies the CSS definition
+     * @param $context Context object; CurrentCSSProperty is registered
+     *        on it while the declarations are processed
+     * @return Reassembled declarations, or false if none were accepted
+     */
+    public function validate($css, $config, $context) {
+
+        $css        = $this->parseCDATA($css);
+        $definition = $config->getCSSDefinition();
+
+        // Name of the CSS property currently being validated.  The very
+        // same variable is reassigned for every declaration (register()
+        // presumably binds it by reference, so it must not be swapped).
+        $property = false;
+        $context->register('CurrentCSSProperty', $property);
+
+        // Splitting on semicolons breaks the spec: an escaped semicolon
+        // (it MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI)
+        // gets cut in half.  It is flaky but fast, and such input is rare.
+        // property => validated value; a repeated property overwrites
+        $accepted = array();
+        foreach (explode(';', $css) as $declaration) {
+            // ignore empty chunks and chunks with no colon after offset 0
+            if (!$declaration || !strpos($declaration, ':')) continue;
+
+            list($property, $value) = explode(':', $declaration, 2);
+            $property = trim($property);
+            $value    = trim($value);
+
+            if (!isset($definition->info[$property])) {
+                // not known as written: retry in lowercase, unless the
+                // name is all-lowercase already
+                if (ctype_lower($property)) continue;
+                $property = strtolower($property);
+                if (!isset($definition->info[$property])) continue;
+            }
+
+            // inherit works for everything (but only on the base property);
+            // anything else is handed to the property's own validator
+            if (strtolower(trim($value)) === 'inherit') {
+                $result = 'inherit';
+            } else {
+                $result = $definition->info[$property]->validate(
+                    $value, $config, $context
+                );
+            }
+            if ($result === false) continue;
+            $accepted[$property] = $result;
+        }
+
+        $context->destroy('CurrentCSSProperty');
+
+        // The output is assembled only after validation has finished,
+        // which is slightly inefficient; collecting into an array first
+        // is what removes duplicate properties.
+        $pieces = array();
+        foreach ($accepted as $name => $validated) {
+            $pieces[] = $name . ':' . $validated . ';';
+        }
+        $rebuilt = implode('', $pieces);
+
+        return $rebuilt === '' ? false : $rebuilt;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
new file mode 100644
index 000000000..292c040d4
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Validates a CSS alpha value: a number that is clamped to the range
+ * 0 to 1.
+ */
+class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
+{
+
+    /**
+     * Passes false to the parent constructor: opacity is non-negative,
+     * but out-of-range input is clamped in validate() rather than refused
+     * (presumably the flag is the parent's non-negative switch).
+     */
+    public function __construct() {
+        parent::__construct(false);
+    }
+
+    /**
+     * Validates the value as a number and clamps the result.
+     * @param $number Raw alpha value
+     * @param $config Configuration object, passed on to the parent
+     * @param $context Context object, passed on to the parent
+     * @return '0' or '1' when clamped, otherwise the parent's result;
+     *         false if the parent rejects the value
+     */
+    public function validate($number, $config, $context) {
+        $validated = parent::validate($number, $config, $context);
+        if ($validated === false) return false;
+
+        $alpha = (float) $validated;
+        if ($alpha > 1.0) return '1';
+        if ($alpha < 0.0) return '0';
+        return $validated;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Background.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Background.php
new file mode 100644
index 000000000..3a3d20cd6
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Background.php
@@ -0,0 +1,87 @@
+<?php
+
+/**
+ * Validates shorthand CSS property background.
+ * @warning Does not support url tokens that have internal spaces.
+ */
+class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of component validators.
+     * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
+     */
+    protected $info;
+
+    /**
+     * @param $config Configuration object; its CSS definition supplies the
+     *        validators for the five background-* component properties
+     */
+    public function __construct($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['background-color'] = $def->info['background-color'];
+        $this->info['background-image'] = $def->info['background-image'];
+        $this->info['background-repeat'] = $def->info['background-repeat'];
+        $this->info['background-attachment'] = $def->info['background-attachment'];
+        $this->info['background-position'] = $def->info['background-position'];
+    }
+
+    /**
+     * Sorts the space-separated tokens into color, image, repeat,
+     * attachment and position, validating each component.
+     * @param $string Raw value of the background property
+     * @param $config Configuration object, passed on to the validators
+     * @param $context Context object, passed on to the validators
+     * @return Validated components joined by single spaces, or false if
+     *         nothing usable was found
+     */
+    public function validate($string, $config, $context) {
+
+        // regular pre-processing
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        // munge rgb() decl if necessary
+        $string = $this->mungeRgb($string);
+
+        // one slot per component, in the order they are tried and emitted
+        $caught = array(
+            'color'      => false,
+            'image'      => false,
+            'repeat'     => false,
+            'attachment' => false,
+            'position'   => false,
+        );
+
+        $i = 0; // number of catches
+
+        // assumes URI doesn't have spaces in it
+        foreach (explode(' ', $string) as $bit) {
+            if ($bit === '') continue;
+            $lbit = strtolower($bit);
+            foreach ($caught as $key => $status) {
+                if ($key === 'position') {
+                    // whatever no other component claimed is collected
+                    // here and validated as a whole further down
+                    if ($caught[$key] === false) $caught[$key] = '';
+                    $caught[$key] .= $lbit . ' ';
+                    $i++;
+                    break;
+                }
+                if ($status !== false) continue;
+                // Components are matched in lowercase.  A URI is
+                // case-sensitive though, so the image token is first tried
+                // exactly as written; the lowercased form is only a
+                // fallback for input that validates no other way.
+                $attempts = ($key === 'image' && $bit !== $lbit)
+                    ? array($bit, $lbit)
+                    : array($lbit);
+                $r = false;
+                foreach ($attempts as $attempt) {
+                    $r = $this->info['background-' . $key]->validate(
+                        $attempt, $config, $context);
+                    if ($r !== false) break;
+                }
+                if ($r === false) continue;
+                $caught[$key] = $r;
+                $i++;
+                break;
+            }
+        }
+
+        if (!$i) return false;
+        if ($caught['position'] !== false) {
+            $caught['position'] = $this->info['background-position']->
+                validate($caught['position'], $config, $context);
+        }
+
+        // keep only the components that were caught and survived
+        $ret = array();
+        foreach ($caught as $value) {
+            if ($value === false) continue;
+            $ret[] = $value;
+        }
+
+        if (empty($ret)) return false;
+        return implode(' ', $ret);
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
new file mode 100644
index 000000000..fae82eaec
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
@@ -0,0 +1,133 @@
+<?php
+
+/* W3C says:
+    [ // adjective and number must be in correct order, even if
+      // you could switch them without introducing ambiguity.
+      // some browsers support that syntax
+        [
+            <percentage> | <length> | left | center | right
+        ]
+        [
+            <percentage> | <length> | top | center | bottom
+        ]?
+    ] |
+    [ // this signifies that the vertical and horizontal adjectives
+      // can be arbitrarily ordered, however, there can only be two,
+      // one of each, or none at all
+        [
+            left | center | right
+        ] ||
+        [
+            top | center | bottom
+        ]
+    ]
+    top, left = 0%
+    center, (none) = 50%
+    bottom, right = 100%
+*/
+
+/* QuirksMode says:
+    keyword + length/percentage must be ordered correctly, as per W3C
+
+    Internet Explorer and Opera, however, support arbitrary ordering. We
+    should fix it up.
+
+    Minor issue though, not strictly necessary.
+*/
+
+// control freaks may appreciate the ability to convert these to
+// percentages or something, but it's not necessary
+
+/**
+ * Validator for the value of the background-position property.
+ */
+class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
+{
+
+    /** Validator applied to every token to detect a length. */
+    protected $length;
+
+    /** Validator applied to every token to detect a percentage. */
+    protected $percentage;
+
+    public function __construct() {
+        $this->length     = new HTMLPurifier_AttrDef_CSS_Length();
+        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
+    }
+
+    /**
+     * Picks keywords and measures out of the value and emits at most two
+     * of them: a horizontal component followed by a vertical one.
+     * @param $string Raw value of background-position
+     * @param $config Configuration object, passed on to the validators
+     * @param $context Context object, passed on to the validators
+     * @return One or two components joined by a space, or false
+     */
+    public function validate($string, $config, $context) {
+        $string = $this->parseCDATA($string);
+
+        // axis of each keyword; "center" (c) is resolved by its position
+        $axisOf = array(
+            'left'   => 'h',
+            'right'  => 'h',
+            'top'    => 'v',
+            'bottom' => 'v',
+            'center' => 'c'
+        );
+
+        $keywords = array(
+            'h'  => false, // left, right
+            'v'  => false, // top, bottom
+            'ch' => false, // center seen before anything else was caught
+            'cv' => false  // center seen after something was caught
+        );
+        $measures = array();
+        $seen = 0; // number of keywords and measures caught so far
+
+        foreach (explode(' ', $string) as $bit) {
+            if ($bit === '') continue;
+
+            // keyword?
+            $word = strtolower($bit);
+            if (isset($axisOf[$word])) {
+                $slot = $axisOf[$word];
+                if ($slot === 'c') {
+                    $slot = ($seen === 0) ? 'ch' : 'cv';
+                }
+                $keywords[$slot] = $word;
+                $seen++;
+            }
+
+            // length and percentage are both tried, always, in that order
+            foreach (array($this->length, $this->percentage) as $validator) {
+                $measure = $validator->validate($bit, $config, $context);
+                if ($measure !== false) {
+                    $measures[] = $measure;
+                    $seen++;
+                }
+            }
+        }
+
+        if ($seen === 0) return false; // no valid values were caught
+
+        $ret = array();
+
+        // first component: horizontal keyword, leading center, or a measure
+        if ($keywords['h'] !== false) {
+            $ret[] = $keywords['h'];
+        } elseif ($keywords['ch'] !== false) {
+            $ret[] = $keywords['ch'];
+            $keywords['cv'] = false; // prevent re-use: center = center center
+        } elseif (!empty($measures)) {
+            $ret[] = array_shift($measures);
+        }
+
+        // second component: vertical keyword, trailing center, or a measure
+        if ($keywords['v'] !== false) {
+            $ret[] = $keywords['v'];
+        } elseif ($keywords['cv'] !== false) {
+            $ret[] = $keywords['cv'];
+        } elseif (!empty($measures)) {
+            $ret[] = array_shift($measures);
+        }
+
+        return empty($ret) ? false : implode(' ', $ret);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Border.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Border.php
new file mode 100644
index 000000000..42a1d1b4a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Border.php
@@ -0,0 +1,43 @@
+<?php
+
+/**
+ * Validates the border property as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of properties this property is shorthand for.
+     */
+    protected $info = array();
+
+    /**
+     * @param $config Configuration object; its CSS definition supplies the
+     *        border-width, border-style and border-top-color validators
+     */
+    public function __construct($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['border-width'] = $def->info['border-width'];
+        $this->info['border-style'] = $def->info['border-style'];
+        $this->info['border-top-color'] = $def->info['border-top-color'];
+    }
+
+    /**
+     * Offers each space-separated token to the component validators that
+     * have not matched yet; tokens nobody accepts are dropped.
+     * @param $string Raw value of the border property
+     * @param $config Configuration object, passed on to the validators
+     * @param $context Context object, passed on to the validators
+     * @return Accepted components joined by single spaces, or false when
+     *         no token validated
+     */
+    public function validate($string, $config, $context) {
+        $string = $this->parseCDATA($string);
+        $string = $this->mungeRgb($string);
+        $bits = explode(' ', $string);
+        $done = array(); // segments we've finished
+        $ret = ''; // return value
+        foreach ($bits as $bit) {
+            // consecutive spaces yield empty tokens; nothing to validate
+            if ($bit === '') continue;
+            foreach ($this->info as $propname => $validator) {
+                if (isset($done[$propname])) continue;
+                $r = $validator->validate($bit, $config, $context);
+                if ($r !== false) {
+                    $ret .= $r . ' ';
+                    $done[$propname] = true;
+                    break;
+                }
+            }
+        }
+        $ret = rtrim($ret);
+        // An empty result is reported as a failure instead of being
+        // returned as '', which a caller would treat as a valid value.
+        return $ret === '' ? false : $ret;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php
new file mode 100644
index 000000000..07f95a671
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Color.php
@@ -0,0 +1,78 @@
+<?php
+
+/**
+ * Validates Color as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts a color keyword, an rgb() literal or a hexadecimal color.
+     * @param $color Raw color value
+     * @param $config Configuration object; Core.ColorKeywords is read
+     *        from it
+     * @param $context Context object (not used by this validator)
+     * @return The keyword's mapped value, a rebuilt rgb() literal with
+     *         clamped components, or a '#'-prefixed hex color; false if
+     *         the value is none of these
+     */
+    public function validate($color, $config, $context) {
+
+        // keyword table; fetched on the first call and kept in a static,
+        // so whichever config is seen first supplies it from then on
+        static $colors = null;
+        if ($colors === null) $colors = $config->get('Core.ColorKeywords');
+
+        $color = trim($color);
+        if ($color === '') return false;
+
+        $lower = strtolower($color);
+        if (isset($colors[$lower])) return $colors[$lower];
+
+        // The function name must open the value (it is matched without
+        // regard to case); "rgb(" somewhere in the middle of other text
+        // is not an rgb literal.
+        if (strncasecmp($color, 'rgb(', 4) === 0) {
+            // rgb literal handling
+            $length = strlen($color);
+            // the first closing parenthesis has to be the last character
+            if (strpos($color, ')') !== $length - 1) return false;
+            $triad = substr($color, 4, $length - 4 - 1);
+            $parts = explode(',', $triad);
+            if (count($parts) !== 3) return false;
+            $type = false; // to ensure that they're all the same type
+            $new_parts = array();
+            foreach ($parts as $part) {
+                $part = trim($part);
+                if ($part === '') return false;
+                $length = strlen($part);
+                if ($part[$length - 1] === '%') {
+                    // handle percents, clamped to 0-100
+                    if (!$type) {
+                        $type = 'percentage';
+                    } elseif ($type !== 'percentage') {
+                        return false;
+                    }
+                    $num = (float) substr($part, 0, $length - 1);
+                    if ($num < 0) $num = 0;
+                    if ($num > 100) $num = 100;
+                    $new_parts[] = "$num%";
+                } else {
+                    // handle integers, clamped to 0-255
+                    if (!$type) {
+                        $type = 'integer';
+                    } elseif ($type !== 'integer') {
+                        return false;
+                    }
+                    $num = (int) $part;
+                    if ($num < 0) $num = 0;
+                    if ($num > 255) $num = 255;
+                    $new_parts[] = (string) $num;
+                }
+            }
+            $new_triad = implode(',', $new_parts);
+            $color = "rgb($new_triad)";
+        } else {
+            // hexadecimal handling; a missing leading '#' is added
+            if ($color[0] === '#') {
+                $hex = substr($color, 1);
+            } else {
+                $hex = $color;
+                $color = '#' . $color;
+            }
+            $length = strlen($hex);
+            if ($length !== 3 && $length !== 6) return false;
+            if (!ctype_xdigit($hex)) return false;
+        }
+
+        return $color;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Composite.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Composite.php
new file mode 100644
index 000000000..de1289cba
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Composite.php
@@ -0,0 +1,38 @@
+<?php
+
+/**
+ * Lets several validators try the same attribute value in turn.
+ *
+ * A composite holds a list of HTMLPurifier_AttrDef objects.  The value is
+ * offered to each of them in order and the first result that is not
+ * false is returned.  This suits CSS values in particular, which are
+ * often either one of a fixed set of keywords or a flexible data type.
+ */
+class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * HTMLPurifier_AttrDef objects to try, in order
+     * @todo Make protected
+     */
+    public $defs;
+
+    /**
+     * @param $defs List of HTMLPurifier_AttrDef objects
+     */
+    public function __construct($defs) {
+        $this->defs = $defs;
+    }
+
+    /**
+     * @param $string Value to validate
+     * @param $config Configuration object, passed on to each validator
+     * @param $context Context object, passed on to each validator
+     * @return Result of the first validator that accepts the value, or
+     *         false if every validator rejects it
+     */
+    public function validate($string, $config, $context) {
+        foreach ($this->defs as $candidate) {
+            $outcome = $candidate->validate($string, $config, $context);
+            if ($outcome === false) continue;
+            return $outcome;
+        }
+        return false;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
new file mode 100644
index 000000000..6599c5b2d
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Decorator that switches a CSS property off for one specific element.
+ */
+class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
+{
+    /** Wrapped definition that does the actual validation */
+    public $def;
+
+    /** Name of the element the property is denied on */
+    public $element;
+
+    /**
+     * @param $def Definition to wrap
+     * @param $element Element to deny
+     */
+    public function __construct($def, $element) {
+        $this->def = $def;
+        $this->element = $element;
+    }
+
+    /**
+     * Returns false when CurrentToken is set and its name equals
+     * $this->element; otherwise delegates to the wrapped definition.
+     */
+    public function validate($string, $config, $context) {
+        $current = $context->get('CurrentToken', true);
+        $denied = $current && $current->name == $this->element;
+        return $denied
+            ? false
+            : $this->def->validate($string, $config, $context);
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Filter.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Filter.php
new file mode 100644
index 000000000..147894b86
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Filter.php
@@ -0,0 +1,54 @@
+<?php
+
+/**
+ * Microsoft's proprietary filter: CSS property
+ * @note Currently supports the alpha filter. In the future, this will
+ *       probably need an extensible framework
+ */
+class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
+{
+
+    /** Validator applied to the value of the opacity parameter. */
+    protected $intValidator;
+
+    public function __construct() {
+        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
+    }
+
+    /**
+     * Validates a single alpha filter, keeping only its opacity parameter.
+     * @param $value Raw value of the filter property
+     * @param $config Configuration object, passed on to the int validator
+     * @param $context Context object, passed on to the int validator
+     * @return 'none', the rebuilt filter function with the first valid
+     *         opacity (clamped to 0-100) as its only parameter, or false
+     *         for any function other than the alpha filter
+     */
+    public function validate($value, $config, $context) {
+        $value = $this->parseCDATA($value);
+        if ($value === 'none') return $value;
+        // if we looped this we could support multiple filters
+        $function_length = strcspn($value, '(');
+        $function = trim(substr($value, 0, $function_length));
+        if ($function !== 'alpha' &&
+            $function !== 'Alpha' &&
+            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
+            ) return false;
+        $cursor = $function_length + 1;
+        if ($cursor > strlen($value)) {
+            // no opening parenthesis at all: there is no parameter list,
+            // and the string functions must not be given an offset that
+            // lies past the end of the value
+            $parameters = '';
+        } else {
+            $parameters_length = strcspn($value, ')', $cursor);
+            $parameters = substr($value, $cursor, $parameters_length);
+        }
+        $params = explode(',', $parameters);
+        $ret_params = array();
+        $lookup = array(); // parameter names already emitted
+        foreach ($params as $param) {
+            $pair = explode('=', $param);
+            // a parameter without "=" has no value and is ignored; reading
+            // the missing second element would raise an undefined offset
+            if (count($pair) < 2) continue;
+            $key       = trim($pair[0]);
+            $candidate = trim($pair[1]);
+            if (isset($lookup[$key])) continue;
+            if ($key !== 'opacity') continue;
+            $candidate = $this->intValidator->validate($candidate, $config, $context);
+            if ($candidate === false) continue;
+            $int = (int) $candidate;
+            if ($int > 100) $candidate = '100';
+            if ($int < 0) $candidate = '0';
+            $ret_params[] = "$key=$candidate";
+            $lookup[$key] = true;
+        }
+        $ret_parameters = implode(',', $ret_params);
+        $ret_function = "$function($ret_parameters)";
+        return $ret_function;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Font.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Font.php
new file mode 100644
index 000000000..699ee0b70
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Font.php
@@ -0,0 +1,149 @@
+<?php
+
+/**
+ * Validates shorthand CSS property font.
+ *
+ * The value is processed token by token in three stages: optional
+ * font-style / font-variant / font-weight, then font-size with an
+ * optional line-height, then font-family.
+ */
+class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of component validators.
+     *
+     * @note If we moved specific CSS property definitions to their own
+     *       classes instead of having them be assembled at run time by
+     *       CSSDefinition, this wouldn't be necessary.  We'd instantiate
+     *       our own copies.
+     */
+    protected $info = array();
+
+    /**
+     * @param $config Configuration object; its CSS definition supplies the
+     *        validators of the six component properties copied below
+     */
+    public function __construct($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['font-style']   = $def->info['font-style'];
+        $this->info['font-variant'] = $def->info['font-variant'];
+        $this->info['font-weight']  = $def->info['font-weight'];
+        $this->info['font-size']    = $def->info['font-size'];
+        $this->info['line-height']  = $def->info['line-height'];
+        $this->info['font-family']  = $def->info['font-family'];
+    }
+
+    /**
+     * @param $string Raw value of the font property
+     * @param $config Configuration object, passed on to the validators
+     * @param $context Context object, passed on to the validators
+     * @return The lowercased system font keyword, or the validated
+     *         components joined by spaces; false if font-size or
+     *         font-family is missing or invalid
+     */
+    public function validate($string, $config, $context) {
+
+        // keywords that stand for a complete font on their own
+        static $system_fonts = array(
+            'caption' => true,
+            'icon' => true,
+            'menu' => true,
+            'message-box' => true,
+            'small-caption' => true,
+            'status-bar' => true
+        );
+
+        // regular pre-processing
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        // check if it's one of the keywords
+        $lowercase_string = strtolower($string);
+        if (isset($system_fonts[$lowercase_string])) {
+            return $lowercase_string;
+        }
+
+        $bits = explode(' ', $string); // bits to process
+        $stage = 0; // this indicates what we're looking for
+        $caught = array(); // which stage 0 properties have we caught?
+        $stage_1 = array('font-style', 'font-variant', 'font-weight');
+        $final = ''; // output
+
+        for ($i = 0, $size = count($bits); $i < $size; $i++) {
+            if ($bits[$i] === '') continue;
+            switch ($stage) {
+
+                // attempting to catch font-style, font-variant or font-weight
+                case 0:
+                    foreach ($stage_1 as $validator_name) {
+                        if (isset($caught[$validator_name])) continue;
+                        $r = $this->info[$validator_name]->validate(
+                                                $bits[$i], $config, $context);
+                        if ($r !== false) {
+                            $final .= $r . ' ';
+                            $caught[$validator_name] = true;
+                            break;
+                        }
+                    }
+                    // all three caught, continue on
+                    if (count($caught) >= 3) $stage = 1;
+                    // token consumed by one of the three: fetch the next one
+                    if ($r !== false) break;
+                    // otherwise deliberately fall through, so that the same
+                    // token is tried as a font-size straight away
+
+                // attempting to catch font-size and perhaps line-height
+                case 1:
+                    $found_slash = false;
+                    if (strpos($bits[$i], '/') !== false) {
+                        // size and line-height share this token: "size/height"
+                        list($font_size, $line_height) =
+                                                    explode('/', $bits[$i]);
+                        if ($line_height === '') {
+                            // ooh, there's a space after the slash!
+                            $line_height = false;
+                            $found_slash = true;
+                        }
+                    } else {
+                        $font_size = $bits[$i];
+                        $line_height = false;
+                    }
+                    $r = $this->info['font-size']->validate(
+                                              $font_size, $config, $context);
+                    if ($r !== false) {
+                        $final .= $r;
+                        // attempt to catch line-height
+                        if ($line_height === false) {
+                            // we need to scroll forward
+                            for ($j = $i + 1; $j < $size; $j++) {
+                                if ($bits[$j] === '') continue;
+                                if ($bits[$j] === '/') {
+                                    if ($found_slash) {
+                                        // a second slash is an error
+                                        return false;
+                                    } else {
+                                        $found_slash = true;
+                                        continue;
+                                    }
+                                }
+                                // first token that is neither empty nor a slash
+                                $line_height = $bits[$j];
+                                break;
+                            }
+                        } else {
+                            // slash already found
+                            $found_slash = true;
+                            $j = $i;
+                        }
+                        if ($found_slash) {
+                            // skip the tokens the look-ahead went through
+                            $i = $j;
+                            $r = $this->info['line-height']->validate(
+                                              $line_height, $config, $context);
+                            // an invalid line-height is left out; the
+                            // font-size that precedes it is kept
+                            if ($r !== false) {
+                                $final .= '/' . $r;
+                            }
+                        }
+                        $final .= ' ';
+                        $stage = 2;
+                        break;
+                    }
+                    // no valid font-size: the whole value is rejected
+                    return false;
+
+                // attempting to catch font-family
+                case 2:
+                    // all remaining tokens together form the family list
+                    $font_family =
+                        implode(' ', array_slice($bits, $i, $size - $i));
+                    $r = $this->info['font-family']->validate(
+                                              $font_family, $config, $context);
+                    if ($r !== false) {
+                        $final .= $r . ' ';
+                        // processing completed successfully
+                        return rtrim($final);
+                    }
+                    return false;
+            }
+        }
+        // tokens ran out before a font-family was reached
+        return false;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
new file mode 100644
index 000000000..0d9a4e12c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -0,0 +1,197 @@
+<?php
+
+/**
+ * Validates a font family list according to CSS spec
+ */
+class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
+{
+
+    protected $mask = null;
+
+    public function __construct() {
+        $this->mask = '- ';
+        for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
+        for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
+        for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
+        // special bytes used by UTF-8
+        for ($i = 0x80; $i <= 0xFF; $i++) {
+            // We don't bother excluding invalid bytes in this range,
+            // because our restriction of well-formed UTF-8 will
+            // prevent these from ever occurring.
+            $this->mask .= chr($i);
+        }
+
+        /*
+            PHP's internal strcspn implementation is
+            O(length of string * length of mask), making it inefficient
+            for large masks.  However, it's still faster than
+            preg_match 8)
+          for (p = s1;;) {
+            spanp = s2;
+            do {
+              if (*spanp == c || p == s1_end) {
+                return p - s1;
+              }
+            } while (spanp++ < (s2_end - 1));
+            c = *++p;
+          }
+         */
+        // possible optimization: invert the mask.
+    }
+
+    public function validate($string, $config, $context) {
+        static $generic_names = array(
+            'serif' => true,
+            'sans-serif' => true,
+            'monospace' => true,
+            'fantasy' => true,
+            'cursive' => true
+        );
+        $allowed_fonts = $config->get('CSS.AllowedFonts');
+
+        // assume that no font names contain commas in them
+        $fonts = explode(',', $string);
+        $final = '';
+        foreach($fonts as $font) {
+            $font = trim($font);
+            if ($font === '') continue;
+            // match a generic name
+            if (isset($generic_names[$font])) {
+                if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
+                    $final .= $font . ', ';
+                }
+                continue;
+            }
+            // match a quoted name
+            if ($font[0] === '"' || $font[0] === "'") {
+                $length = strlen($font);
+                if ($length <= 2) continue;
+                $quote = $font[0];
+                if ($font[$length - 1] !== $quote) continue;
+                $font = substr($font, 1, $length - 2);
+            }
+
+            $font = $this->expandCSSEscape($font);
+
+            // $font is a pure representation of the font name
+
+            if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
+                continue;
+            }
+
+            if (ctype_alnum($font) && $font !== '') {
+                // very simple font, allow it in unharmed
+                $final .= $font . ', ';
+                continue;
+            }
+
+            // bugger out on whitespace.  form feed (0C) really
+            // shouldn't show up regardless
+            $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
+
+            // Here, there are various classes of characters which need
+            // to be treated differently:
+            //  - Alphanumeric characters are essentially safe.  We
+            //    handled these above.
+            //  - Spaces require quoting, though most parsers will do
+            //    the right thing if there aren't any characters that
+            //    can be misinterpreted
+            //  - Dashes rarely occur, but they are fairly unproblematic
+            //    for parsing/rendering purposes.
+            //  The above characters cover the majority of Western font
+            //  names.
+            //  - Arbitrary Unicode characters not in ASCII.  Because
+            //    most parsers give little thought to Unicode, treatment
+            //    of these codepoints is basically uniform, even for
+            //    punctuation-like codepoints.  These characters can
+            //    show up in non-Western pages and are supported by most
+            //    major browsers, for example: "ＭＳ 明朝" is a
+            //    legitimate font-name
+            //    <http://ja.wikipedia.org/wiki/MS_明朝>.  See
+            //    the CSS3 spec for more examples:
+            //    <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
+            //    You can see live samples of these on the Internet:
+            //    <http://www.google.co.jp/search?q=font-family+ＭＳ+明朝|ゴシック>
+            //    However, most of these fonts have ASCII equivalents:
+            //    for example, 'MS Mincho', and it's considered
+            //    professional to use ASCII font names instead of
+            //    Unicode font names.  Thanks Takeshi Terada for
+            //    providing this information.
+            //  The following characters, to my knowledge, have not been
+            //  used to name font names.
+            //  - Single quote.  While theoretically you might find a
+            //    font name that has a single quote in its name (serving
+            //    as an apostrophe, e.g. Dave's Scribble), I haven't
+            //    been able to find any actual examples of this.
+            //    Internet Explorer's cssText translation (which I
+            //    believe is invoked by innerHTML) normalizes any
+            //    quoting to single quotes, and fails to escape single
+            //    quotes.  (Note that this is not IE's behavior for all
+            //    CSS properties, just some sort of special casing for
+            //    font-family).  So a single quote *cannot* be used
+            //    safely in the font-family context if there will be an
+            //    innerHTML/cssText translation.  Note that Firefox 3.x
+            //    does this too.
+            //  - Double quote.  In IE, these get normalized to
+            //    single-quotes, no matter what the encoding.  (Fun
+            //    fact, in IE8, the 'content' CSS property gained
+            //    support, where they special cased to preserve encoded
+            //    double quotes, but still translate unadorned double
+            //    quotes into single quotes.)  So, because their
+            //    fixpoint behavior is identical to single quotes, they
+            //    cannot be allowed either.  Firefox 3.x displays
+            //    single-quote style behavior.
+            //  - Backslashes are reduced by one (so \\ -> \) every
+            //    iteration, so they cannot be used safely.  This shows
+            //    up in IE7, IE8 and FF3
+            //  - Semicolons, commas and backticks are handled properly.
+            //  - The rest of the ASCII punctuation is handled properly.
+            // We haven't checked what browsers do to unadorned
+            // versions, but this is not important as long as the
+            // browser doesn't /remove/ surrounding quotes (as IE does
+            // for HTML).
+            //
+            // With these results in hand, we conclude that there are
+            // various levels of safety:
+            //  - Paranoid: alphanumeric, spaces and dashes(?)
+            //  - International: Paranoid + non-ASCII Unicode
+            //  - Edgy: Everything except quotes, backslashes
+            //  - NoJS: Standards compliance, e.g. sod IE. Note that
+            //    with some judicious character escaping (since certain
+            //    types of escaping doesn't work) this is theoretically
+            //    OK as long as innerHTML/cssText is not called.
+            // We believe that international is a reasonable default
+            // (that we will implement now), and once we do more
+            // extensive research, we may feel comfortable with dropping
+            // it down to edgy.
+
+            // Edgy: alphanumeric, spaces, dashes and Unicode.  Use of
+            // str(c)spn assumes that the string was already well formed
+            // Unicode (which of course it is).
+            if (strspn($font, $this->mask) !== strlen($font)) {
+                continue;
+            }
+
+            // Historical:
+            // In the absence of innerHTML/cssText, these ugly
+            // transforms don't pose a security risk (as \\ and \"
+            // might--these escapes are not supported by most browsers).
+            // We could try to be clever and use single-quote wrapping
+            // when there is a double quote present, but I have chosen
+            // not to implement that.  (NOTE: you can reduce the amount
+            // of escapes by one depending on what quoting style you use)
+            // $font = str_replace('\\', '\\5C ', $font);
+            // $font = str_replace('"',  '\\22 ', $font);
+            // $font = str_replace("'",  '\\27 ', $font);
+
+            // font possibly with spaces, requires quoting
+            $final .= "'$font', ";
+        }
+        $final = rtrim($final, ', ');
+        if ($final === '') return false;
+        return $final;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php
new file mode 100644
index 000000000..779794a0b
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php
@@ -0,0 +1,24 @@
+<?php
+
+/**
+ * Validates based on {ident} CSS grammar production
+ */
+class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts a value only if, once trimmed, it is a single identifier.
+     * @param $string Candidate identifier
+     * @param $config Not consulted by this validator
+     * @param $context Not consulted by this validator
+     * @return The trimmed identifier, or false if it does not match
+     */
+    public function validate($string, $config, $context) {
+
+        $ident = trim($string);
+
+        // '' and '0' both convert to false; neither is a usable identifier
+        if (!$ident) return false;
+
+        // optional leading dash, then a letter or underscore, then any
+        // run of letters, digits, underscores and dashes
+        $matched = preg_match('/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/', $ident);
+        return $matched ? $ident : false;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php
new file mode 100644
index 000000000..4e6b35e5a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php
@@ -0,0 +1,40 @@
+<?php
+
+/**
+ * Decorator which enables !important to be used in CSS values.
+ */
+class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
+{
+    public $def, $allow;
+
+    /**
+     * @param $def Definition to wrap
+     * @param $allow Whether or not to allow !important
+     */
+    public function __construct($def, $allow = false) {
+        $this->def = $def;
+        $this->allow = $allow;
+    }
+    /**
+     * Intercepts and removes !important if necessary
+     * @param $string Value, possibly ending in "!important" or "! important"
+     * @param $config Passed through to the wrapped definition
+     * @param $context Passed through to the wrapped definition
+     * @return Result of the wrapped definition, with ' !important'
+     *         re-appended when it was present and is allowed; false when
+     *         the wrapped definition rejects the value
+     */
+    public function validate($string, $config, $context) {
+        // test for ! and important tokens
+        $string = trim($string);
+        $is_important = false;
+        // :TODO: optimization: test directly for !important and ! important
+        if (strlen($string) >= 9 && substr($string, -9) === 'important') {
+            $temp = rtrim(substr($string, 0, -9));
+            // use a temp, because we might want to restore important
+            if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
+                $string = rtrim(substr($temp, 0, -1));
+                $is_important = true;
+            }
+        }
+        $string = $this->def->validate($string, $config, $context);
+        // a rejected value must stay rejected: appending to false would
+        // turn it into the bogus, truthy string ' !important'
+        if ($string === false) return false;
+        if ($this->allow && $is_important) $string .= ' !important';
+        return $string;
+    }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php
new file mode 100644
index 000000000..a07ec5813
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php
@@ -0,0 +1,47 @@
+<?php
+
+/**
+ * Represents a Length as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
+{
+
+    protected $min, $max;
+
+    /**
+     * @param HTMLPurifier_Length $min Minimum length, or null for no bound. String is also acceptable.
+     * @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
+     */
+    public function __construct($min = null, $max = null) {
+        $this->min = ($min === null) ? null : HTMLPurifier_Length::make($min);
+        $this->max = ($max === null) ? null : HTMLPurifier_Length::make($max);
+    }
+
+    /**
+     * Parses the value as a length and checks it against the bounds.
+     * @param $string Raw length value
+     * @param $config Not consulted by this validator
+     * @param $context Not consulted by this validator
+     * @return String form of the parsed length, or false when the value
+     *         is not a valid length or falls outside the bounds
+     */
+    public function validate($string, $config, $context) {
+        $value = $this->parseCDATA($string);
+
+        // Cheap exits before a length object is built: a bare zero is
+        // fine, any other value shorter than two characters is not
+        if ($value === '0') return '0';
+        if ($value === '' || strlen($value) === 1) return false;
+
+        $parsed = HTMLPurifier_Length::make($value);
+        if (!$parsed->isValid()) return false;
+
+        // compareTo() yielding false is treated as a failed comparison
+        if ($this->min) {
+            $cmp = $parsed->compareTo($this->min);
+            if ($cmp === false || $cmp < 0) return false;
+        }
+        if ($this->max) {
+            $cmp = $parsed->compareTo($this->max);
+            if ($cmp === false || $cmp > 0) return false;
+        }
+
+        return $parsed->toString();
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
new file mode 100644
index 000000000..4406868c0
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php
@@ -0,0 +1,78 @@
+<?php
+
+/**
+ * Validates shorthand CSS property list-style.
+ * @warning Does not support url tokens that have internal spaces.
+ */
+class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of component validators.
+     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
+     */
+    protected $info;
+
+    /**
+     * @param $config Configuration object whose CSS definition supplies
+     *        the three list-style-* component validators
+     */
+    public function __construct($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['list-style-type']     = $def->info['list-style-type'];
+        $this->info['list-style-position'] = $def->info['list-style-position'];
+        $this->info['list-style-image'] = $def->info['list-style-image'];
+    }
+
+    /**
+     * Splits the shorthand on spaces and offers each piece to the type,
+     * position and image validators in turn; pieces that no open slot
+     * accepts are dropped.
+     * @param $string Raw list-style value
+     * @param $config Passed through to the component validators
+     * @param $context Passed through to the component validators
+     * @return Accepted components joined as "type image position", or
+     *         false if nothing was accepted
+     */
+    public function validate($string, $config, $context) {
+
+        // regular pre-processing
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        // assumes URI doesn't have spaces in it
+        $bits = explode(' ', strtolower($string)); // bits to process
+
+        $caught = array();
+        $caught['type']     = false;
+        $caught['position'] = false;
+        $caught['image']    = false;
+
+        $i = 0; // number of catches
+        $none = false;
+
+        foreach ($bits as $bit) {
+            // every slot is filled: stop scanning, but still emit what was
+            // caught (a bare return here would hand back null and throw
+            // away three valid components)
+            if ($i >= 3) break;
+            if ($bit === '') continue;
+            foreach ($caught as $key => $status) {
+                if ($status !== false) continue;
+                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
+                if ($r === false) continue;
+                if ($r === 'none') {
+                    // only the first 'none' is recorded, and never in the
+                    // image slot
+                    if ($none) continue;
+                    else $none = true;
+                    if ($key == 'image') continue;
+                }
+                $caught[$key] = $r;
+                $i++;
+                break;
+            }
+        }
+
+        if (!$i) return false;
+
+        $ret = array();
+
+        // construct type
+        if ($caught['type']) $ret[] = $caught['type'];
+
+        // construct image
+        if ($caught['image']) $ret[] = $caught['image'];
+
+        // construct position
+        if ($caught['position']) $ret[] = $caught['position'];
+
+        if (empty($ret)) return false;
+        return implode(' ', $ret);
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Multiple.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Multiple.php
new file mode 100644
index 000000000..4d62a40d7
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Multiple.php
@@ -0,0 +1,58 @@
+<?php
+
+/**
+ * Framework class for strings that involve multiple values.
+ *
+ * Certain CSS properties such as border-width and margin allow multiple
+ * lengths to be specified.  This class can take a vanilla border-width
+ * definition and multiply it, usually into a max of four.
+ *
+ * @note Even though the CSS specification isn't clear about it, inherit
+ *       can only be used alone: it will never manifest as part of a multi
+ *       shorthand declaration.  Thus, this class does not allow inherit.
+ */
+class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Instance of component definition to defer validation to.
+     * @todo Make protected
+     */
+    public $single;
+
+    /**
+     * Max number of values allowed.
+     * @todo Make protected
+     */
+    public $max;
+
+    /**
+     * @param $single HTMLPurifier_AttrDef to multiply
+     * @param $max Max number of values allowed (usually four)
+     */
+    public function __construct($single, $max = 4) {
+        $this->single = $single;
+        $this->max = $max;
+    }
+
+    /**
+     * Validates each space-separated piece with the single-value
+     * definition and keeps accepted results, up to the maximum count.
+     * @param $string Raw multi-value string
+     * @param $config Passed through to the single-value definition
+     * @param $context Passed through to the single-value definition
+     * @return Accepted values joined by single spaces, or false if none
+     */
+    public function validate($string, $config, $context) {
+        $value = $this->parseCDATA($string);
+        if ($value === '') return false;
+
+        $accepted = array();
+        // parseCDATA replaced \r, \t and \n
+        foreach (explode(' ', $value) as $piece) {
+            // stop as soon as the quota of accepted values is reached
+            if (!(count($accepted) < $this->max)) break;
+            if (ctype_space($piece)) continue;
+            $result = $this->single->validate($piece, $config, $context);
+            if ($result !== false) $accepted[] = $result;
+        }
+
+        if (!$accepted) return false;
+        return rtrim(implode(' ', $accepted));
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Number.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Number.php
new file mode 100644
index 000000000..3f99e12ec
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Number.php
@@ -0,0 +1,69 @@
+<?php
+
+/**
+ * Validates a number as defined by the CSS spec.
+ */
+class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Bool indicating whether negative values are rejected.
+     */
+    protected $non_negative = false;
+
+    /**
+     * @param $non_negative Bool indicating whether negatives are forbidden
+     */
+    public function __construct($non_negative = false) {
+        $this->non_negative = $non_negative;
+    }
+
+    /**
+     * Validates a number and returns it in normalized form: a leading
+     * '+' is dropped, leading zeros of the integer part and trailing
+     * zeros of the fraction are stripped.
+     * @param $number Raw number string
+     * @param $config Not used by this method
+     * @param $context Not used by this method
+     * @return Normalized number string, or false if invalid
+     * @warning Some contexts do not pass $config, $context. These
+     *          variables should not be used without checking HTMLPurifier_Length
+     */
+    public function validate($number, $config, $context) {
+
+        $number = $this->parseCDATA($number);
+
+        if ($number === '') return false;
+        if ($number === '0') return '0';
+
+        // Strip an optional sign.  The '-' case deliberately falls through
+        // into the '+' case so that both remove the first character; only
+        // '-' is remembered for the output.
+        $sign = '';
+        switch ($number[0]) {
+            case '-':
+                if ($this->non_negative) return false;
+                $sign = '-';
+            case '+':
+                $number = substr($number, 1);
+        }
+
+        // Pure integer: drop leading zeros; an all-zero value collapses
+        // to an unsigned '0'
+        if (ctype_digit($number)) {
+            $number = ltrim($number, '0');
+            return $number ? $sign . $number : '0';
+        }
+
+        // Period is the only non-numeric character allowed
+        if (strpos($number, '.') === false) return false;
+
+        // Split at the first period only; a second period ends up in
+        // $right and fails the digit check below
+        list($left, $right) = explode('.', $number, 2);
+
+        // A lone '.' is not a number; an empty integer part (".5") is fine
+        if ($left === '' && $right === '') return false;
+        if ($left !== '' && !ctype_digit($left)) return false;
+
+        $left  = ltrim($left,  '0');
+        $right = rtrim($right, '0');
+
+        if ($right === '') {
+            // nothing left after the period: emit as an integer
+            return $left ? $sign . $left : '0';
+        } elseif (!ctype_digit($right)) {
+            return false;
+        }
+
+        return $sign . $left . '.' . $right;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php
new file mode 100644
index 000000000..c34b8fc3c
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php
@@ -0,0 +1,40 @@
+<?php
+
+/**
+ * Validates a Percentage as defined by the CSS spec.
+ */
+class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
+     */
+    protected $number_def;
+
+    /**
+     * @param $non_negative Bool indicating whether to forbid negative values
+     */
+    public function __construct($non_negative = false) {
+        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
+    }
+
+    /**
+     * Requires a trailing percent sign and validates everything before
+     * it with the number definition.
+     * @param $string Raw percentage value
+     * @param $config Passed through to the number definition
+     * @param $context Passed through to the number definition
+     * @return Validated number followed by '%', or false if invalid
+     */
+    public function validate($string, $config, $context) {
+
+        $value = $this->parseCDATA($string);
+
+        // need at least one character in front of the percent sign
+        if (strlen($value) < 2) return false;
+        if (substr($value, -1) !== '%') return false;
+
+        $digits = substr($value, 0, -1);
+        $validated = $this->number_def->validate($digits, $config, $context);
+
+        return ($validated === false) ? false : $validated . '%';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
new file mode 100644
index 000000000..772c922d8
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
@@ -0,0 +1,38 @@
+<?php
+
+/**
+ * Validates the value for the CSS property text-decoration
+ * @note This class could be generalized into a version that acts sort of
+ *       like Enum except you can compound the allowed values.
+ */
+class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts 'none' on its own, or any space-separated combination of
+     * the recognized decoration keywords; unrecognized words are dropped.
+     * @param $string Raw text-decoration value
+     * @param $config Not consulted by this validator
+     * @param $context Not consulted by this validator
+     * @return Lowercased keywords joined by spaces, or false if none
+     */
+    public function validate($string, $config, $context) {
+
+        static $allowed_values = array(
+            'line-through' => true,
+            'overline' => true,
+            'underline' => true,
+        );
+
+        $value = strtolower($this->parseCDATA($string));
+
+        if ($value === 'none') return $value;
+
+        $kept = array();
+        foreach (explode(' ', $value) as $keyword) {
+            if (isset($allowed_values[$keyword])) $kept[] = $keyword;
+        }
+
+        if (!$kept) return false;
+        return implode(' ', $kept);
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php
new file mode 100644
index 000000000..c2f767e57
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php
@@ -0,0 +1,61 @@
+<?php
+
+/**
+ * Validates a URI in CSS syntax, which uses url('http://example.com')
+ * @note While theoretically speaking a URI in a CSS document could
+ *       be non-embedded, as of CSS2 there is no such usage so we're
+ *       generalizing it. This may need to be changed in the future.
+ * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
+ *          the separator, you cannot put a literal semicolon in
+ *          in the URI. Try percent encoding it, in that case.
+ */
+class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
+{
+
+    public function __construct() {
+        parent::__construct(true); // always embedded
+    }
+
+    /**
+     * Extracts the URI from a url(...) token, validates it with the
+     * parent URI validator and re-wraps it in url("...").
+     * @param $uri_string Raw CSS value, expected to be of the form url(...)
+     * @param $config Passed through to the parent validator
+     * @param $context Passed through to the parent validator
+     * @return Normalized url("...") string, or false if invalid
+     */
+    public function validate($uri_string, $config, $context) {
+        // parse the URI out of the string and then pass it onto
+        // the parent object
+
+        $uri_string = $this->parseCDATA($uri_string);
+        if (strpos($uri_string, 'url(') !== 0) return false;
+        $uri_string = substr($uri_string, 4);
+        // nothing after "url(": bail out before indexing an empty string,
+        // which would read an uninitialized offset
+        if ((string) $uri_string === '') return false;
+        $new_length = strlen($uri_string) - 1;
+        if ($uri_string[$new_length] != ')') return false;
+        $uri = trim(substr($uri_string, 0, $new_length));
+
+        // strip one pair of matching quotes, if present
+        if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
+            $quote = $uri[0];
+            $new_length = strlen($uri) - 1;
+            if ($uri[$new_length] !== $quote) return false;
+            $uri = substr($uri, 1, $new_length - 1);
+        }
+
+        $uri = $this->expandCSSEscape($uri);
+
+        $result = parent::validate($uri, $config, $context);
+
+        if ($result === false) return false;
+
+        // extra sanity check; should have been done by URI
+        $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
+
+        // suspicious characters are ()'; we're going to percent encode
+        // them for safety.
+        $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
+
+        // there's an extra bug where ampersands lose their escaping on
+        // an innerHTML cycle, so a very unlucky query parameter could
+        // then change the meaning of the URL.  Unfortunately, there's
+        // not much we can do about that...
+
+        return "url(\"$result\")";
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php
new file mode 100644
index 000000000..ce68dbd54
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Dummy AttrDef that mimics another AttrDef, BUT it generates clones
+ * with make.
+ */
+class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
+{
+    /**
+     * The definition being mimicked and copied
+     */
+    protected $clone;
+
+    /**
+     * @param $clone Definition to delegate to and to copy in make()
+     */
+    public function __construct($clone) {
+        $this->clone = $clone;
+    }
+
+    /**
+     * Delegates validation unchanged to the wrapped definition.
+     */
+    public function validate($v, $config, $context) {
+        $wrapped = $this->clone;
+        return $wrapped->validate($v, $config, $context);
+    }
+
+    /**
+     * @param $string Ignored
+     * @return A fresh copy of the wrapped definition
+     */
+    public function make($string) {
+        $copy = clone $this->clone;
+        return $copy;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php
new file mode 100644
index 000000000..5d603ebcc
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Enum.php
@@ -0,0 +1,65 @@
+<?php
+
+// Enum = Enumerated
+/**
+ * Validates a keyword against a list of valid values.
+ * @warning The case-insensitive compare of this function uses PHP's
+ *          built-in strtolower and ctype_lower functions, which may
+ *          cause problems with international comparisons
+ */
+class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Lookup table of valid values.
+     * @todo Make protected
+     */
+    public $valid_values   = array();
+
+    /**
+     * Bool indicating whether or not enumeration is case sensitive.
+     * @note In general this is always case insensitive.
+     */
+    protected $case_sensitive = false; // values according to W3C spec
+
+    /**
+     * @param $valid_values List of valid values
+     * @param $case_sensitive Bool indicating whether or not case sensitive
+     */
+    public function __construct(
+        $valid_values = array(), $case_sensitive = false
+    ) {
+        // flipped so that membership is a key lookup
+        $this->valid_values = array_flip($valid_values);
+        $this->case_sensitive = $case_sensitive;
+    }
+
+    /**
+     * Trims the value, lowercases it unless the enumeration is case
+     * sensitive, and checks it against the lookup table.
+     * @return The normalized value if it is listed, false otherwise
+     */
+    public function validate($string, $config, $context) {
+        $candidate = trim($string);
+        // we may want to do full case-insensitive libraries
+        if (!$this->case_sensitive && !ctype_lower($candidate)) {
+            $candidate = strtolower($candidate);
+        }
+        if (!isset($this->valid_values[$candidate])) return false;
+        return $candidate;
+    }
+
+    /**
+     * @param $string In form of comma-delimited list of case-insensitive
+     *      valid values. Example: "foo,bar,baz". Prepend "s:" to make
+     *      case sensitive
+     */
+    public function make($string) {
+        $sensitive = strlen($string) > 2 && substr($string, 0, 2) === 's:';
+        if ($sensitive) $string = substr($string, 2);
+        return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php
new file mode 100644
index 000000000..e06987eb8
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Validates a boolean attribute
+ */
+class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
+{
+
+    protected $name;
+    public $minimized = true;
+
+    /**
+     * @param $name Name of attribute; returned by validate() on success
+     */
+    public function __construct($name = false) {
+        $this->name = $name;
+    }
+
+    /**
+     * Any non-empty value counts as "present"; the result is always the
+     * attribute's own name rather than the supplied value.
+     * @return The attribute name, or false for an empty value
+     */
+    public function validate($string, $config, $context) {
+        return empty($string) ? false : $this->name;
+    }
+
+    /**
+     * @param $string Name of attribute
+     */
+    public function make($string) {
+        $def = new HTMLPurifier_AttrDef_HTML_Bool($string);
+        return $def;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php
new file mode 100644
index 000000000..370068d97
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Class.php
@@ -0,0 +1,34 @@
+<?php
+
+/**
+ * Implements special behavior for class attribute (normally NMTOKENS)
+ */
+class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
+{
+    /**
+     * Uses the parent's splitting for XHTML 1.1 and XHTML 2.0 doctypes,
+     * and a plain whitespace split for every other doctype.
+     */
+    protected function split($string, $config, $context) {
+        // really, this twiddle should be lazy loaded
+        $doctype = $config->getDefinition('HTML')->doctype->name;
+        if ($doctype != "XHTML 1.1" && $doctype != "XHTML 2.0") {
+            return preg_split('/\s+/', $string);
+        }
+        return parent::split($string, $config, $context);
+    }
+    /**
+     * Keeps tokens that are allowed (when an allow-list is configured),
+     * not forbidden, and not already kept.
+     */
+    protected function filter($tokens, $config, $context) {
+        $whitelist = $config->get('Attr.AllowedClasses');
+        $blacklist = $config->get('Attr.ForbiddenClasses');
+        $kept = array();
+        foreach ($tokens as $token) {
+            if ($whitelist !== null && !isset($whitelist[$token])) continue;
+            if (isset($blacklist[$token])) continue;
+            // We need this O(n) check because of PHP's array
+            // implementation that casts -0 to 0.
+            if (in_array($token, $kept, true)) continue;
+            $kept[] = $token;
+        }
+        return $kept;
+    }
+}
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php
new file mode 100644
index 000000000..00d865723
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php
@@ -0,0 +1,32 @@
+<?php
+
+/**
+ * Validates a color according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Accepts a configured color keyword (case-insensitively) or a 3- or
+     * 6-digit hex color with or without a leading '#'.
+     * @param $string Raw color value
+     * @param $config Source of the Core.ColorKeywords table
+     * @param $context Not consulted by this validator
+     * @return The keyword's mapped value, or '#' plus six hex digits, or
+     *         false if invalid
+     */
+    public function validate($string, $config, $context) {
+
+        // keyword table is fetched once and reused across calls
+        static $colors = null;
+        if ($colors === null) $colors = $config->get('Core.ColorKeywords');
+
+        $string = trim($string);
+
+        if (empty($string)) return false;
+        // look up AND fetch with the same lowercased key; fetching with the
+        // raw string hit an undefined index for input such as 'Red'
+        $lower = strtolower($string);
+        if (isset($colors[$lower])) return $colors[$lower];
+        if ($string[0] === '#') $hex = substr($string, 1);
+        else $hex = $string;
+
+        $length = strlen($hex);
+        if ($length !== 3 && $length !== 6) return false;
+        if (!ctype_xdigit($hex)) return false;
+        // expand shorthand: abc -> aabbcc
+        if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];
+
+        return "#$hex";
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
new file mode 100644
index 000000000..ae6ea7c01
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Special-case enum attribute definition that lazy loads allowed frame targets
+ */
+class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
+{
+
+    public $valid_values = false; // uninitialized value
+    protected $case_sensitive = false;
+
+    /**
+     * Takes no arguments: the allowed values come from the configuration
+     * on first use instead.
+     */
+    public function __construct() {}
+
+    /**
+     * Loads the allowed frame targets on the first call, then defers to
+     * the parent enumeration check.
+     */
+    public function validate($string, $config, $context) {
+        if ($this->valid_values === false) {
+            $this->valid_values = $config->get('Attr.AllowedFrameTargets');
+        }
+        return parent::validate($string, $config, $context);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php
new file mode 100644
index 000000000..0015fa1eb
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php
@@ -0,0 +1,80 @@
+<?php
+
+/**
+ * Validates the HTML attribute ID.
+ * @warning Even though this is the id processor, it
+ *          will ignore the directive Attr:IDBlacklist, since it will only
+ *          go according to the ID accumulator. Since the accumulator is
+ *          automatically generated, it will have already absorbed the
+ *          blacklist. If you're hacking around, make sure you use load()!
+ */
+
+class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
+{
+
+    // selector is NOT a valid thing to use for IDREFs, because IDREFs
+    // *must* target IDs that exist, whereas selector #ids do not.
+
+    /**
+     * Determines whether or not we're validating an ID in a CSS
+     * selector context.
+     */
+    protected $selector;
+
+    /**
+     * @param $selector Bool, true when validating an ID for a selector
+     */
+    public function __construct($selector = false) {
+        $this->selector = $selector;
+    }
+
+    /**
+     * Validates an ID: applies the configured prefix, rejects IDs already
+     * recorded in the accumulator, checks the permitted characters and
+     * the blacklist regexp, and records accepted IDs.
+     * @param $id Raw ID value
+     * @param $config Source of the Attr.* directives read below
+     * @param $context Source of the 'IDAccumulator' object
+     * @return The (possibly prefixed) ID, or false if rejected
+     */
+    public function validate($id, $config, $context) {
+
+        // outside selector mode, IDs are only accepted when enabled
+        if (!$this->selector && !$config->get('Attr.EnableID')) return false;
+
+        $id = trim($id); // trim it first
+
+        if ($id === '') return false;
+
+        $prefix = $config->get('Attr.IDPrefix');
+        if ($prefix !== '') {
+            $prefix .= $config->get('Attr.IDPrefixLocal');
+            // prevent re-appending the prefix
+            if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
+        } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
+            trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
+                '%Attr.IDPrefix is set', E_USER_WARNING);
+        }
+
+        // selector mode skips the accumulator entirely, both for the
+        // duplicate check here and for the registration further down
+        if (!$this->selector) {
+            $id_accumulator =& $context->get('IDAccumulator');
+            if (isset($id_accumulator->ids[$id])) return false;
+        }
+
+        // we purposely avoid using regex, hopefully this is faster
+
+        if (ctype_alpha($id)) {
+            $result = true;
+        } else {
+            // first character must be a letter
+            if (!ctype_alpha(@$id[0])) return false;
+            // trim() strips permitted characters from both ends; anything
+            // left over means a character outside the list is present
+            $trim = trim( // primitive style of regexps, I suppose
+                $id,
+                'A..Za..z0..9:-._'
+              );
+            $result = ($trim === '');
+        }
+
+        $regexp = $config->get('Attr.IDBlacklistRegexp');
+        if ($regexp && preg_match($regexp, $id)) {
+            return false;
+        }
+
+        // record the ID so that a later duplicate is rejected
+        if (!$this->selector && $result) $id_accumulator->add($id);
+
+        // return the trimmed (and possibly prefixed) ID if it passed the
+        // character check, otherwise false
+        return $result ? $id : false;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php
new file mode 100644
index 000000000..a242f9c23
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * Validates the HTML type length (not to be confused with CSS's length).
+ *
+ * This accepts integer pixels or percentages as lengths for certain
+ * HTML attributes.
+ */
+
+class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
+{
+
+    /**
+     * Accepts whatever the parent pixel validator accepts; otherwise
+     * accepts a numeric percentage, clamped to the range 0%-100%.
+     *
+     * @param string $string Raw attribute value
+     * @return Normalised length string, or false when invalid
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+        if ($string === '') {
+            return false;
+        }
+
+        // plain pixel values are settled by the parent class
+        $pixels = parent::validate($string, $config, $context);
+        if ($pixels !== false) {
+            return $pixels;
+        }
+
+        // anything left must look like "<number>%"
+        if (substr($string, -1) !== '%') {
+            return false;
+        }
+
+        $number = substr($string, 0, -1);
+        if (!is_numeric($number)) {
+            return false;
+        }
+
+        // truncate to an integer and clamp into 0..100
+        $percent = min(100, max(0, (int) $number));
+
+        return $percent . '%';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
new file mode 100644
index 000000000..76d25ed08
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
@@ -0,0 +1,53 @@
+<?php
+
+/**
+ * Validates a rel/rev link attribute against a directive of allowed values
+ * @note We cannot use Enum because link types allow multiple
+ *       values.
+ * @note Assumes link types are ASCII text
+ */
+class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
+{
+
+    /** Directive name (looked up under "Attr.") listing the allowed values. */
+    protected $name;
+
+    /**
+     * @param string $name Attribute being validated: 'rel' or 'rev'.
+     *        Any other value raises an E_USER_ERROR.
+     */
+    public function __construct($name) {
+        $directives = array(
+            'rel' => 'AllowedRel',
+            'rev' => 'AllowedRev'
+        );
+        if (isset($directives[$name])) {
+            $this->name = $directives[$name];
+            return;
+        }
+        trigger_error('Unrecognized attribute name for link '.
+            'relationship.', E_USER_ERROR);
+    }
+
+    /**
+     * Keeps only the space-separated link types present in the allowed
+     * lookup, lowercased and de-duplicated, in order of first appearance.
+     *
+     * @return Space-joined list of surviving link types, or false if
+     *         nothing is allowed or nothing survives
+     */
+    public function validate($string, $config, $context) {
+
+        $allowed = $config->get('Attr.' . $this->name);
+        if (empty($allowed)) {
+            return false;
+        }
+
+        // keyed by link type so that repeats collapse
+        $kept = array();
+        foreach (explode(' ', $this->parseCDATA($string)) as $candidate) {
+            $candidate = strtolower(trim($candidate));
+            if (isset($allowed[$candidate])) {
+                $kept[$candidate] = true;
+            }
+        }
+
+        if (empty($kept)) {
+            return false;
+        }
+
+        return implode(' ', array_keys($kept));
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
new file mode 100644
index 000000000..c72fc76e4
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php
@@ -0,0 +1,41 @@
+<?php
+
+/**
+ * Validates a MultiLength as defined by the HTML spec.
+ *
+ * A multilength is either a integer (pixel count), a percentage, or
+ * a relative number.
+ */
+class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
+{
+
+    /**
+     * Accepts whatever the parent Length validator accepts; otherwise
+     * accepts a relative length of the form "<number>*".
+     *
+     * @param string $string Raw attribute value
+     * @return Normalised multilength string, or false when invalid
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+        if ($string === '') {
+            return false;
+        }
+
+        // pixel and percentage forms are settled by the parent class
+        $plain = parent::validate($string, $config, $context);
+        if ($plain !== false) {
+            return $plain;
+        }
+
+        if (substr($string, -1) !== '*') {
+            return false;
+        }
+
+        $weight = substr($string, 0, -1);
+
+        // a bare "*" is returned as is
+        if ($weight == '') {
+            return '*';
+        }
+        if (!is_numeric($weight)) {
+            return false;
+        }
+
+        $weight = (int) $weight;
+
+        if ($weight < 0) {
+            return false;
+        }
+        if ($weight === 0) {
+            return '0';
+        }
+        if ($weight === 1) {
+            // "1*" is written in its short form
+            return '*';
+        }
+
+        return $weight . '*';
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
new file mode 100644
index 000000000..aa34120bd
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@@ -0,0 +1,52 @@
+<?php
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Extracts the well-formed tokens from a space separated list, runs
+     * them through filter() and joins the survivors with single spaces.
+     *
+     * @param string $string Raw attribute value
+     * @return Space-joined token list, or false if no token survives
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+
+        // '' and '0' both convert to false and are rejected up front
+        if (!$string) {
+            return false;
+        }
+
+        $tokens = $this->filter(
+            $this->split($string, $config, $context),
+            $config,
+            $context
+        );
+
+        return empty($tokens) ? false : implode(' ', $tokens);
+
+    }
+
+    /**
+     * Collects every token of the list that matches the supported
+     * pattern; anything else is silently dropped.
+     * @note Codepoints from U+00A1 upwards and escapes are not supported.
+     */
+    protected function split($string, $config, $context) {
+        // a token must start at the string start or right after whitespace
+        $boundary_before = '(?:(?<=\s)|\A)';
+        // "--" or an optional "-" plus a letter/underscore, then any
+        // run of letters, digits, "_" and "-"
+        $token = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
+        // ... and must end at the string end or right before whitespace
+        $boundary_after = '(?:(?=\s)|\z)';
+
+        preg_match_all(
+            '/' . $boundary_before . $token . $boundary_after . '/',
+            $string,
+            $found
+        );
+
+        return $found[1];
+    }
+
+    /**
+     * Hook for subclasses that want to discard tokens; this base
+     * version hands the list back untouched.
+     */
+    protected function filter($tokens, $config, $context) {
+        $kept = $tokens;
+        return $kept;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php
new file mode 100644
index 000000000..4cb2c1b85
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Validates an integer representation of pixels according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
+{
+
+    /** Largest value validate() will return, or null for no cap. */
+    protected $max;
+
+    /**
+     * @param $max Upper bound applied to validated values, null for none
+     */
+    public function __construct($max = null) {
+        $this->max = $max;
+    }
+
+    /**
+     * Normalises a pixel count: strips an optional trailing "px",
+     * truncates to an integer, raises negatives to 0 and applies the
+     * configured maximum.
+     *
+     * @param string $string Raw attribute value
+     * @return Integer as a string, or false when the value is not numeric
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+        if ($string === '') {
+            return false;
+        }
+        if ($string === '0') {
+            return $string;
+        }
+
+        $suffix_at = strlen($string) - 2;
+        if (substr($string, $suffix_at) === 'px') {
+            $string = substr($string, 0, $suffix_at);
+        }
+
+        if (!is_numeric($string)) {
+            return false;
+        }
+
+        $pixels = (int) $string;
+        if ($pixels < 0) {
+            return '0';
+        }
+
+        // upper-bound value, extremely high values can
+        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
+        // WARNING, above link WILL crash you if you're using Windows
+        $capped = ($this->max !== null && $pixels > $this->max);
+
+        return (string) ($capped ? $this->max : $pixels);
+
+    }
+
+    /**
+     * Builds a validator of the same class as this one.
+     *
+     * @param string $string Maximum as a string; '' means no maximum
+     */
+    public function make($string) {
+        $max = ($string === '') ? null : (int) $string;
+        $class = get_class($this);
+        return new $class($max);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Integer.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Integer.php
new file mode 100644
index 000000000..d59738d2a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Integer.php
@@ -0,0 +1,73 @@
+<?php
+
+/**
+ * Validates an integer.
+ * @note While this class was modeled off the CSS definition, no currently
+ *       allowed CSS uses this type.  The properties that do are: widows,
+ *       orphans, z-index, counter-increment, counter-reset.  Some of the
+ *       HTML attributes, however, find use for a non-negative version of this.
+ */
+class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Bool indicating whether or not negative values are allowed
+     */
+    protected $negative = true;
+
+    /**
+     * Bool indicating whether or not zero is allowed
+     */
+    protected $zero = true;
+
+    /**
+     * Bool indicating whether or not positive values are allowed
+     */
+    protected $positive = true;
+
+    /**
+     * @param $negative Bool indicating whether or not negative values are allowed
+     * @param $zero Bool indicating whether or not zero is allowed
+     * @param $positive Bool indicating whether or not positive values are allowed
+     */
+    public function __construct(
+        $negative = true, $zero = true, $positive = true
+    ) {
+        $this->negative = $negative;
+        $this->zero     = $zero;
+        $this->positive = $positive;
+    }
+
+    /**
+     * Accepts an optionally signed run of digits that falls inside the
+     * configured sign ranges.
+     *
+     * @param string $integer Raw attribute value
+     * @return The integer as a string (a leading "+" removed, "-0"
+     *         rewritten to "0"), or false when rejected
+     */
+    public function validate($integer, $config, $context) {
+
+        $integer = $this->parseCDATA($integer);
+        if ($integer === '') {
+            return false;
+        }
+
+        // A plain typecast is avoided on purpose: some fringe inputs
+        // must not come back as an integer.
+
+        // A sign is only recognised when values of that sign are
+        // allowed; otherwise it stays in $digits and fails the digit
+        // test below.
+        $sign   = $integer[0];
+        $digits = $integer;
+        if ($sign === '-' && $this->negative) {
+            $digits = substr($integer, 1);
+            if ($digits === '0') {
+                $integer = '0'; // rm minus sign for zero
+            }
+        } elseif ($sign === '+' && $this->positive) {
+            $integer = substr($integer, 1); // rm unnecessary plus
+            $digits  = $integer;
+        }
+
+        if (!ctype_digit($digits)) {
+            return false;
+        }
+
+        // range tests against the allowed signs
+        $rejected = (!$this->zero     && $integer == 0)
+                 || (!$this->positive && $integer > 0)
+                 || (!$this->negative && $integer < 0);
+
+        return $rejected ? false : $integer;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php
new file mode 100644
index 000000000..10e6da56d
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Lang.php
@@ -0,0 +1,73 @@
+<?php
+
+/**
+ * Validates the HTML attribute lang, effectively a language code.
+ * @note Built according to RFC 3066, which obsoleted RFC 1766
+ */
+class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Normalises a language code made of "-" separated subtags.
+     *
+     * The primary subtag decides validity. Later subtags are kept up to
+     * (not including) the first malformed one, so a partially valid
+     * code is shortened rather than rejected. Kept subtags are
+     * lowercased.
+     *
+     * @param string $string Raw attribute value
+     * @return Normalised code, or false if the primary subtag is invalid
+     */
+    public function validate($string, $config, $context) {
+
+        $string = trim($string);
+        // '' and '0' both convert to false and are rejected
+        if (!$string) {
+            return false;
+        }
+
+        $subtags = explode('-', $string);
+        $num_subtags = count($subtags);
+
+        // primary subtag: "x" or "i", or two to three letters
+        $primary = $subtags[0];
+        $primary_length = strlen($primary);
+        if ($primary_length === 1) {
+            if ($primary !== 'x' && $primary !== 'i') {
+                return false;
+            }
+        } elseif ($primary_length === 2 || $primary_length === 3) {
+            if (!ctype_alpha($primary)) {
+                return false;
+            }
+            $primary = strtolower($primary);
+        } else {
+            // empty, or longer than three characters
+            return false;
+        }
+
+        // remaining subtags: 1 to 8 alphanumerics each; stop at the
+        // first one that does not fit
+        $accepted = array($primary);
+        for ($i = 1; $i < $num_subtags; $i++) {
+            $subtag = $subtags[$i];
+            $length = strlen($subtag);
+            if ($length === 0 || $length > 8 || !ctype_alnum($subtag)) {
+                break;
+            }
+            // the second subtag may only be a single character if it is "x"
+            if ($i === 1 && $length === 1 && $subtag !== 'x') {
+                break;
+            }
+            $accepted[] = strtolower($subtag);
+        }
+
+        return implode('-', $accepted);
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Switch.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Switch.php
new file mode 100644
index 000000000..c9e3ed193
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Switch.php
@@ -0,0 +1,34 @@
+<?php
+
+/**
+ * Decorator that, depending on a token, switches between two definitions.
+ */
+class HTMLPurifier_AttrDef_Switch
+{
+
+    /** Tag name that selects $withTag. */
+    protected $tag;
+
+    /** Definition used when the current token's name equals $tag. */
+    protected $withTag;
+
+    /** Definition used for any other token, or when there is no token. */
+    protected $withoutTag;
+
+    /**
+     * @param string $tag Tag name to switch upon
+     * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
+     * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
+     */
+    public function __construct($tag, $with_tag, $without_tag) {
+        $this->tag        = $tag;
+        $this->withTag    = $with_tag;
+        $this->withoutTag = $without_tag;
+    }
+
+    /**
+     * Hands the value to one of the two wrapped definitions, chosen by
+     * the name of the 'CurrentToken' found in the context.
+     *
+     * @return Whatever the chosen definition's validate() returns
+     */
+    public function validate($string, $config, $context) {
+        $token = $context->get('CurrentToken', true);
+        $on_tag = $token && $token->name === $this->tag;
+        $delegate = $on_tag ? $this->withTag : $this->withoutTag;
+        return $delegate->validate($string, $config, $context);
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Text.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Text.php
new file mode 100644
index 000000000..c6216cc53
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/Text.php
@@ -0,0 +1,15 @@
+<?php
+
+/**
+ * Validates arbitrary text according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Returns the value as processed by parseCDATA(); no further
+     * checks are applied here.
+     *
+     * @param string $string Raw attribute value
+     */
+    public function validate($string, $config, $context) {
+        $normalized = $this->parseCDATA($string);
+        return $normalized;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php
new file mode 100644
index 000000000..c2b684671
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php
@@ -0,0 +1,77 @@
+<?php
+
+/**
+ * Validates a URI as defined by RFC 3986.
+ * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
+ */
+class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
+{
+
+    /** HTMLPurifier_URIParser instance used to parse the attribute value. */
+    protected $parser;
+
+    /** Bool, see the constructor parameter. */
+    protected $embedsResource;
+
+    /**
+     * @param $embeds_resource Does the URI here result in an extra HTTP request?
+     */
+    public function __construct($embeds_resource = false) {
+        $this->embedsResource = (bool) $embeds_resource;
+        $this->parser = new HTMLPurifier_URIParser();
+    }
+
+    /**
+     * @param string $string 'embedded' yields a validator with the
+     *        embeds-resource flag set; anything else leaves it unset
+     */
+    public function make($string) {
+        return new HTMLPurifier_AttrDef_URI($string === 'embedded');
+    }
+
+    /**
+     * Parses the value and runs the URI through each validation stage
+     * in turn, stopping at the first stage that fails.
+     *
+     * @param string $uri Raw attribute value
+     * @return The URI converted back to a string, or false if URIs are
+     *         disabled, parsing fails or any stage rejects it
+     */
+    public function validate($uri, $config, $context) {
+
+        if ($config->get('URI.Disable')) {
+            return false;
+        }
+
+        // from here on $uri is the parsed URI object, not the string
+        $uri = $this->parser->parse($this->parseCDATA($uri));
+        if ($uri === false) {
+            return false;
+        }
+
+        // the flag is visible to the validators only while they run;
+        // it is removed again below on every path
+        $context->register('EmbeddedURI', $this->embedsResource);
+
+        // generic validation
+        $ok = (bool) $uri->validate($config, $context);
+
+        // chained filtering
+        if ($ok) {
+            $uri_def = $config->getDefinition('URI');
+            $ok = (bool) $uri_def->filter($uri, $config, $context);
+        }
+
+        // a scheme object must exist, and must be browsable when the
+        // URI embeds a resource
+        if ($ok) {
+            $scheme_obj = $uri->getSchemeObj($config, $context);
+            $ok = $scheme_obj
+                && !($this->embedsResource && !$scheme_obj->browsable);
+        }
+
+        // scheme-specific validation
+        if ($ok) {
+            $ok = (bool) $scheme_obj->validate($uri, $config, $context);
+        }
+
+        // post chained filtering
+        if ($ok) {
+            $ok = (bool) $uri_def->postFilter($uri, $config, $context);
+        }
+
+        $context->destroy('EmbeddedURI');
+
+        // back to string if it survived the gauntlet
+        return $ok ? $uri->toString() : false;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email.php
new file mode 100644
index 000000000..bfee9d166
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email.php
@@ -0,0 +1,17 @@
+<?php
+
+abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Intended to unpack a mailbox into its display-name and address.
+     * @note Not implemented yet: the method currently yields null for
+     *       every input.
+     */
+    public function unpack($string) {
+        // needs to be implemented
+        return null;
+    }
+
+}
+
+// sub-implementations
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
new file mode 100644
index 000000000..94c715ab4
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Primitive email validation class based on the regexp found at
+ * http://www.regular-expressions.info/email.html
+ * @note The top-level label may be 2 to 63 letters long (63 being the
+ *       maximum length of a DNS label), so that addresses under long
+ *       TLDs such as .museum or .photography are not rejected.
+ */
+class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
+{
+
+    /**
+     * Checks that the value looks like a bare "local@domain.tld" address.
+     *
+     * @param string $string Raw value; surrounding whitespace is ignored
+     * @return The trimmed address, or false when it does not match
+     */
+    public function validate($string, $config, $context) {
+        // no support for named mailboxes i.e. "Bob <bob@example.com>"
+        // that needs more percent encoding to be done
+
+        // trim first, so that a whitespace-only value is caught by the
+        // emptiness test rather than falling through to the regex
+        $string = trim($string);
+        if ($string === '') return false;
+
+        // D keeps "$" from matching in front of a trailing newline
+        $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,63}$/iD', $string);
+        return $result ? $string : false;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
new file mode 100644
index 000000000..125decb2d
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php
@@ -0,0 +1,101 @@
+<?php
+
+/**
+ * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
+ */
+class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
+     */
+    protected $ipv4;
+
+    /**
+     * Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
+     */
+    protected $ipv6;
+
+    public function __construct() {
+        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
+        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
+    }
+
+    /**
+     * Validates the host part of a URI.
+     *
+     * Tried in order: the empty host, a bracketed IPv6 literal, an IPv4
+     * address, an ASCII domain name and, if %Core.EnableIDNA is set, a
+     * domain name whose non-ASCII labels are encoded with Net_IDNA2.
+     *
+     * @param string $string Host to validate
+     * @return The (possibly re-encoded) host, or false when invalid
+     */
+    public function validate($string, $config, $context) {
+        $length = strlen($string);
+        // empty hostname is OK; it's usually semantically equivalent:
+        // the default host as defined by a URI scheme is used:
+        //
+        //      If the URI scheme defines a default for host, then that
+        //      default applies when the host subcomponent is undefined
+        //      or when the registered name is empty (zero length).
+        if ($string === '') return '';
+        if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
+            //IPv6
+            $ip = substr($string, 1, $length - 2);
+            $valid = $this->ipv6->validate($ip, $config, $context);
+            if ($valid === false) return false;
+            return '['. $valid . ']';
+        }
+
+        // need to do checks on unusual encodings too
+        $ipv4 = $this->ipv4->validate($string, $config, $context);
+        if ($ipv4 !== false) return $ipv4;
+
+        // A regular domain name.
+
+        // This doesn't match I18N domain names, but we don't have proper IRI support,
+        // so force users to insert Punycode.
+
+        // The productions describing this are:
+        $a   = '[a-z]';     // alpha
+        $an  = '[a-z0-9]';  // alphanum
+        $and = '[a-z0-9-]'; // alphanum | "-"
+        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+        $domainlabel   = "$an($and*$an)?";
+        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
+        $toplabel      = "$a($and*$an)?";
+        // hostname    = *( domainlabel "." ) toplabel [ "." ]
+        // The D modifier makes "$" match only at the very end of the
+        // subject; without it a host followed by a newline would match
+        // and be returned with the newline still attached.
+        $hostname = '/^(' . $domainlabel . '\.)*' . $toplabel . '\.?$/iD';
+        if (preg_match($hostname, $string)) {
+            return $string;
+        }
+
+        // If we have Net_IDNA2 support, we can support IRIs by
+        // punycoding them. (This is the most portable thing to do,
+        // since it does not rely on browsers handling
+        // internationalized host names themselves.)
+
+        if ($config->get('Core.EnableIDNA')) {
+            $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
+            // we need to encode each period separately
+            $parts = explode('.', $string);
+            try {
+                $new_parts = array();
+                foreach ($parts as $part) {
+                    // a label is sent to the encoder as soon as it holds
+                    // one byte above 0x7a (the letter "z")
+                    $encodable = false;
+                    for ($i = 0, $c = strlen($part); $i < $c; $i++) {
+                        if (ord($part[$i]) > 0x7a) {
+                            $encodable = true;
+                            break;
+                        }
+                    }
+                    if (!$encodable) {
+                        $new_parts[] = $part;
+                    } else {
+                        $new_parts[] = $idna->encode($part);
+                    }
+                }
+                $string = implode('.', $new_parts);
+                // the encoded name has to satisfy the same grammar
+                if (preg_match($hostname, $string)) {
+                    return $string;
+                }
+            } catch (Exception $e) {
+                // XXX error reporting
+            }
+        }
+
+        return false;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv4.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv4.php
new file mode 100644
index 000000000..ec4cf591b
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv4.php
@@ -0,0 +1,39 @@
+<?php
+
+/**
+ * Validates an IPv4 address
+ * @author Feyd @ forums.devnetwork.net (public domain)
+ */
+class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * IPv4 regex, protected so that IPv6 can reuse it
+     */
+    protected $ip4;
+
+    /**
+     * Checks that the value is a dotted-quad IPv4 address whose four
+     * parts each lie in the range 0-255.
+     *
+     * @param string $aIP Address to validate
+     * @return The address unchanged, or false when it does not match
+     */
+    public function validate($aIP, $config, $context) {
+
+        if (!$this->ip4) $this->_loadRegex();
+
+        // The D modifier makes "$" match only at the very end of the
+        // subject; without it "1.2.3.4\n" would match and be returned
+        // with the newline still attached.
+        if (preg_match('#^' . $this->ip4 . '$#sD', $aIP))
+        {
+                return $aIP;
+        }
+
+        return false;
+
+    }
+
+    /**
+     * Lazy load function to prevent regex from being stuffed in
+     * cache.
+     */
+    protected function _loadRegex() {
+        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
+        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php
new file mode 100644
index 000000000..9454e9be5
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php
@@ -0,0 +1,99 @@
+<?php
+
+/**
+ * Validates an IPv6 address.
+ * @author Feyd @ forums.devnetwork.net (public domain)
+ * @note This function requires brackets to have been removed from address
+ *       in URI.
+ */
+class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
+{
+
+    /**
+     * Checks that the value has the shape of an IPv6 address, optionally
+     * followed by a "/0" to "/128" prefix length.
+     *
+     * The address is only inspected, never rewritten: on success the
+     * input is returned exactly as it was passed in.
+     *
+     * @param string $aIP Address to validate, without surrounding brackets
+     * @return The original input, or false when it is rejected
+     */
+    public function validate($aIP, $config, $context) {
+
+        // the IPv4 regex comes from the parent class and is built on demand
+        if (!$this->ip4) $this->_loadRegex();
+
+        // $aIP is taken apart below; keep the untouched input for the return
+        $original = $aIP;
+
+        // NOTE(review): $hex is only used to build $blk, and $blk is not
+        // referenced again in this method
+        $hex = '[0-9a-fA-F]';
+        $blk = '(?:' . $hex . '{1,4})';
+        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';   // /0 - /128
+
+        //      prefix check: any "/" must belong to a valid prefix length
+        //      at the end of the string, which is then cut off
+        if (strpos($aIP, '/') !== false)
+        {
+                if (preg_match('#' . $pre . '$#s', $aIP, $find))
+                {
+                        $aIP = substr($aIP, 0, 0-strlen($find[0]));
+                        unset($find);
+                }
+                else
+                {
+                        return false;
+                }
+        }
+
+        //      IPv4-compatibility check: a trailing dotted quad preceded by
+        //      ":" is replaced by two hex groups, so that it counts as two
+        //      pieces below. dechex() output is not zero-padded here; the
+        //      rewritten string is only used for the checks in this method.
+        if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
+        {
+                $aIP = substr($aIP, 0, 0-strlen($find[0]));
+                $ip = explode('.', $find[0]);
+                $ip = array_map('dechex', $ip);
+                $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
+                unset($find, $ip);
+        }
+
+        //      compression check: at most one "::" is allowed
+        $aIP = explode('::', $aIP);
+        $c = count($aIP);
+        if ($c > 2)
+        {
+                return false;
+        }
+        elseif ($c == 2)
+        {
+                // expand "::" into the pieces it stands for
+                list($first, $second) = $aIP;
+                $first = explode(':', $first);
+                $second = explode(':', $second);
+
+                if (count($first) + count($second) > 8)
+                {
+                        return false;
+                }
+
+                // pad the left part with '0' pieces up to eight entries ...
+                while(count($first) < 8)
+                {
+                        array_push($first, '0');
+                }
+
+                // ... then overwrite its tail with the right part
+                array_splice($first, 8 - count($second), 8, $second);
+                $aIP = $first;
+                unset($first,$second);
+        }
+        else
+        {
+                // no compression: split into pieces directly
+                $aIP = explode(':', $aIP[0]);
+        }
+        $c = count($aIP);
+
+        // from here on $aIP is an array that must hold exactly eight pieces
+        if ($c != 8)
+        {
+                return false;
+        }
+
+        //      All the pieces should be 16-bit hex strings. Are they?
+        //      sprintf('%04s') left-pads each piece with zeros to four
+        //      characters; longer pieces stay too long and fail the match.
+        //      NOTE(review): an empty piece is padded to '0000' and
+        //      therefore passes.
+        foreach ($aIP as $piece)
+        {
+                if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
+                {
+                        return false;
+                }
+        }
+
+        return $original;
+
+    }
+
+}
+
+// vim: et sw=4 sts=4
author	friendica <info@friendica.com>	2012-05-12 17:57:41 -0700
committer	friendica <info@friendica.com>	2012-07-18 20:40:31 +1000
commit	7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a (patch)
tree	a9c3d91209cff770bb4b613b1b95e61a7bbc5a2b /lib/htmlpurifier/library/HTMLPurifier/AttrDef
parent	cd727cb26b78a1dade09d510b071446898477356 (diff)
download	volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.gz volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.bz2 volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.zip