diff options
Diffstat (limited to 'library/HTMLPurifier/AttrDef')
42 files changed, 0 insertions, 3156 deletions
diff --git a/library/HTMLPurifier/AttrDef/CSS.php b/library/HTMLPurifier/AttrDef/CSS.php deleted file mode 100644 index 02c1641fb..000000000 --- a/library/HTMLPurifier/AttrDef/CSS.php +++ /dev/null @@ -1,106 +0,0 @@ -<?php - -/** - * Validates the HTML attribute style, otherwise known as CSS. - * @note We don't implement the whole CSS specification, so it might be - * difficult to reuse this component in the context of validating - * actual stylesheet declarations. - * @note If we were really serious about validating the CSS, we would - * tokenize the styles and then parse the tokens. Obviously, we - * are not doing that. Doing that could seriously harm performance, - * but would make these components a lot more viable for a CSS - * filtering solution. - */ -class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef -{ - - /** - * @param string $css - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($css, $config, $context) - { - $css = $this->parseCDATA($css); - - $definition = $config->getCSSDefinition(); - - // we're going to break the spec and explode by semicolons. - // This is because semicolon rarely appears in escaped form - // Doing this is generally flaky but fast - // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI - // for details - - $declarations = explode(';', $css); - $propvalues = array(); - - /** - * Name of the current CSS property being validated. 
- */ - $property = false; - $context->register('CurrentCSSProperty', $property); - - foreach ($declarations as $declaration) { - if (!$declaration) { - continue; - } - if (!strpos($declaration, ':')) { - continue; - } - list($property, $value) = explode(':', $declaration, 2); - $property = trim($property); - $value = trim($value); - $ok = false; - do { - if (isset($definition->info[$property])) { - $ok = true; - break; - } - if (ctype_lower($property)) { - break; - } - $property = strtolower($property); - if (isset($definition->info[$property])) { - $ok = true; - break; - } - } while (0); - if (!$ok) { - continue; - } - // inefficient call, since the validator will do this again - if (strtolower(trim($value)) !== 'inherit') { - // inherit works for everything (but only on the base property) - $result = $definition->info[$property]->validate( - $value, - $config, - $context - ); - } else { - $result = 'inherit'; - } - if ($result === false) { - continue; - } - $propvalues[$property] = $result; - } - - $context->destroy('CurrentCSSProperty'); - - // procedure does not write the new CSS simultaneously, so it's - // slightly inefficient, but it's the only way of getting rid of - // duplicates. Perhaps config to optimize it, but not now. - - $new_declarations = ''; - foreach ($propvalues as $prop => $value) { - $new_declarations .= "$prop:$value;"; - } - - return $new_declarations ? 
$new_declarations : false; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php b/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php deleted file mode 100644 index af2b83dff..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php +++ /dev/null @@ -1,34 +0,0 @@ -<?php - -class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number -{ - - public function __construct() - { - parent::__construct(false); // opacity is non-negative, but we will clamp it - } - - /** - * @param string $number - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return string - */ - public function validate($number, $config, $context) - { - $result = parent::validate($number, $config, $context); - if ($result === false) { - return $result; - } - $float = (float)$result; - if ($float < 0.0) { - $result = '0'; - } - if ($float > 1.0) { - $result = '1'; - } - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Background.php b/library/HTMLPurifier/AttrDef/CSS/Background.php deleted file mode 100644 index 7f1ea3b0f..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Background.php +++ /dev/null @@ -1,111 +0,0 @@ -<?php - -/** - * Validates shorthand CSS property background. - * @warning Does not support url tokens that have internal spaces. - */ -class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef -{ - - /** - * Local copy of component validators. - * @type HTMLPurifier_AttrDef[] - * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl. 
- */ - protected $info; - - /** - * @param HTMLPurifier_Config $config - */ - public function __construct($config) - { - $def = $config->getCSSDefinition(); - $this->info['background-color'] = $def->info['background-color']; - $this->info['background-image'] = $def->info['background-image']; - $this->info['background-repeat'] = $def->info['background-repeat']; - $this->info['background-attachment'] = $def->info['background-attachment']; - $this->info['background-position'] = $def->info['background-position']; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - // regular pre-processing - $string = $this->parseCDATA($string); - if ($string === '') { - return false; - } - - // munge rgb() decl if necessary - $string = $this->mungeRgb($string); - - // assumes URI doesn't have spaces in it - $bits = explode(' ', $string); // bits to process - - $caught = array(); - $caught['color'] = false; - $caught['image'] = false; - $caught['repeat'] = false; - $caught['attachment'] = false; - $caught['position'] = false; - - $i = 0; // number of catches - - foreach ($bits as $bit) { - if ($bit === '') { - continue; - } - foreach ($caught as $key => $status) { - if ($key != 'position') { - if ($status !== false) { - continue; - } - $r = $this->info['background-' . $key]->validate($bit, $config, $context); - } else { - $r = $bit; - } - if ($r === false) { - continue; - } - if ($key == 'position') { - if ($caught[$key] === false) { - $caught[$key] = ''; - } - $caught[$key] .= $r . 
' '; - } else { - $caught[$key] = $r; - } - $i++; - break; - } - } - - if (!$i) { - return false; - } - if ($caught['position'] !== false) { - $caught['position'] = $this->info['background-position']-> - validate($caught['position'], $config, $context); - } - - $ret = array(); - foreach ($caught as $value) { - if ($value === false) { - continue; - } - $ret[] = $value; - } - - if (empty($ret)) { - return false; - } - return implode(' ', $ret); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php deleted file mode 100644 index 4580ef5a9..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php +++ /dev/null @@ -1,157 +0,0 @@ -<?php - -/* W3C says: - [ // adjective and number must be in correct order, even if - // you could switch them without introducing ambiguity. - // some browsers support that syntax - [ - <percentage> | <length> | left | center | right - ] - [ - <percentage> | <length> | top | center | bottom - ]? - ] | - [ // this signifies that the vertical and horizontal adjectives - // can be arbitrarily ordered, however, there can only be two, - // one of each, or none at all - [ - left | center | right - ] || - [ - top | center | bottom - ] - ] - top, left = 0% - center, (none) = 50% - bottom, right = 100% -*/ - -/* QuirksMode says: - keyword + length/percentage must be ordered correctly, as per W3C - - Internet Explorer and Opera, however, support arbitrary ordering. We - should fix it up. - - Minor issue though, not strictly necessary. -*/ - -// control freaks may appreciate the ability to convert these to -// percentages or something, but it's not necessary - -/** - * Validates the value of background-position. 
- */ -class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef -{ - - /** - * @type HTMLPurifier_AttrDef_CSS_Length - */ - protected $length; - - /** - * @type HTMLPurifier_AttrDef_CSS_Percentage - */ - protected $percentage; - - public function __construct() - { - $this->length = new HTMLPurifier_AttrDef_CSS_Length(); - $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage(); - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = $this->parseCDATA($string); - $bits = explode(' ', $string); - - $keywords = array(); - $keywords['h'] = false; // left, right - $keywords['v'] = false; // top, bottom - $keywords['ch'] = false; // center (first word) - $keywords['cv'] = false; // center (second word) - $measures = array(); - - $i = 0; - - $lookup = array( - 'top' => 'v', - 'bottom' => 'v', - 'left' => 'h', - 'right' => 'h', - 'center' => 'c' - ); - - foreach ($bits as $bit) { - if ($bit === '') { - continue; - } - - // test for keyword - $lbit = ctype_lower($bit) ? 
$bit : strtolower($bit); - if (isset($lookup[$lbit])) { - $status = $lookup[$lbit]; - if ($status == 'c') { - if ($i == 0) { - $status = 'ch'; - } else { - $status = 'cv'; - } - } - $keywords[$status] = $lbit; - $i++; - } - - // test for length - $r = $this->length->validate($bit, $config, $context); - if ($r !== false) { - $measures[] = $r; - $i++; - } - - // test for percentage - $r = $this->percentage->validate($bit, $config, $context); - if ($r !== false) { - $measures[] = $r; - $i++; - } - } - - if (!$i) { - return false; - } // no valid values were caught - - $ret = array(); - - // first keyword - if ($keywords['h']) { - $ret[] = $keywords['h']; - } elseif ($keywords['ch']) { - $ret[] = $keywords['ch']; - $keywords['cv'] = false; // prevent re-use: center = center center - } elseif (count($measures)) { - $ret[] = array_shift($measures); - } - - if ($keywords['v']) { - $ret[] = $keywords['v']; - } elseif ($keywords['cv']) { - $ret[] = $keywords['cv']; - } elseif (count($measures)) { - $ret[] = array_shift($measures); - } - - if (empty($ret)) { - return false; - } - return implode(' ', $ret); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Border.php b/library/HTMLPurifier/AttrDef/CSS/Border.php deleted file mode 100644 index 16243ba1e..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Border.php +++ /dev/null @@ -1,56 +0,0 @@ -<?php - -/** - * Validates the border property as defined by CSS. - */ -class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef -{ - - /** - * Local copy of properties this property is shorthand for. 
- * @type HTMLPurifier_AttrDef[] - */ - protected $info = array(); - - /** - * @param HTMLPurifier_Config $config - */ - public function __construct($config) - { - $def = $config->getCSSDefinition(); - $this->info['border-width'] = $def->info['border-width']; - $this->info['border-style'] = $def->info['border-style']; - $this->info['border-top-color'] = $def->info['border-top-color']; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = $this->parseCDATA($string); - $string = $this->mungeRgb($string); - $bits = explode(' ', $string); - $done = array(); // segments we've finished - $ret = ''; // return value - foreach ($bits as $bit) { - foreach ($this->info as $propname => $validator) { - if (isset($done[$propname])) { - continue; - } - $r = $validator->validate($bit, $config, $context); - if ($r !== false) { - $ret .= $r . ' '; - $done[$propname] = true; - break; - } - } - } - return rtrim($ret); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Color.php b/library/HTMLPurifier/AttrDef/CSS/Color.php deleted file mode 100644 index 16d2a6b98..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Color.php +++ /dev/null @@ -1,105 +0,0 @@ -<?php - -/** - * Validates Color as defined by CSS. 
- */ -class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef -{ - - /** - * @param string $color - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($color, $config, $context) - { - static $colors = null; - if ($colors === null) { - $colors = $config->get('Core.ColorKeywords'); - } - - $color = trim($color); - if ($color === '') { - return false; - } - - $lower = strtolower($color); - if (isset($colors[$lower])) { - return $colors[$lower]; - } - - if (strpos($color, 'rgb(') !== false) { - // rgb literal handling - $length = strlen($color); - if (strpos($color, ')') !== $length - 1) { - return false; - } - $triad = substr($color, 4, $length - 4 - 1); - $parts = explode(',', $triad); - if (count($parts) !== 3) { - return false; - } - $type = false; // to ensure that they're all the same type - $new_parts = array(); - foreach ($parts as $part) { - $part = trim($part); - if ($part === '') { - return false; - } - $length = strlen($part); - if ($part[$length - 1] === '%') { - // handle percents - if (!$type) { - $type = 'percentage'; - } elseif ($type !== 'percentage') { - return false; - } - $num = (float)substr($part, 0, $length - 1); - if ($num < 0) { - $num = 0; - } - if ($num > 100) { - $num = 100; - } - $new_parts[] = "$num%"; - } else { - // handle integers - if (!$type) { - $type = 'integer'; - } elseif ($type !== 'integer') { - return false; - } - $num = (int)$part; - if ($num < 0) { - $num = 0; - } - if ($num > 255) { - $num = 255; - } - $new_parts[] = (string)$num; - } - } - $new_triad = implode(',', $new_parts); - $color = "rgb($new_triad)"; - } else { - // hexadecimal handling - if ($color[0] === '#') { - $hex = substr($color, 1); - } else { - $hex = $color; - $color = '#' . 
$color; - } - $length = strlen($hex); - if ($length !== 3 && $length !== 6) { - return false; - } - if (!ctype_xdigit($hex)) { - return false; - } - } - return $color; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Composite.php b/library/HTMLPurifier/AttrDef/CSS/Composite.php deleted file mode 100644 index 9c1750554..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Composite.php +++ /dev/null @@ -1,48 +0,0 @@ -<?php - -/** - * Allows multiple validators to attempt to validate attribute. - * - * Composite is just what it sounds like: a composite of many validators. - * This means that multiple HTMLPurifier_AttrDef objects will have a whack - * at the string. If one of them passes, that's what is returned. This is - * especially useful for CSS values, which often are a choice between - * an enumerated set of predefined values or a flexible data type. - */ -class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef -{ - - /** - * List of objects that may process strings. 
- * @type HTMLPurifier_AttrDef[] - * @todo Make protected - */ - public $defs; - - /** - * @param HTMLPurifier_AttrDef[] $defs List of HTMLPurifier_AttrDef objects - */ - public function __construct($defs) - { - $this->defs = $defs; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - foreach ($this->defs as $i => $def) { - $result = $this->defs[$i]->validate($string, $config, $context); - if ($result !== false) { - return $result; - } - } - return false; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php b/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php deleted file mode 100644 index 9d77cc9aa..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php +++ /dev/null @@ -1,44 +0,0 @@ -<?php - -/** - * Decorator which enables CSS properties to be disabled for specific elements. 
- */ -class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef -{ - /** - * @type HTMLPurifier_AttrDef - */ - public $def; - /** - * @type string - */ - public $element; - - /** - * @param HTMLPurifier_AttrDef $def Definition to wrap - * @param string $element Element to deny - */ - public function __construct($def, $element) - { - $this->def = $def; - $this->element = $element; - } - - /** - * Checks if CurrentToken is set and equal to $this->element - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $token = $context->get('CurrentToken', true); - if ($token && $token->name == $this->element) { - return false; - } - return $this->def->validate($string, $config, $context); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Filter.php b/library/HTMLPurifier/AttrDef/CSS/Filter.php deleted file mode 100644 index bde4c3301..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Filter.php +++ /dev/null @@ -1,77 +0,0 @@ -<?php - -/** - * Microsoft's proprietary filter: CSS property - * @note Currently supports the alpha filter. 
In the future, this will - * probably need an extensible framework - */ -class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef -{ - /** - * @type HTMLPurifier_AttrDef_Integer - */ - protected $intValidator; - - public function __construct() - { - $this->intValidator = new HTMLPurifier_AttrDef_Integer(); - } - - /** - * @param string $value - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($value, $config, $context) - { - $value = $this->parseCDATA($value); - if ($value === 'none') { - return $value; - } - // if we looped this we could support multiple filters - $function_length = strcspn($value, '('); - $function = trim(substr($value, 0, $function_length)); - if ($function !== 'alpha' && - $function !== 'Alpha' && - $function !== 'progid:DXImageTransform.Microsoft.Alpha' - ) { - return false; - } - $cursor = $function_length + 1; - $parameters_length = strcspn($value, ')', $cursor); - $parameters = substr($value, $cursor, $parameters_length); - $params = explode(',', $parameters); - $ret_params = array(); - $lookup = array(); - foreach ($params as $param) { - list($key, $value) = explode('=', $param); - $key = trim($key); - $value = trim($value); - if (isset($lookup[$key])) { - continue; - } - if ($key !== 'opacity') { - continue; - } - $value = $this->intValidator->validate($value, $config, $context); - if ($value === false) { - continue; - } - $int = (int)$value; - if ($int > 100) { - $value = '100'; - } - if ($int < 0) { - $value = '0'; - } - $ret_params[] = "$key=$value"; - $lookup[$key] = true; - } - $ret_parameters = implode(',', $ret_params); - $ret_function = "$function($ret_parameters)"; - return $ret_function; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Font.php b/library/HTMLPurifier/AttrDef/CSS/Font.php deleted file mode 100644 index 579b97ef1..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Font.php +++ /dev/null @@ 
-1,176 +0,0 @@ -<?php - -/** - * Validates shorthand CSS property font. - */ -class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef -{ - - /** - * Local copy of validators - * @type HTMLPurifier_AttrDef[] - * @note If we moved specific CSS property definitions to their own - * classes instead of having them be assembled at run time by - * CSSDefinition, this wouldn't be necessary. We'd instantiate - * our own copies. - */ - protected $info = array(); - - /** - * @param HTMLPurifier_Config $config - */ - public function __construct($config) - { - $def = $config->getCSSDefinition(); - $this->info['font-style'] = $def->info['font-style']; - $this->info['font-variant'] = $def->info['font-variant']; - $this->info['font-weight'] = $def->info['font-weight']; - $this->info['font-size'] = $def->info['font-size']; - $this->info['line-height'] = $def->info['line-height']; - $this->info['font-family'] = $def->info['font-family']; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - static $system_fonts = array( - 'caption' => true, - 'icon' => true, - 'menu' => true, - 'message-box' => true, - 'small-caption' => true, - 'status-bar' => true - ); - - // regular pre-processing - $string = $this->parseCDATA($string); - if ($string === '') { - return false; - } - - // check if it's one of the keywords - $lowercase_string = strtolower($string); - if (isset($system_fonts[$lowercase_string])) { - return $lowercase_string; - } - - $bits = explode(' ', $string); // bits to process - $stage = 0; // this indicates what we're looking for - $caught = array(); // which stage 0 properties have we caught? 
- $stage_1 = array('font-style', 'font-variant', 'font-weight'); - $final = ''; // output - - for ($i = 0, $size = count($bits); $i < $size; $i++) { - if ($bits[$i] === '') { - continue; - } - switch ($stage) { - case 0: // attempting to catch font-style, font-variant or font-weight - foreach ($stage_1 as $validator_name) { - if (isset($caught[$validator_name])) { - continue; - } - $r = $this->info[$validator_name]->validate( - $bits[$i], - $config, - $context - ); - if ($r !== false) { - $final .= $r . ' '; - $caught[$validator_name] = true; - break; - } - } - // all three caught, continue on - if (count($caught) >= 3) { - $stage = 1; - } - if ($r !== false) { - break; - } - case 1: // attempting to catch font-size and perhaps line-height - $found_slash = false; - if (strpos($bits[$i], '/') !== false) { - list($font_size, $line_height) = - explode('/', $bits[$i]); - if ($line_height === '') { - // ooh, there's a space after the slash! - $line_height = false; - $found_slash = true; - } - } else { - $font_size = $bits[$i]; - $line_height = false; - } - $r = $this->info['font-size']->validate( - $font_size, - $config, - $context - ); - if ($r !== false) { - $final .= $r; - // attempt to catch line-height - if ($line_height === false) { - // we need to scroll forward - for ($j = $i + 1; $j < $size; $j++) { - if ($bits[$j] === '') { - continue; - } - if ($bits[$j] === '/') { - if ($found_slash) { - return false; - } else { - $found_slash = true; - continue; - } - } - $line_height = $bits[$j]; - break; - } - } else { - // slash already found - $found_slash = true; - $j = $i; - } - if ($found_slash) { - $i = $j; - $r = $this->info['line-height']->validate( - $line_height, - $config, - $context - ); - if ($r !== false) { - $final .= '/' . 
$r; - } - } - $final .= ' '; - $stage = 2; - break; - } - return false; - case 2: // attempting to catch font-family - $font_family = - implode(' ', array_slice($bits, $i, $size - $i)); - $r = $this->info['font-family']->validate( - $font_family, - $config, - $context - ); - if ($r !== false) { - $final .= $r . ' '; - // processing completed successfully - return rtrim($final); - } - return false; - } - } - return false; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php deleted file mode 100644 index 74e24c881..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php +++ /dev/null @@ -1,219 +0,0 @@ -<?php - -/** - * Validates a font family list according to CSS spec - */ -class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef -{ - - protected $mask = null; - - public function __construct() - { - $this->mask = '_- '; - for ($c = 'a'; $c <= 'z'; $c++) { - $this->mask .= $c; - } - for ($c = 'A'; $c <= 'Z'; $c++) { - $this->mask .= $c; - } - for ($c = '0'; $c <= '9'; $c++) { - $this->mask .= $c; - } // cast-y, but should be fine - // special bytes used by UTF-8 - for ($i = 0x80; $i <= 0xFF; $i++) { - // We don't bother excluding invalid bytes in this range, - // because the our restriction of well-formed UTF-8 will - // prevent these from ever occurring. - $this->mask .= chr($i); - } - - /* - PHP's internal strcspn implementation is - O(length of string * length of mask), making it inefficient - for large masks. However, it's still faster than - preg_match 8) - for (p = s1;;) { - spanp = s2; - do { - if (*spanp == c || p == s1_end) { - return p - s1; - } - } while (spanp++ < (s2_end - 1)); - c = *++p; - } - */ - // possible optimization: invert the mask. 
- } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - static $generic_names = array( - 'serif' => true, - 'sans-serif' => true, - 'monospace' => true, - 'fantasy' => true, - 'cursive' => true - ); - $allowed_fonts = $config->get('CSS.AllowedFonts'); - - // assume that no font names contain commas in them - $fonts = explode(',', $string); - $final = ''; - foreach ($fonts as $font) { - $font = trim($font); - if ($font === '') { - continue; - } - // match a generic name - if (isset($generic_names[$font])) { - if ($allowed_fonts === null || isset($allowed_fonts[$font])) { - $final .= $font . ', '; - } - continue; - } - // match a quoted name - if ($font[0] === '"' || $font[0] === "'") { - $length = strlen($font); - if ($length <= 2) { - continue; - } - $quote = $font[0]; - if ($font[$length - 1] !== $quote) { - continue; - } - $font = substr($font, 1, $length - 2); - } - - $font = $this->expandCSSEscape($font); - - // $font is a pure representation of the font name - - if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) { - continue; - } - - if (ctype_alnum($font) && $font !== '') { - // very simple font, allow it in unharmed - $final .= $font . ', '; - continue; - } - - // bugger out on whitespace. form feed (0C) really - // shouldn't show up regardless - $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); - - // Here, there are various classes of characters which need - // to be treated differently: - // - Alphanumeric characters are essentially safe. We - // handled these above. - // - Spaces require quoting, though most parsers will do - // the right thing if there aren't any characters that - // can be misinterpreted - // - Dashes rarely occur, but they fairly unproblematic - // for parsing/rendering purposes. - // The above characters cover the majority of Western font - // names. 
- // - Arbitrary Unicode characters not in ASCII. Because - // most parsers give little thought to Unicode, treatment - // of these codepoints is basically uniform, even for - // punctuation-like codepoints. These characters can - // show up in non-Western pages and are supported by most - // major browsers, for example: "MS 明朝" is a - // legitimate font-name - // <http://ja.wikipedia.org/wiki/MS_明朝>. See - // the CSS3 spec for more examples: - // <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png> - // You can see live samples of these on the Internet: - // <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック> - // However, most of these fonts have ASCII equivalents: - // for example, 'MS Mincho', and it's considered - // professional to use ASCII font names instead of - // Unicode font names. Thanks Takeshi Terada for - // providing this information. - // The following characters, to my knowledge, have not been - // used to name font names. - // - Single quote. While theoretically you might find a - // font name that has a single quote in its name (serving - // as an apostrophe, e.g. Dave's Scribble), I haven't - // been able to find any actual examples of this. - // Internet Explorer's cssText translation (which I - // believe is invoked by innerHTML) normalizes any - // quoting to single quotes, and fails to escape single - // quotes. (Note that this is not IE's behavior for all - // CSS properties, just some sort of special casing for - // font-family). So a single quote *cannot* be used - // safely in the font-family context if there will be an - // innerHTML/cssText translation. Note that Firefox 3.x - // does this too. - // - Double quote. In IE, these get normalized to - // single-quotes, no matter what the encoding. (Fun - // fact, in IE8, the 'content' CSS property gained - // support, where they special cased to preserve encoded - // double quotes, but still translate unadorned double - // quotes into single quotes.) 
So, because their - // fixpoint behavior is identical to single quotes, they - // cannot be allowed either. Firefox 3.x displays - // single-quote style behavior. - // - Backslashes are reduced by one (so \\ -> \) every - // iteration, so they cannot be used safely. This shows - // up in IE7, IE8 and FF3 - // - Semicolons, commas and backticks are handled properly. - // - The rest of the ASCII punctuation is handled properly. - // We haven't checked what browsers do to unadorned - // versions, but this is not important as long as the - // browser doesn't /remove/ surrounding quotes (as IE does - // for HTML). - // - // With these results in hand, we conclude that there are - // various levels of safety: - // - Paranoid: alphanumeric, spaces and dashes(?) - // - International: Paranoid + non-ASCII Unicode - // - Edgy: Everything except quotes, backslashes - // - NoJS: Standards compliance, e.g. sod IE. Note that - // with some judicious character escaping (since certain - // types of escaping doesn't work) this is theoretically - // OK as long as innerHTML/cssText is not called. - // We believe that international is a reasonable default - // (that we will implement now), and once we do more - // extensive research, we may feel comfortable with dropping - // it down to edgy. - - // Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of - // str(c)spn assumes that the string was already well formed - // Unicode (which of course it is). - if (strspn($font, $this->mask) !== strlen($font)) { - continue; - } - - // Historical: - // In the absence of innerHTML/cssText, these ugly - // transforms don't pose a security risk (as \\ and \" - // might--these escapes are not supported by most browsers). - // We could try to be clever and use single-quote wrapping - // when there is a double quote present, but I have choosen - // not to implement that. 
(NOTE: you can reduce the amount - // of escapes by one depending on what quoting style you use) - // $font = str_replace('\\', '\\5C ', $font); - // $font = str_replace('"', '\\22 ', $font); - // $font = str_replace("'", '\\27 ', $font); - - // font possibly with spaces, requires quoting - $final .= "'$font', "; - } - $final = rtrim($final, ', '); - if ($final === '') { - return false; - } - return $final; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Ident.php b/library/HTMLPurifier/AttrDef/CSS/Ident.php deleted file mode 100644 index 973002c17..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Ident.php +++ /dev/null @@ -1,32 +0,0 @@ -<?php - -/** - * Validates based on {ident} CSS grammar production - */ -class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef -{ - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = trim($string); - - // early abort: '' and '0' (strings that convert to false) are invalid - if (!$string) { - return false; - } - - $pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/'; - if (!preg_match($pattern, $string)) { - return false; - } - return $string; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php b/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php deleted file mode 100644 index ffc989fe8..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php +++ /dev/null @@ -1,56 +0,0 @@ -<?php - -/** - * Decorator which enables !important to be used in CSS values. 
- */ -class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef -{ - /** - * @type HTMLPurifier_AttrDef - */ - public $def; - /** - * @type bool - */ - public $allow; - - /** - * @param HTMLPurifier_AttrDef $def Definition to wrap - * @param bool $allow Whether or not to allow !important - */ - public function __construct($def, $allow = false) - { - $this->def = $def; - $this->allow = $allow; - } - - /** - * Intercepts and removes !important if necessary - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - // test for ! and important tokens - $string = trim($string); - $is_important = false; - // :TODO: optimization: test directly for !important and ! important - if (strlen($string) >= 9 && substr($string, -9) === 'important') { - $temp = rtrim(substr($string, 0, -9)); - // use a temp, because we might want to restore important - if (strlen($temp) >= 1 && substr($temp, -1) === '!') { - $string = rtrim(substr($temp, 0, -1)); - $is_important = true; - } - } - $string = $this->def->validate($string, $config, $context); - if ($this->allow && $is_important) { - $string .= ' !important'; - } - return $string; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Length.php b/library/HTMLPurifier/AttrDef/CSS/Length.php deleted file mode 100644 index f12453a04..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Length.php +++ /dev/null @@ -1,77 +0,0 @@ -<?php - -/** - * Represents a Length as defined by CSS. - */ -class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef -{ - - /** - * @type HTMLPurifier_Length|string - */ - protected $min; - - /** - * @type HTMLPurifier_Length|string - */ - protected $max; - - /** - * @param HTMLPurifier_Length|string $min Minimum length, or null for no bound. String is also acceptable. 
- * @param HTMLPurifier_Length|string $max Maximum length, or null for no bound. String is also acceptable. - */ - public function __construct($min = null, $max = null) - { - $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null; - $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = $this->parseCDATA($string); - - // Optimizations - if ($string === '') { - return false; - } - if ($string === '0') { - return '0'; - } - if (strlen($string) === 1) { - return false; - } - - $length = HTMLPurifier_Length::make($string); - if (!$length->isValid()) { - return false; - } - - if ($this->min) { - $c = $length->compareTo($this->min); - if ($c === false) { - return false; - } - if ($c < 0) { - return false; - } - } - if ($this->max) { - $c = $length->compareTo($this->max); - if ($c === false) { - return false; - } - if ($c > 0) { - return false; - } - } - return $length->toString(); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/ListStyle.php b/library/HTMLPurifier/AttrDef/CSS/ListStyle.php deleted file mode 100644 index e74d42654..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/ListStyle.php +++ /dev/null @@ -1,112 +0,0 @@ -<?php - -/** - * Validates shorthand CSS property list-style. - * @warning Does not support url tokens that have internal spaces. - */ -class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef -{ - - /** - * Local copy of validators. - * @type HTMLPurifier_AttrDef[] - * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl. 
- */ - protected $info; - - /** - * @param HTMLPurifier_Config $config - */ - public function __construct($config) - { - $def = $config->getCSSDefinition(); - $this->info['list-style-type'] = $def->info['list-style-type']; - $this->info['list-style-position'] = $def->info['list-style-position']; - $this->info['list-style-image'] = $def->info['list-style-image']; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - // regular pre-processing - $string = $this->parseCDATA($string); - if ($string === '') { - return false; - } - - // assumes URI doesn't have spaces in it - $bits = explode(' ', strtolower($string)); // bits to process - - $caught = array(); - $caught['type'] = false; - $caught['position'] = false; - $caught['image'] = false; - - $i = 0; // number of catches - $none = false; - - foreach ($bits as $bit) { - if ($i >= 3) { - return; - } // optimization bit - if ($bit === '') { - continue; - } - foreach ($caught as $key => $status) { - if ($status !== false) { - continue; - } - $r = $this->info['list-style-' . 
$key]->validate($bit, $config, $context); - if ($r === false) { - continue; - } - if ($r === 'none') { - if ($none) { - continue; - } else { - $none = true; - } - if ($key == 'image') { - continue; - } - } - $caught[$key] = $r; - $i++; - break; - } - } - - if (!$i) { - return false; - } - - $ret = array(); - - // construct type - if ($caught['type']) { - $ret[] = $caught['type']; - } - - // construct image - if ($caught['image']) { - $ret[] = $caught['image']; - } - - // construct position - if ($caught['position']) { - $ret[] = $caught['position']; - } - - if (empty($ret)) { - return false; - } - return implode(' ', $ret); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Multiple.php b/library/HTMLPurifier/AttrDef/CSS/Multiple.php deleted file mode 100644 index 9f266cdd1..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Multiple.php +++ /dev/null @@ -1,71 +0,0 @@ -<?php - -/** - * Framework class for strings that involve multiple values. - * - * Certain CSS properties such as border-width and margin allow multiple - * lengths to be specified. This class can take a vanilla border-width - * definition and multiply it, usually into a max of four. - * - * @note Even though the CSS specification isn't clear about it, inherit - * can only be used alone: it will never manifest as part of a multi - * shorthand declaration. Thus, this class does not allow inherit. - */ -class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef -{ - /** - * Instance of component definition to defer validation to. - * @type HTMLPurifier_AttrDef - * @todo Make protected - */ - public $single; - - /** - * Max number of values allowed. 
- * @todo Make protected - */ - public $max; - - /** - * @param HTMLPurifier_AttrDef $single HTMLPurifier_AttrDef to multiply - * @param int $max Max number of values allowed (usually four) - */ - public function __construct($single, $max = 4) - { - $this->single = $single; - $this->max = $max; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = $this->parseCDATA($string); - if ($string === '') { - return false; - } - $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n - $length = count($parts); - $final = ''; - for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) { - if (ctype_space($parts[$i])) { - continue; - } - $result = $this->single->validate($parts[$i], $config, $context); - if ($result !== false) { - $final .= $result . ' '; - $num++; - } - } - if ($final === '') { - return false; - } - return rtrim($final); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Number.php b/library/HTMLPurifier/AttrDef/CSS/Number.php deleted file mode 100644 index 8edc159e7..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Number.php +++ /dev/null @@ -1,84 +0,0 @@ -<?php - -/** - * Validates a number as defined by the CSS spec. - */ -class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef -{ - - /** - * Indicates whether or not only positive values are allowed. - * @type bool - */ - protected $non_negative = false; - - /** - * @param bool $non_negative indicates whether negatives are forbidden - */ - public function __construct($non_negative = false) - { - $this->non_negative = $non_negative; - } - - /** - * @param string $number - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return string|bool - * @warning Some contexts do not pass $config, $context. 
These - * variables should not be used without checking HTMLPurifier_Length - */ - public function validate($number, $config, $context) - { - $number = $this->parseCDATA($number); - - if ($number === '') { - return false; - } - if ($number === '0') { - return '0'; - } - - $sign = ''; - switch ($number[0]) { - case '-': - if ($this->non_negative) { - return false; - } - $sign = '-'; - case '+': - $number = substr($number, 1); - } - - if (ctype_digit($number)) { - $number = ltrim($number, '0'); - return $number ? $sign . $number : '0'; - } - - // Period is the only non-numeric character allowed - if (strpos($number, '.') === false) { - return false; - } - - list($left, $right) = explode('.', $number, 2); - - if ($left === '' && $right === '') { - return false; - } - if ($left !== '' && !ctype_digit($left)) { - return false; - } - - $left = ltrim($left, '0'); - $right = rtrim($right, '0'); - - if ($right === '') { - return $left ? $sign . $left : '0'; - } elseif (!ctype_digit($right)) { - return false; - } - return $sign . $left . '.' . $right; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Percentage.php b/library/HTMLPurifier/AttrDef/CSS/Percentage.php deleted file mode 100644 index f0f25c50a..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Percentage.php +++ /dev/null @@ -1,54 +0,0 @@ -<?php - -/** - * Validates a Percentage as defined by the CSS spec. - */ -class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef -{ - - /** - * Instance to defer number validation to. 
- * @type HTMLPurifier_AttrDef_CSS_Number - */ - protected $number_def; - - /** - * @param bool $non_negative Whether to forbid negative values - */ - public function __construct($non_negative = false) - { - $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = $this->parseCDATA($string); - - if ($string === '') { - return false; - } - $length = strlen($string); - if ($length === 1) { - return false; - } - if ($string[$length - 1] !== '%') { - return false; - } - - $number = substr($string, 0, $length - 1); - $number = $this->number_def->validate($number, $config, $context); - - if ($number === false) { - return false; - } - return "$number%"; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php b/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php deleted file mode 100644 index 5fd4b7f7b..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/TextDecoration.php +++ /dev/null @@ -1,46 +0,0 @@ -<?php - -/** - * Validates the value for the CSS property text-decoration - * @note This class could be generalized into a version that acts sort of - * like Enum except you can compound the allowed values. 
- */ -class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef -{ - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - static $allowed_values = array( - 'line-through' => true, - 'overline' => true, - 'underline' => true, - ); - - $string = strtolower($this->parseCDATA($string)); - - if ($string === 'none') { - return $string; - } - - $parts = explode(' ', $string); - $final = ''; - foreach ($parts as $part) { - if (isset($allowed_values[$part])) { - $final .= $part . ' '; - } - } - $final = rtrim($final); - if ($final === '') { - return false; - } - return $final; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/URI.php b/library/HTMLPurifier/AttrDef/CSS/URI.php deleted file mode 100644 index f9434230e..000000000 --- a/library/HTMLPurifier/AttrDef/CSS/URI.php +++ /dev/null @@ -1,74 +0,0 @@ -<?php - -/** - * Validates a URI in CSS syntax, which uses url('http://example.com') - * @note While theoretically speaking a URI in a CSS document could - * be non-embedded, as of CSS2 there is no such usage so we're - * generalizing it. This may need to be changed in the future. - * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as - * the separator, you cannot put a literal semicolon in - * in the URI. Try percent encoding it, in that case. 
- */ -class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI -{ - - public function __construct() - { - parent::__construct(true); // always embedded - } - - /** - * @param string $uri_string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($uri_string, $config, $context) - { - // parse the URI out of the string and then pass it onto - // the parent object - - $uri_string = $this->parseCDATA($uri_string); - if (strpos($uri_string, 'url(') !== 0) { - return false; - } - $uri_string = substr($uri_string, 4); - $new_length = strlen($uri_string) - 1; - if ($uri_string[$new_length] != ')') { - return false; - } - $uri = trim(substr($uri_string, 0, $new_length)); - - if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) { - $quote = $uri[0]; - $new_length = strlen($uri) - 1; - if ($uri[$new_length] !== $quote) { - return false; - } - $uri = substr($uri, 1, $new_length - 1); - } - - $uri = $this->expandCSSEscape($uri); - - $result = parent::validate($uri, $config, $context); - - if ($result === false) { - return false; - } - - // extra sanity check; should have been done by URI - $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result); - - // suspicious characters are ()'; we're going to percent encode - // them for safety. - $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result); - - // there's an extra bug where ampersands lose their escaping on - // an innerHTML cycle, so a very unlucky query parameter could - // then change the meaning of the URL. Unfortunately, there's - // not much we can do about that... 
- return "url(\"$result\")"; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/Clone.php b/library/HTMLPurifier/AttrDef/Clone.php deleted file mode 100644 index 6698a00c0..000000000 --- a/library/HTMLPurifier/AttrDef/Clone.php +++ /dev/null @@ -1,44 +0,0 @@ -<?php - -/** - * Dummy AttrDef that mimics another AttrDef, BUT it generates clones - * with make. - */ -class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef -{ - /** - * What we're cloning. - * @type HTMLPurifier_AttrDef - */ - protected $clone; - - /** - * @param HTMLPurifier_AttrDef $clone - */ - public function __construct($clone) - { - $this->clone = $clone; - } - - /** - * @param string $v - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($v, $config, $context) - { - return $this->clone->validate($v, $config, $context); - } - - /** - * @param string $string - * @return HTMLPurifier_AttrDef - */ - public function make($string) - { - return clone $this->clone; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/Enum.php b/library/HTMLPurifier/AttrDef/Enum.php deleted file mode 100644 index 8abda7f6e..000000000 --- a/library/HTMLPurifier/AttrDef/Enum.php +++ /dev/null @@ -1,73 +0,0 @@ -<?php - -// Enum = Enumerated -/** - * Validates a keyword against a list of valid values. - * @warning The case-insensitive compare of this function uses PHP's - * built-in strtolower and ctype_lower functions, which may - * cause problems with international comparisons - */ -class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef -{ - - /** - * Lookup table of valid values. - * @type array - * @todo Make protected - */ - public $valid_values = array(); - - /** - * Bool indicating whether or not enumeration is case sensitive. - * @note In general this is always case insensitive. 
- */ - protected $case_sensitive = false; // values according to W3C spec - - /** - * @param array $valid_values List of valid values - * @param bool $case_sensitive Whether or not case sensitive - */ - public function __construct($valid_values = array(), $case_sensitive = false) - { - $this->valid_values = array_flip($valid_values); - $this->case_sensitive = $case_sensitive; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = trim($string); - if (!$this->case_sensitive) { - // we may want to do full case-insensitive libraries - $string = ctype_lower($string) ? $string : strtolower($string); - } - $result = isset($this->valid_values[$string]); - - return $result ? $string : false; - } - - /** - * @param string $string In form of comma-delimited list of case-insensitive - * valid values. Example: "foo,bar,baz". Prepend "s:" to make - * case sensitive - * @return HTMLPurifier_AttrDef_Enum - */ - public function make($string) - { - if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') { - $string = substr($string, 2); - $sensitive = true; - } else { - $sensitive = false; - } - $values = explode(',', $string); - return new HTMLPurifier_AttrDef_Enum($values, $sensitive); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Bool.php b/library/HTMLPurifier/AttrDef/HTML/Bool.php deleted file mode 100644 index 036a240e1..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Bool.php +++ /dev/null @@ -1,51 +0,0 @@ -<?php - -/** - * Validates a boolean attribute - */ -class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef -{ - - /** - * @type bool - */ - protected $name; - - /** - * @type bool - */ - public $minimized = true; - - /** - * @param bool $name - */ - public function __construct($name = false) - { - $this->name = $name; - } - - /** - * @param string $string 
- * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - if (empty($string)) { - return false; - } - return $this->name; - } - - /** - * @param string $string Name of attribute - * @return HTMLPurifier_AttrDef_HTML_Bool - */ - public function make($string) - { - return new HTMLPurifier_AttrDef_HTML_Bool($string); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Class.php b/library/HTMLPurifier/AttrDef/HTML/Class.php deleted file mode 100644 index d5013488f..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Class.php +++ /dev/null @@ -1,48 +0,0 @@ -<?php - -/** - * Implements special behavior for class attribute (normally NMTOKENS) - */ -class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens -{ - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - protected function split($string, $config, $context) - { - // really, this twiddle should be lazy loaded - $name = $config->getDefinition('HTML')->doctype->name; - if ($name == "XHTML 1.1" || $name == "XHTML 2.0") { - return parent::split($string, $config, $context); - } else { - return preg_split('/\s+/', $string); - } - } - - /** - * @param array $tokens - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - protected function filter($tokens, $config, $context) - { - $allowed = $config->get('Attr.AllowedClasses'); - $forbidden = $config->get('Attr.ForbiddenClasses'); - $ret = array(); - foreach ($tokens as $token) { - if (($allowed === null || isset($allowed[$token])) && - !isset($forbidden[$token]) && - // We need this O(n) check because of PHP's array - // implementation that casts -0 to 0. 
- !in_array($token, $ret, true) - ) { - $ret[] = $token; - } - } - return $ret; - } -} diff --git a/library/HTMLPurifier/AttrDef/HTML/Color.php b/library/HTMLPurifier/AttrDef/HTML/Color.php deleted file mode 100644 index 946ebb782..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Color.php +++ /dev/null @@ -1,51 +0,0 @@ -<?php - -/** - * Validates a color according to the HTML spec. - */ -class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef -{ - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - static $colors = null; - if ($colors === null) { - $colors = $config->get('Core.ColorKeywords'); - } - - $string = trim($string); - - if (empty($string)) { - return false; - } - $lower = strtolower($string); - if (isset($colors[$lower])) { - return $colors[$lower]; - } - if ($string[0] === '#') { - $hex = substr($string, 1); - } else { - $hex = $string; - } - - $length = strlen($hex); - if ($length !== 3 && $length !== 6) { - return false; - } - if (!ctype_xdigit($hex)) { - return false; - } - if ($length === 3) { - $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . 
$hex[2]; - } - return "#$hex"; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php deleted file mode 100644 index d79ba12b3..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php +++ /dev/null @@ -1,38 +0,0 @@ -<?php - -/** - * Special-case enum attribute definition that lazy loads allowed frame targets - */ -class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum -{ - - /** - * @type array - */ - public $valid_values = false; // uninitialized value - - /** - * @type bool - */ - protected $case_sensitive = false; - - public function __construct() - { - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - if ($this->valid_values === false) { - $this->valid_values = $config->get('Attr.AllowedFrameTargets'); - } - return parent::validate($string, $config, $context); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php deleted file mode 100644 index 3d86efb44..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/ID.php +++ /dev/null @@ -1,105 +0,0 @@ -<?php - -/** - * Validates the HTML attribute ID. - * @warning Even though this is the id processor, it - * will ignore the directive Attr:IDBlacklist, since it will only - * go according to the ID accumulator. Since the accumulator is - * automatically generated, it will have already absorbed the - * blacklist. If you're hacking around, make sure you use load()! - */ - -class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef -{ - - // selector is NOT a valid thing to use for IDREFs, because IDREFs - // *must* target IDs that exist, whereas selector #ids do not. - - /** - * Determines whether or not we're validating an ID in a CSS - * selector context. 
- * @type bool - */ - protected $selector; - - /** - * @param bool $selector - */ - public function __construct($selector = false) - { - $this->selector = $selector; - } - - /** - * @param string $id - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($id, $config, $context) - { - if (!$this->selector && !$config->get('Attr.EnableID')) { - return false; - } - - $id = trim($id); // trim it first - - if ($id === '') { - return false; - } - - $prefix = $config->get('Attr.IDPrefix'); - if ($prefix !== '') { - $prefix .= $config->get('Attr.IDPrefixLocal'); - // prevent re-appending the prefix - if (strpos($id, $prefix) !== 0) { - $id = $prefix . $id; - } - } elseif ($config->get('Attr.IDPrefixLocal') !== '') { - trigger_error( - '%Attr.IDPrefixLocal cannot be used unless ' . - '%Attr.IDPrefix is set', - E_USER_WARNING - ); - } - - if (!$this->selector) { - $id_accumulator =& $context->get('IDAccumulator'); - if (isset($id_accumulator->ids[$id])) { - return false; - } - } - - // we purposely avoid using regex, hopefully this is faster - - if (ctype_alpha($id)) { - $result = true; - } else { - if (!ctype_alpha(@$id[0])) { - return false; - } - // primitive style of regexps, I suppose - $trim = trim( - $id, - 'A..Za..z0..9:-._' - ); - $result = ($trim === ''); - } - - $regexp = $config->get('Attr.IDBlacklistRegexp'); - if ($regexp && preg_match($regexp, $id)) { - return false; - } - - if (!$this->selector && $result) { - $id_accumulator->add($id); - } - - // if no change was made to the ID, return the result - // else, return the new id if stripping whitespace made it - // valid, or return false. - return $result ? 
$id : false; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Length.php b/library/HTMLPurifier/AttrDef/HTML/Length.php deleted file mode 100644 index 1c4006fbb..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Length.php +++ /dev/null @@ -1,56 +0,0 @@ -<?php - -/** - * Validates the HTML type length (not to be confused with CSS's length). - * - * This accepts integer pixels or percentages as lengths for certain - * HTML attributes. - */ - -class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels -{ - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = trim($string); - if ($string === '') { - return false; - } - - $parent_result = parent::validate($string, $config, $context); - if ($parent_result !== false) { - return $parent_result; - } - - $length = strlen($string); - $last_char = $string[$length - 1]; - - if ($last_char !== '%') { - return false; - } - - $points = substr($string, 0, $length - 1); - - if (!is_numeric($points)) { - return false; - } - - $points = (int)$points; - - if ($points < 0) { - return '0%'; - } - if ($points > 100) { - return '100%'; - } - return ((string)$points) . '%'; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php deleted file mode 100644 index 63fa04c15..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/LinkTypes.php +++ /dev/null @@ -1,72 +0,0 @@ -<?php - -/** - * Validates a rel/rev link attribute against a directive of allowed values - * @note We cannot use Enum because link types allow multiple - * values. - * @note Assumes link types are ASCII text - */ -class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef -{ - - /** - * Name config attribute to pull. 
- * @type string - */ - protected $name; - - /** - * @param string $name - */ - public function __construct($name) - { - $configLookup = array( - 'rel' => 'AllowedRel', - 'rev' => 'AllowedRev' - ); - if (!isset($configLookup[$name])) { - trigger_error( - 'Unrecognized attribute name for link ' . - 'relationship.', - E_USER_ERROR - ); - return; - } - $this->name = $configLookup[$name]; - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $allowed = $config->get('Attr.' . $this->name); - if (empty($allowed)) { - return false; - } - - $string = $this->parseCDATA($string); - $parts = explode(' ', $string); - - // lookup to prevent duplicates - $ret_lookup = array(); - foreach ($parts as $part) { - $part = strtolower(trim($part)); - if (!isset($allowed[$part])) { - continue; - } - $ret_lookup[$part] = true; - } - - if (empty($ret_lookup)) { - return false; - } - $string = implode(' ', array_keys($ret_lookup)); - return $string; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php deleted file mode 100644 index bbb20f2f8..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php +++ /dev/null @@ -1,60 +0,0 @@ -<?php - -/** - * Validates a MultiLength as defined by the HTML spec. - * - * A multilength is either a integer (pixel count), a percentage, or - * a relative number. 
- */ -class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length -{ - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = trim($string); - if ($string === '') { - return false; - } - - $parent_result = parent::validate($string, $config, $context); - if ($parent_result !== false) { - return $parent_result; - } - - $length = strlen($string); - $last_char = $string[$length - 1]; - - if ($last_char !== '*') { - return false; - } - - $int = substr($string, 0, $length - 1); - - if ($int == '') { - return '*'; - } - if (!is_numeric($int)) { - return false; - } - - $int = (int)$int; - if ($int < 0) { - return false; - } - if ($int == 0) { - return '0'; - } - if ($int == 1) { - return '*'; - } - return ((string)$int) . '*'; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php deleted file mode 100644 index f79683b4f..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php +++ /dev/null @@ -1,70 +0,0 @@ -<?php - -/** - * Validates contents based on NMTOKENS attribute type. - */ -class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef -{ - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $string = trim($string); - - // early abort: '' and '0' (strings that convert to false) are invalid - if (!$string) { - return false; - } - - $tokens = $this->split($string, $config, $context); - $tokens = $this->filter($tokens, $config, $context); - if (empty($tokens)) { - return false; - } - return implode(' ', $tokens); - } - - /** - * Splits a space separated list of tokens into its constituent parts. 
- * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - protected function split($string, $config, $context) - { - // OPTIMIZABLE! - // do the preg_match, capture all subpatterns for reformulation - - // we don't support U+00A1 and up codepoints or - // escaping because I don't know how to do that with regexps - // and plus it would complicate optimization efforts (you never - // see that anyway). - $pattern = '/(?:(?<=\s)|\A)' . // look behind for space or string start - '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)' . - '(?:(?=\s)|\z)/'; // look ahead for space or string end - preg_match_all($pattern, $string, $matches); - return $matches[1]; - } - - /** - * Template method for removing certain tokens based on arbitrary criteria. - * @note If we wanted to be really functional, we'd do an array_filter - * with a callback. But... we're not. - * @param array $tokens - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return array - */ - protected function filter($tokens, $config, $context) - { - return $tokens; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Pixels.php b/library/HTMLPurifier/AttrDef/HTML/Pixels.php deleted file mode 100644 index a1d019e09..000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Pixels.php +++ /dev/null @@ -1,76 +0,0 @@ -<?php - -/** - * Validates an integer representation of pixels according to the HTML spec. 
// Source file: library/HTMLPurifier/AttrDef/HTML/Pixels.php

/**
 * Validates an integer representation of pixels according to the HTML spec.
 */
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{

    /**
     * Upper bound for accepted values; null disables the bound.
     * @type int
     */
    protected $max;

    /**
     * @param int $max
     */
    public function __construct($max = null)
    {
        $this->max = $max;
    }

    /**
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Pixel count as a string, or false when invalid
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if ($string === '0') {
            return $string;
        }
        if ($string === '') {
            return false;
        }

        // tolerate (and strip) a trailing "px" unit
        if (substr($string, -2) == 'px') {
            $string = substr($string, 0, strlen($string) - 2);
        }
        if (!is_numeric($string)) {
            return false;
        }

        $pixels = (int)$string;
        if ($pixels < 0) {
            // negative sizes are clamped to zero rather than rejected
            return '0';
        }

        // upper-bound value, extremely high values can
        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
        // WARNING, above link WILL crash you if you're using Windows
        $over_limit = ($this->max !== null && $pixels > $this->max);
        return (string)($over_limit ? $this->max : $pixels);
    }

    /**
     * Builds a new validator of the same class from a string parameter:
     * '' means "no maximum", anything else is cast to an integer maximum.
     * @param string $string
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        $max = ($string === '') ? null : (int)$string;
        $class = get_class($this);
        return new $class($max);
    }
}

// vim: et sw=4 sts=4
// Source file: library/HTMLPurifier/AttrDef/Integer.php

/**
 * Validates an integer.
 * @note While this class was modeled off the CSS definition, no currently
 *       allowed CSS uses this type.  The properties that do are: widows,
 *       orphans, z-index, counter-increment, counter-reset.  Some of the
 *       HTML attributes, however, find use for a non-negative version of this.
 */
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{

    /**
     * Whether or not negative values are allowed.
     * @type bool
     */
    protected $negative = true;

    /**
     * Whether or not zero is allowed.
     * @type bool
     */
    protected $zero = true;

    /**
     * Whether or not positive values are allowed.
     * @type bool
     */
    protected $positive = true;

    /**
     * @param bool $negative Whether or not negative values are allowed
     * @param bool $zero Whether or not zero is allowed
     * @param bool $positive Whether or not positive values are allowed
     */
    public function __construct($negative = true, $zero = true, $positive = true)
    {
        $this->negative = $negative;
        $this->zero = $zero;
        $this->positive = $positive;
    }

    /**
     * @param string $integer
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The integer as a string ("+" removed, "-0"
     *         rewritten to "0"), or false when invalid or out of scope
     */
    public function validate($integer, $config, $context)
    {
        $integer = $this->parseCDATA($integer);
        if ($integer === '') {
            return false;
        }

        // we could possibly simply typecast it to integer, but there are
        // certain fringe cases that must not return an integer.

        // clip leading sign; a sign that is not allowed stays in $digits
        // and makes the ctype_digit() test below fail
        if ($this->negative && $integer[0] === '-') {
            $digits = substr($integer, 1);
            if ($digits === '0') {
                $integer = '0';
            } // rm minus sign for zero
        } elseif ($this->positive && $integer[0] === '+') {
            $digits = $integer = substr($integer, 1); // rm unnecessary plus
        } else {
            $digits = $integer;
        }

        // test if it's numeric
        if (!ctype_digit($digits)) {
            return false;
        }

        // perform scope tests
        if (!$this->zero && $integer == 0) {
            return false;
        }
        if (!$this->positive && $integer > 0) {
            return false;
        }
        if (!$this->negative && $integer < 0) {
            return false;
        }

        return $integer;
    }
}

// vim: et sw=4 sts=4

// Source file: library/HTMLPurifier/AttrDef/Lang.php

/**
 * Validates the HTML attribute lang, effectively a language code.
 * @note Built according to RFC 3066, which obsoleted RFC 1766
 */
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{

    /**
     * Validates and lower-cases a language tag. An invalid primary subtag
     * rejects the whole value; an invalid later subtag truncates the result
     * to the subtags accepted so far.
     *
     * Single-letter subtags are compared case-insensitively, in line with
     * the case normalization applied to every other subtag.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);
        if (!$string) {
            return false;
        }

        // explode() always yields at least one element, so no emptiness
        // check on the result is needed
        $subtags = explode('-', $string);
        $num_subtags = count($subtags);

        // process primary subtag : $subtags[0]
        $primary = $subtags[0];
        switch (strlen($primary)) {
            case 1:
                // the only accepted one-letter primary subtags are x and i,
                // in either case
                $primary = strtolower($primary);
                if ($primary !== 'x' && $primary !== 'i') {
                    return false;
                }
                break;
            case 2:
            case 3:
                if (!ctype_alpha($primary)) {
                    return false;
                }
                $primary = strtolower($primary);
                break;
            default:
                // empty, or longer than three characters
                return false;
        }

        $new_string = $primary;
        if ($num_subtags == 1) {
            return $new_string;
        }

        // process second subtag : $subtags[1]
        $length = strlen($subtags[1]);
        if ($length == 0
            || ($length == 1 && strtolower($subtags[1]) !== 'x')
            || $length > 8
            || !ctype_alnum($subtags[1])
        ) {
            return $new_string;
        }

        $new_string .= '-' . strtolower($subtags[1]);
        if ($num_subtags == 2) {
            return $new_string;
        }

        // process all other subtags, index 2 and up
        for ($i = 2; $i < $num_subtags; $i++) {
            $length = strlen($subtags[$i]);
            if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) {
                return $new_string;
            }
            $new_string .= '-' . strtolower($subtags[$i]);
        }
        return $new_string;
    }
}

// vim: et sw=4 sts=4
// Source file: library/HTMLPurifier/AttrDef/Switch.php

/**
 * Decorator that, depending on a token, switches between two definitions.
 */
class HTMLPurifier_AttrDef_Switch
{

    /**
     * Tag name that selects $withTag.
     * @type string
     */
    protected $tag;

    /**
     * Definition used when the current token is named $tag.
     * @type HTMLPurifier_AttrDef
     */
    protected $withTag;

    /**
     * Definition used in every other case.
     * @type HTMLPurifier_AttrDef
     */
    protected $withoutTag;

    /**
     * @param string $tag Tag name to switch upon
     * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
     * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
     */
    public function __construct($tag, $with_tag, $without_tag)
    {
        $this->tag = $tag;
        $this->withTag = $with_tag;
        $this->withoutTag = $without_tag;
    }

    /**
     * Delegates validation to one of the two wrapped definitions, chosen by
     * the name of the 'CurrentToken' found in the context.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $token = $context->get('CurrentToken', true);
        $is_our_tag = $token && $token->name === $this->tag;
        $delegate = $is_our_tag ? $this->withTag : $this->withoutTag;
        return $delegate->validate($string, $config, $context);
    }
}

// vim: et sw=4 sts=4

// Source file: library/HTMLPurifier/AttrDef/Text.php

/**
 * Validates arbitrary text according to the HTML spec.
 */
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{

    /**
     * Accepts any text; the only processing is the inherited parseCDATA().
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $text = $this->parseCDATA($string);
        return $text;
    }
}

// vim: et sw=4 sts=4
- * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme - */ -class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef -{ - - /** - * @type HTMLPurifier_URIParser - */ - protected $parser; - - /** - * @type bool - */ - protected $embedsResource; - - /** - * @param bool $embeds_resource Does the URI here result in an extra HTTP request? - */ - public function __construct($embeds_resource = false) - { - $this->parser = new HTMLPurifier_URIParser(); - $this->embedsResource = (bool)$embeds_resource; - } - - /** - * @param string $string - * @return HTMLPurifier_AttrDef_URI - */ - public function make($string) - { - $embeds = ($string === 'embedded'); - return new HTMLPurifier_AttrDef_URI($embeds); - } - - /** - * @param string $uri - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($uri, $config, $context) - { - if ($config->get('URI.Disable')) { - return false; - } - - $uri = $this->parseCDATA($uri); - - // parse the URI - $uri = $this->parser->parse($uri); - if ($uri === false) { - return false; - } - - // add embedded flag to context for validators - $context->register('EmbeddedURI', $this->embedsResource); - - $ok = false; - do { - - // generic validation - $result = $uri->validate($config, $context); - if (!$result) { - break; - } - - // chained filtering - $uri_def = $config->getDefinition('URI'); - $result = $uri_def->filter($uri, $config, $context); - if (!$result) { - break; - } - - // scheme-specific validation - $scheme_obj = $uri->getSchemeObj($config, $context); - if (!$scheme_obj) { - break; - } - if ($this->embedsResource && !$scheme_obj->browsable) { - break; - } - $result = $scheme_obj->validate($uri, $config, $context); - if (!$result) { - break; - } - - // Post chained filtering - $result = $uri_def->postFilter($uri, $config, $context); - if (!$result) { - break; - } - - // survived gauntlet - $ok = true; - - } while (false); - - 
$context->destroy('EmbeddedURI'); - if (!$ok) { - return false; - } - // back to string - return $uri->toString(); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/URI/Email.php b/library/HTMLPurifier/AttrDef/URI/Email.php deleted file mode 100644 index daf32b764..000000000 --- a/library/HTMLPurifier/AttrDef/URI/Email.php +++ /dev/null @@ -1,20 +0,0 @@ -<?php - -abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef -{ - - /** - * Unpacks a mailbox into its display-name and address - * @param string $string - * @return mixed - */ - public function unpack($string) - { - // needs to be implemented - } - -} - -// sub-implementations - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php b/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php deleted file mode 100644 index 52c0d5968..000000000 --- a/library/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php +++ /dev/null @@ -1,29 +0,0 @@ -<?php - -/** - * Primitive email validation class based on the regexp found at - * http://www.regular-expressions.info/email.html - */ -class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email -{ - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - // no support for named mailboxes i.e. "Bob <bob@example.com>" - // that needs more percent encoding to be done - if ($string == '') { - return false; - } - $string = trim($string); - $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string); - return $result ? 
$string : false; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/URI/Host.php b/library/HTMLPurifier/AttrDef/URI/Host.php deleted file mode 100644 index e7df800b1..000000000 --- a/library/HTMLPurifier/AttrDef/URI/Host.php +++ /dev/null @@ -1,128 +0,0 @@ -<?php - -/** - * Validates a host according to the IPv4, IPv6 and DNS (future) specifications. - */ -class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef -{ - - /** - * IPv4 sub-validator. - * @type HTMLPurifier_AttrDef_URI_IPv4 - */ - protected $ipv4; - - /** - * IPv6 sub-validator. - * @type HTMLPurifier_AttrDef_URI_IPv6 - */ - protected $ipv6; - - public function __construct() - { - $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4(); - $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6(); - } - - /** - * @param string $string - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($string, $config, $context) - { - $length = strlen($string); - // empty hostname is OK; it's usually semantically equivalent: - // the default host as defined by a URI scheme is used: - // - // If the URI scheme defines a default for host, then that - // default applies when the host subcomponent is undefined - // or when the registered name is empty (zero length). - if ($string === '') { - return ''; - } - if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') { - //IPv6 - $ip = substr($string, 1, $length - 2); - $valid = $this->ipv6->validate($ip, $config, $context); - if ($valid === false) { - return false; - } - return '[' . $valid . ']'; - } - - // need to do checks on unusual encodings too - $ipv4 = $this->ipv4->validate($string, $config, $context); - if ($ipv4 !== false) { - return $ipv4; - } - - // A regular domain name. - - // This doesn't match I18N domain names, but we don't have proper IRI support, - // so force users to insert Punycode. 
- - // There is not a good sense in which underscores should be - // allowed, since it's technically not! (And if you go as - // far to allow everything as specified by the DNS spec... - // well, that's literally everything, modulo some space limits - // for the components and the overall name (which, by the way, - // we are NOT checking!). So we (arbitrarily) decide this: - // let's allow underscores wherever we would have allowed - // hyphens, if they are enabled. This is a pretty good match - // for browser behavior, for example, a large number of browsers - // cannot handle foo_.example.com, but foo_bar.example.com is - // fairly well supported. - $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : ''; - - // The productions describing this are: - $a = '[a-z]'; // alpha - $an = '[a-z0-9]'; // alphanum - $and = "[a-z0-9-$underscore]"; // alphanum | "-" - // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum - $domainlabel = "$an($and*$an)?"; - // toplabel = alpha | alpha *( alphanum | "-" ) alphanum - $toplabel = "$a($and*$an)?"; - // hostname = *( domainlabel "." ) toplabel [ "." ] - if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { - return $string; - } - - // If we have Net_IDNA2 support, we can support IRIs by - // punycoding them. 
(This is the most portable thing to do, - // since otherwise we have to assume browsers support - - if ($config->get('Core.EnableIDNA')) { - $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); - // we need to encode each period separately - $parts = explode('.', $string); - try { - $new_parts = array(); - foreach ($parts as $part) { - $encodable = false; - for ($i = 0, $c = strlen($part); $i < $c; $i++) { - if (ord($part[$i]) > 0x7a) { - $encodable = true; - break; - } - } - if (!$encodable) { - $new_parts[] = $part; - } else { - $new_parts[] = $idna->encode($part); - } - } - $string = implode('.', $new_parts); - if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { - return $string; - } - } catch (Exception $e) { - // XXX error reporting - } - } - return false; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/URI/IPv4.php b/library/HTMLPurifier/AttrDef/URI/IPv4.php deleted file mode 100644 index 30ac16c9e..000000000 --- a/library/HTMLPurifier/AttrDef/URI/IPv4.php +++ /dev/null @@ -1,45 +0,0 @@ -<?php - -/** - * Validates an IPv4 address - * @author Feyd @ forums.devnetwork.net (public domain) - */ -class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef -{ - - /** - * IPv4 regex, protected so that IPv6 can reuse it. - * @type string - */ - protected $ip4; - - /** - * @param string $aIP - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($aIP, $config, $context) - { - if (!$this->ip4) { - $this->_loadRegex(); - } - - if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) { - return $aIP; - } - return false; - } - - /** - * Lazy load function to prevent regex from being stuffed in - * cache. 
- */ - protected function _loadRegex() - { - $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255 - $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/URI/IPv6.php b/library/HTMLPurifier/AttrDef/URI/IPv6.php deleted file mode 100644 index f243793ee..000000000 --- a/library/HTMLPurifier/AttrDef/URI/IPv6.php +++ /dev/null @@ -1,89 +0,0 @@ -<?php - -/** - * Validates an IPv6 address. - * @author Feyd @ forums.devnetwork.net (public domain) - * @note This function requires brackets to have been removed from address - * in URI. - */ -class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 -{ - - /** - * @param string $aIP - * @param HTMLPurifier_Config $config - * @param HTMLPurifier_Context $context - * @return bool|string - */ - public function validate($aIP, $config, $context) - { - if (!$this->ip4) { - $this->_loadRegex(); - } - - $original = $aIP; - - $hex = '[0-9a-fA-F]'; - $blk = '(?:' . $hex . '{1,4})'; - $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 - - // prefix check - if (strpos($aIP, '/') !== false) { - if (preg_match('#' . $pre . '$#s', $aIP, $find)) { - $aIP = substr($aIP, 0, 0 - strlen($find[0])); - unset($find); - } else { - return false; - } - } - - // IPv4-compatiblity check - if (preg_match('#(?<=:' . ')' . $this->ip4 . '$#s', $aIP, $find)) { - $aIP = substr($aIP, 0, 0 - strlen($find[0])); - $ip = explode('.', $find[0]); - $ip = array_map('dechex', $ip); - $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . 
$ip[3]; - unset($find, $ip); - } - - // compression check - $aIP = explode('::', $aIP); - $c = count($aIP); - if ($c > 2) { - return false; - } elseif ($c == 2) { - list($first, $second) = $aIP; - $first = explode(':', $first); - $second = explode(':', $second); - - if (count($first) + count($second) > 8) { - return false; - } - - while (count($first) < 8) { - array_push($first, '0'); - } - - array_splice($first, 8 - count($second), 8, $second); - $aIP = $first; - unset($first, $second); - } else { - $aIP = explode(':', $aIP[0]); - } - $c = count($aIP); - - if ($c != 8) { - return false; - } - - // All the pieces should be 16-bit hex strings. Are they? - foreach ($aIP as $piece) { - if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) { - return false; - } - } - return $original; - } -} - -// vim: et sw=4 sts=4 |