diff options
Diffstat (limited to 'library/HTMLPurifier/URI.php')
-rw-r--r-- | library/HTMLPurifier/URI.php | 257 |
1 files changed, 199 insertions, 58 deletions
diff --git a/library/HTMLPurifier/URI.php b/library/HTMLPurifier/URI.php index 8b50d0d18..a5e7ae298 100644 --- a/library/HTMLPurifier/URI.php +++ b/library/HTMLPurifier/URI.php @@ -10,17 +10,57 @@ */ class HTMLPurifier_URI { + /** + * @type string + */ + public $scheme; - public $scheme, $userinfo, $host, $port, $path, $query, $fragment; + /** + * @type string + */ + public $userinfo; /** + * @type string + */ + public $host; + + /** + * @type int + */ + public $port; + + /** + * @type string + */ + public $path; + + /** + * @type string + */ + public $query; + + /** + * @type string + */ + public $fragment; + + /** + * @param string $scheme + * @param string $userinfo + * @param string $host + * @param int $port + * @param string $path + * @param string $query + * @param string $fragment * @note Automatically normalizes scheme and port */ - public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) { + public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) + { $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme); $this->userinfo = $userinfo; $this->host = $host; - $this->port = is_null($port) ? $port : (int) $port; + $this->port = is_null($port) ? $port : (int)$port; $this->path = $path; $this->query = $query; $this->fragment = $fragment; @@ -28,19 +68,22 @@ class HTMLPurifier_URI /** * Retrieves a scheme object corresponding to the URI's scheme/default - * @param $config Instance of HTMLPurifier_Config - * @param $context Instance of HTMLPurifier_Context - * @return Scheme object appropriate for validating this URI + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI */ - public function getSchemeObj($config, $context) { + public function getSchemeObj($config, $context) + { $registry = HTMLPurifier_URISchemeRegistry::instance(); if ($this->scheme !== null) { $scheme_obj = $registry->getScheme($this->scheme, $config, $context); - if (!$scheme_obj) return false; // invalid scheme, clean it out + if (!$scheme_obj) { + return false; + } // invalid scheme, clean it out } else { // no scheme: retrieve the default one $def = $config->getDefinition('URI'); - $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context); + $scheme_obj = $def->getDefaultScheme($config, $context); if (!$scheme_obj) { // something funky happened to the default scheme object trigger_error( @@ -56,30 +99,39 @@ class HTMLPurifier_URI /** * Generic validation method applicable for all schemes. May modify * this URI in order to get it into a compliant form. - * @param $config Instance of HTMLPurifier_Config - * @param $context Instance of HTMLPurifier_Context - * @return True if validation/filtering succeeds, false if failure + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return bool True if validation/filtering succeeds, false if failure */ - public function validate($config, $context) { - + public function validate($config, $context) + { // ABNF definitions from RFC 3986 $chars_sub_delims = '!$&\'()*+,;='; $chars_gen_delims = ':/?#[]@'; $chars_pchar = $chars_sub_delims . ':@'; - // validate scheme (MUST BE FIRST!) - if (!is_null($this->scheme) && is_null($this->host)) { - $def = $config->getDefinition('URI'); - if ($def->defaultScheme === $this->scheme) { - $this->scheme = null; - } - } - // validate host if (!is_null($this->host)) { $host_def = new HTMLPurifier_AttrDef_URI_Host(); $this->host = $host_def->validate($this->host, $config, $context); - if ($this->host === false) $this->host = null; + if ($this->host === false) { + $this->host = null; + } + } + + // validate scheme + // NOTE: It's not appropriate to check whether or not this + // scheme is in our registry, since a URIFilter may convert a + // URI that we don't allow into one we do. So instead, we just + // check if the scheme can be dropped because there is no host + // and it is our default scheme. + if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') { + // support for relative paths is pretty abysmal when the + // scheme is present, so axe it when possible + $def = $config->getDefinition('URI'); + if ($def->defaultScheme === $this->scheme) { + $this->scheme = null; + } } // validate username @@ -90,38 +142,55 @@ class HTMLPurifier_URI // validate port if (!is_null($this->port)) { - if ($this->port < 1 || $this->port > 65535) $this->port = null; + if ($this->port < 1 || $this->port > 65535) { + $this->port = null; + } } // validate path - $path_parts = array(); $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/'); - if (!is_null($this->host)) { + if (!is_null($this->host)) { // this catches $this->host === '' // path-abempty (hier and relative) + // http://www.example.com/my/path + // //www.example.com/my/path (looks odd, but works, and + // recognized by most browsers) + // (this set is valid or invalid on a scheme by scheme + // basis, so we'll deal with it later) + // file:///my/path + // ///my/path $this->path = $segments_encoder->encode($this->path); - } elseif ($this->path !== '' && $this->path[0] === '/') { - // path-absolute (hier and relative) - if (strlen($this->path) >= 2 && $this->path[1] === '/') { - // This shouldn't ever happen! - $this->path = ''; - } else { + } elseif ($this->path !== '') { + if ($this->path[0] === '/') { + // path-absolute (hier and relative) + // http:/my/path + // /my/path + if (strlen($this->path) >= 2 && $this->path[1] === '/') { + // This could happen if both the host gets stripped + // out + // http://my/path + // //my/path + $this->path = ''; + } else { + $this->path = $segments_encoder->encode($this->path); + } + } elseif (!is_null($this->scheme)) { + // path-rootless (hier) + // http:my/path + // Short circuit evaluation means we don't need to check nz $this->path = $segments_encoder->encode($this->path); - } - } elseif (!is_null($this->scheme) && $this->path !== '') { - // path-rootless (hier) - // Short circuit evaluation means we don't need to check nz - $this->path = $segments_encoder->encode($this->path); - } elseif (is_null($this->scheme) && $this->path !== '') { - // path-noscheme (relative) - // (once again, not checking nz) - $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); - $c = strpos($this->path, '/'); - if ($c !== false) { - $this->path = - $segment_nc_encoder->encode(substr($this->path, 0, $c)) . - $segments_encoder->encode(substr($this->path, $c)); } else { - $this->path = $segment_nc_encoder->encode($this->path); + // path-noscheme (relative) + // my/path + // (once again, not checking nz) + $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); + $c = strpos($this->path, '/'); + if ($c !== false) { + $this->path = + $segment_nc_encoder->encode(substr($this->path, 0, $c)) . + $segments_encoder->encode(substr($this->path, $c)); + } else { + $this->path = $segment_nc_encoder->encode($this->path); + } } } else { // path-empty (hier and relative) @@ -138,36 +207,108 @@ class HTMLPurifier_URI if (!is_null($this->fragment)) { $this->fragment = $qf_encoder->encode($this->fragment); } - return true; - } /** * Convert URI back to string - * @return String URI appropriate for output + * @return string URI appropriate for output */ - public function toString() { + public function toString() + { // reconstruct authority $authority = null; + // there is a rendering difference between a null authority + // (http:foo-bar) and an empty string authority + // (http:///foo-bar). if (!is_null($this->host)) { $authority = ''; - if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@'; + if (!is_null($this->userinfo)) { + $authority .= $this->userinfo . '@'; + } $authority .= $this->host; - if(!is_null($this->port)) $authority .= ':' . $this->port; + if (!is_null($this->port)) { + $authority .= ':' . $this->port; + } } - // reconstruct the result + // Reconstruct the result + // One might wonder about parsing quirks from browsers after + // this reconstruction. Unfortunately, parsing behavior depends + // on what *scheme* was employed (file:///foo is handled *very* + // differently than http:///foo), so unfortunately we have to + // defer to the schemes to do the right thing. $result = ''; - if (!is_null($this->scheme)) $result .= $this->scheme . ':'; - if (!is_null($authority)) $result .= '//' . $authority; + if (!is_null($this->scheme)) { + $result .= $this->scheme . ':'; + } + if (!is_null($authority)) { + $result .= '//' . $authority; + } $result .= $this->path; - if (!is_null($this->query)) $result .= '?' . $this->query; - if (!is_null($this->fragment)) $result .= '#' . $this->fragment; + if (!is_null($this->query)) { + $result .= '?' . $this->query; + } + if (!is_null($this->fragment)) { + $result .= '#' . $this->fragment; + } return $result; } + /** + * Returns true if this URL might be considered a 'local' URL given + * the current context. This is true when the host is null, or + * when it matches the host supplied to the configuration. + * + * Note that this does not do any scheme checking, so it is mostly + * only appropriate for metadata that doesn't care about protocol + * security. isBenign is probably what you actually want. + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return bool + */ + public function isLocal($config, $context) + { + if ($this->host === null) { + return true; + } + $uri_def = $config->getDefinition('URI'); + if ($uri_def->host === $this->host) { + return true; + } + return false; + } + + /** + * Returns true if this URL should be considered a 'benign' URL, + * that is: + * + * - It is a local URL (isLocal), and + * - It has a equal or better level of security + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return bool + */ + public function isBenign($config, $context) + { + if (!$this->isLocal($config, $context)) { + return false; + } + + $scheme_obj = $this->getSchemeObj($config, $context); + if (!$scheme_obj) { + return false; + } // conservative approach + + $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context); + if ($current_scheme_obj->secure) { + if (!$scheme_obj->secure) { + return false; + } + } + return true; + } } // vim: et sw=4 sts=4 |