aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/URIScheme.php
diff options
context:
space:
mode:
Diffstat (limited to 'library/HTMLPurifier/URIScheme.php')
-rw-r--r--library/HTMLPurifier/URIScheme.php88
1 files changed, 74 insertions, 14 deletions
diff --git a/library/HTMLPurifier/URIScheme.php b/library/HTMLPurifier/URIScheme.php
index 039710fd1..fe9e82cf2 100644
--- a/library/HTMLPurifier/URIScheme.php
+++ b/library/HTMLPurifier/URIScheme.php
@@ -3,40 +3,100 @@
/**
* Validator for the components of a URI for a specific scheme
*/
-class HTMLPurifier_URIScheme
+abstract class HTMLPurifier_URIScheme
{
/**
- * Scheme's default port (integer)
+ * Scheme's default port (integer). If an explicit port number is
+ * specified that coincides with the default port, it will be
+ * elided.
+ * @type int
*/
public $default_port = null;
/**
- * Whether or not URIs of this schem are locatable by a browser
+ * Whether or not URIs of this scheme are locatable by a browser
* http and ftp are accessible, while mailto and news are not.
+ * @type bool
*/
public $browsable = false;
/**
+ * Whether or not data transmitted over this scheme is encrypted.
+ * https is secure, http is not.
+ * @type bool
+ */
+ public $secure = false;
+
+ /**
* Whether or not the URI always uses <hier_part>, resolves edge cases
* with making relative URIs absolute
+ * @type bool
*/
public $hierarchical = false;
/**
- * Validates the components of a URI
- * @note This implementation should be called by children if they define
- * a default port, as it does port processing.
- * @param $uri Instance of HTMLPurifier_URI
- * @param $config HTMLPurifier_Config object
- * @param $context HTMLPurifier_Context object
- * @return Bool success or failure
+ * Whether or not the URI may omit a hostname when the scheme is
+ * explicitly specified, ala file:///path/to/file. As of writing,
+ * 'file' is the only scheme that browsers support his properly.
+ * @type bool
*/
- public function validate(&$uri, $config, $context) {
- if ($this->default_port == $uri->port) $uri->port = null;
- return true;
- }
+ public $may_omit_host = false;
+
+ /**
+ * Validates the components of a URI for a specific scheme.
+ * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool success or failure
+ */
+ abstract public function doValidate(&$uri, $config, $context);
+ /**
+ * Public interface for validating components of a URI. Performs a
+ * bunch of default actions. Don't overload this method.
+ * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool success or failure
+ */
+ public function validate(&$uri, $config, $context)
+ {
+ if ($this->default_port == $uri->port) {
+ $uri->port = null;
+ }
+ // kludge: browsers do funny things when the scheme but not the
+ // authority is set
+ if (!$this->may_omit_host &&
+ // if the scheme is present, a missing host is always in error
+ (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) ||
+ // if the scheme is not present, a *blank* host is in error,
+ // since this translates into '///path' which most browsers
+ // interpret as being 'http://path'.
+ (is_null($uri->scheme) && $uri->host === '')
+ ) {
+ do {
+ if (is_null($uri->scheme)) {
+ if (substr($uri->path, 0, 2) != '//') {
+ $uri->host = null;
+ break;
+ }
+ // URI is '////path', so we cannot nullify the
+ // host to preserve semantics. Try expanding the
+ // hostname instead (fall through)
+ }
+ // first see if we can manually insert a hostname
+ $host = $config->get('URI.Host');
+ if (!is_null($host)) {
+ $uri->host = $host;
+ } else {
+ // we can't do anything sensible, reject the URL.
+ return false;
+ }
+ } while (false);
+ }
+ return $this->doValidate($uri, $config, $context);
+ }
}
// vim: et sw=4 sts=4