some important stuff we'll need

author: friendica <info@friendica.com> 2012-05-12 17:57:41 -0700
committer: friendica <info@friendica.com> 2012-07-18 20:40:31 +1000
commit: 7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a (patch)
tree: a9c3d91209cff770bb4b613b1b95e61a7bbc5a2b /lib/htmlpurifier/library/HTMLPurifier/URIParser.php
parent: cd727cb26b78a1dade09d510b071446898477356 (diff)
download: volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.gz
volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.bz2
volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.zip
1 files changed, 70 insertions, 0 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/URIParser.php b/lib/htmlpurifier/library/HTMLPurifier/URIParser.php
new file mode 100644
index 000000000..7179e4ab8
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/URIParser.php
@@ -0,0 +1,70 @@
+<?php
+
+/**
+ * Parses a URI into the components and fragment identifier as specified
+ * by RFC 3986.
+ */
+class HTMLPurifier_URIParser
+{
+
+    /** Instance of HTMLPurifier_PercentEncoder to do normalization with. */
+    protected $percentEncoder;
+
+    /** Creates the parser with a default HTMLPurifier_PercentEncoder. */
+    public function __construct() {
+        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
+    }
+
+    /**
+     * Parses a URI into scheme, userinfo, host, port, path, query, fragment.
+     * @param $uri string URI to parse
+     * @return HTMLPurifier_URI representation of URI, or false if the
+     *         pattern match fails. This representation has not been
+     *         validated yet and may not conform to RFC.
+     */
+    public function parse($uri) {
+        $uri = $this->percentEncoder->normalize($uri);
+
+        // Regexp is as per Appendix B.
+        // Note that ["<>] are an addition to the RFC's recommended
+        // characters, because they represent external delimiters.
+        $r_URI = '!'.
+            '(([^:/?#"<>]+):)?'. // 2. Scheme
+            '(//([^/?#"<>]*))?'. // 4. Authority
+            '([^?#"<>]*)'.       // 5. Path
+            '(\?([^#"<>]*))?'.   // 7. Query
+            '(#([^"<>]*))?'.     // 9. Fragment
+            '!';
+        $matches = array();
+        if (!preg_match($r_URI, $uri, $matches)) return false; // *really* invalid URI
+
+        // Separate out parts. The odd-numbered wrapper groups include the
+        // delimiter, so they are non-empty whenever the component is present
+        // (even if the component itself is the empty string or "0").
+        $scheme     = !empty($matches[1]) ? $matches[2] : null;
+        $authority  = !empty($matches[3]) ? $matches[4] : null;
+        $path       = $matches[5]; // always present, can be empty
+        $query      = !empty($matches[6]) ? $matches[7] : null;
+        $fragment   = !empty($matches[8]) ? $matches[9] : null;
+
+        // further parse authority
+        $port = $host = $userinfo = null;
+        if ($authority !== null) {
+            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
+            $matches = array();
+            preg_match($r_authority, $authority, $matches);
+            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
+            // isset(), not !empty(): empty('0') is true in PHP, which
+            // would silently drop the legitimate host "0".
+            $host       = isset($matches[3]) ? $matches[3] : '';
+            // An empty port ("host:") casts to 0 here.
+            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
+        }
+
+        return new HTMLPurifier_URI(
+            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
+    }
+
+}
+
+// vim: et sw=4 sts=4
author	friendica <info@friendica.com>	2012-05-12 17:57:41 -0700
committer	friendica <info@friendica.com>	2012-07-18 20:40:31 +1000
commit	7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a (patch)
tree	a9c3d91209cff770bb4b613b1b95e61a7bbc5a2b /lib/htmlpurifier/library/HTMLPurifier/URIParser.php
parent	cd727cb26b78a1dade09d510b071446898477356 (diff)
download	volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.gz volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.tar.bz2 volse-hubzilla-7a40f4354b32809af3d0cfd6e3af0eda02ab0e0a.zip