aboutsummaryrefslogtreecommitdiffstats
path: root/library/HTMLPurifier/PercentEncoder.php
diff options
context:
space:
mode:
Diffstat (limited to 'library/HTMLPurifier/PercentEncoder.php')
-rw-r--r--library/HTMLPurifier/PercentEncoder.php98
1 files changed, 98 insertions, 0 deletions
diff --git a/library/HTMLPurifier/PercentEncoder.php b/library/HTMLPurifier/PercentEncoder.php
new file mode 100644
index 000000000..a43c44f4c
--- /dev/null
+++ b/library/HTMLPurifier/PercentEncoder.php
@@ -0,0 +1,98 @@
+<?php
+
+/**
+ * Class that handles operations involving percent-encoding in URIs.
+ *
+ * @warning
+ * Be careful when reusing instances of PercentEncoder. The object
+ * you use for normalize() SHOULD NOT be used for encode(), or
+ * vice-versa.
+ */
+class HTMLPurifier_PercentEncoder
+{
+
+ /**
+ * Reserved characters to preserve when using encode().
+ */
+ protected $preserve = array();
+
+ /**
+ * String of characters that should be preserved while using encode().
+ */
+ public function __construct($preserve = false) {
+ // unreserved letters, ought to const-ify
+ for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits
+ for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case
+ for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case
+ $this->preserve[45] = true; // Dash -
+ $this->preserve[46] = true; // Period .
+ $this->preserve[95] = true; // Underscore _
+ $this->preserve[126]= true; // Tilde ~
+
+ // extra letters not to escape
+ if ($preserve !== false) {
+ for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
+ $this->preserve[ord($preserve[$i])] = true;
+ }
+ }
+ }
+
+ /**
+ * Our replacement for urlencode, it encodes all non-reserved characters,
+ * as well as any extra characters that were instructed to be preserved.
+ * @note
+ * Assumes that the string has already been normalized, making any
+ * and all percent escape sequences valid. Percents will not be
+ * re-escaped, regardless of their status in $preserve
+ * @param $string String to be encoded
+ * @return Encoded string.
+ */
+ public function encode($string) {
+ $ret = '';
+ for ($i = 0, $c = strlen($string); $i < $c; $i++) {
+ if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) {
+ $ret .= '%' . sprintf('%02X', $int);
+ } else {
+ $ret .= $string[$i];
+ }
+ }
+ return $ret;
+ }
+
+ /**
+ * Fix up percent-encoding by decoding unreserved characters and normalizing.
+ * @warning This function is affected by $preserve, even though the
+ * usual desired behavior is for this not to preserve those
+ * characters. Be careful when reusing instances of PercentEncoder!
+ * @param $string String to normalize
+ */
+ public function normalize($string) {
+ if ($string == '') return '';
+ $parts = explode('%', $string);
+ $ret = array_shift($parts);
+ foreach ($parts as $part) {
+ $length = strlen($part);
+ if ($length < 2) {
+ $ret .= '%25' . $part;
+ continue;
+ }
+ $encoding = substr($part, 0, 2);
+ $text = substr($part, 2);
+ if (!ctype_xdigit($encoding)) {
+ $ret .= '%25' . $part;
+ continue;
+ }
+ $int = hexdec($encoding);
+ if (isset($this->preserve[$int])) {
+ $ret .= chr($int) . $text;
+ continue;
+ }
+ $encoding = strtoupper($encoding);
+ $ret .= '%' . $encoding . $text;
+ }
+ return $ret;
+ }
+
+}
+
+// vim: et sw=4 sts=4