From ffb1997902facb36b78a7cfa522f41f2b3d71cda Mon Sep 17 00:00:00 2001 From: Mike Macgirvin Date: Wed, 8 Sep 2010 20:14:17 -0700 Subject: mistpark 2.0 infrasturcture lands --- library/HTMLPurifier/PercentEncoder.php | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 library/HTMLPurifier/PercentEncoder.php (limited to 'library/HTMLPurifier/PercentEncoder.php') diff --git a/library/HTMLPurifier/PercentEncoder.php b/library/HTMLPurifier/PercentEncoder.php new file mode 100644 index 000000000..a43c44f4c --- /dev/null +++ b/library/HTMLPurifier/PercentEncoder.php @@ -0,0 +1,98 @@ +preserve[$i] = true; // digits + for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case + for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case + $this->preserve[45] = true; // Dash - + $this->preserve[46] = true; // Period . + $this->preserve[95] = true; // Underscore _ + $this->preserve[126]= true; // Tilde ~ + + // extra letters not to escape + if ($preserve !== false) { + for ($i = 0, $c = strlen($preserve); $i < $c; $i++) { + $this->preserve[ord($preserve[$i])] = true; + } + } + } + + /** + * Our replacement for urlencode, it encodes all non-reserved characters, + * as well as any extra characters that were instructed to be preserved. + * @note + * Assumes that the string has already been normalized, making any + * and all percent escape sequences valid. Percents will not be + * re-escaped, regardless of their status in $preserve + * @param $string String to be encoded + * @return Encoded string. + */ + public function encode($string) { + $ret = ''; + for ($i = 0, $c = strlen($string); $i < $c; $i++) { + if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) { + $ret .= '%' . sprintf('%02X', $int); + } else { + $ret .= $string[$i]; + } + } + return $ret; + } + + /** + * Fix up percent-encoding by decoding unreserved characters and normalizing. + * @warning This function is affected by $preserve, even though the + * usual desired behavior is for this not to preserve those + * characters. Be careful when reusing instances of PercentEncoder! + * @param $string String to normalize + */ + public function normalize($string) { + if ($string == '') return ''; + $parts = explode('%', $string); + $ret = array_shift($parts); + foreach ($parts as $part) { + $length = strlen($part); + if ($length < 2) { + $ret .= '%25' . $part; + continue; + } + $encoding = substr($part, 0, 2); + $text = substr($part, 2); + if (!ctype_xdigit($encoding)) { + $ret .= '%25' . $part; + continue; + } + $int = hexdec($encoding); + if (isset($this->preserve[$int])) { + $ret .= chr($int) . $text; + continue; + } + $encoding = strtoupper($encoding); + $ret .= '%' . $encoding . $text; + } + return $ret; + } + +} + +// vim: et sw=4 sts=4 -- cgit v1.2.3