aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib
diff options
context:
space:
mode:
author wycats <wycats@gmail.com> 2010-06-04 11:50:34 -0700
committer wycats <wycats@gmail.com> 2010-06-04 20:11:06 -0700
commit 16ee4b4d1b125bd3edb5c191d58c7afdf6d3232e (patch)
tree 793b5c4f479be16782e93677cacc3b26f83bd7ce /activesupport/lib
parent b8af484476d1dda685f058a0f185608cd18a862e (diff)
download rails-16ee4b4d1b125bd3edb5c191d58c7afdf6d3232e.tar.gz
rails-16ee4b4d1b125bd3edb5c191d58c7afdf6d3232e.tar.bz2
rails-16ee4b4d1b125bd3edb5c191d58c7afdf6d3232e.zip
Small optimization of 1.9 unescape. We should make sure that inbound ASCII always means UTF-8. It seems so, based on a quick survey of common browsers, but let's be sure.
Diffstat (limited to 'activesupport/lib')
-rw-r--r-- activesupport/lib/active_support/core_ext/uri.rb 8
1 files changed, 6 insertions, 2 deletions
diff --git a/activesupport/lib/active_support/core_ext/uri.rb b/activesupport/lib/active_support/core_ext/uri.rb
index 28eabd2111..b7fe0a6209 100644
--- a/activesupport/lib/active_support/core_ext/uri.rb
+++ b/activesupport/lib/active_support/core_ext/uri.rb
@@ -6,11 +6,15 @@ if RUBY_VERSION >= '1.9'
str = "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E" # Ni-ho-nn-go in UTF-8, means Japanese.
parser = URI::Parser.new
+
unless str == parser.unescape(parser.escape(str))
URI::Parser.class_eval do
remove_method :unescape
- def unescape(str, escaped = @regexp[:ESCAPED])
- enc = (str.encoding == Encoding::US_ASCII) ? Encoding::UTF_8 : str.encoding
+ def unescape(str, escaped = /%[a-fA-F\d]{2}/)
+ # TODO: Are we actually sure that ASCII == UTF-8?
+ # YK: My initial experiments say yes, but let's be sure please
+ enc = str.encoding
+ enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
str.gsub(escaped) { [$&[1, 2].hex].pack('C') }.force_encoding(enc)
end
end