From e52ab312069a9af0c37c1666141752f3bc805054 Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Wed, 7 Mar 2018 11:41:46 +1100 Subject: URI.unescape handles mixed Unicode/escaped input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, URI.enscape could handle Unicode input (without any actual escaped characters), or input with escaped characters (but no actual Unicode characters) - not both. URI.unescape("\xe3\x83\x90") # => "バ" URI.unescape("%E3%83%90") # => "バ" URI.unescape("\xe3\x83\x90%E3%83%90") # => # Encoding::CompatibilityError We need to let `gsub` handle this for us, and then force back to the original encoding of the input. The result String will be mangled if the percent-encoded characters don't conform to the encoding of the String itself, but that goes without saying. Signed-off-by: Ashe Connor --- activesupport/lib/active_support/core_ext/uri.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'activesupport/lib/active_support/core_ext/uri.rb') diff --git a/activesupport/lib/active_support/core_ext/uri.rb b/activesupport/lib/active_support/core_ext/uri.rb index c93c0b5c2d..c4ac0baa32 100644 --- a/activesupport/lib/active_support/core_ext/uri.rb +++ b/activesupport/lib/active_support/core_ext/uri.rb @@ -13,7 +13,7 @@ unless str == parser.unescape(parser.escape(str)) # YK: My initial experiments say yes, but let's be sure please enc = str.encoding enc = Encoding::UTF_8 if enc == Encoding::US_ASCII - str.gsub(escaped) { |match| [match[1, 2].hex].pack("C") }.force_encoding(enc) + str.dup.force_encoding(Encoding::ASCII_8BIT).gsub(escaped) { |match| [match[1, 2].hex].pack("C") }.force_encoding(enc) end end end -- cgit v1.2.3