aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport
diff options
context:
space:
mode:
author Cliff Pruitt <cliff.pruitt@cliffpruitt.com> 2019-07-25 14:51:42 -0400
committer Cliff Pruitt <cliff.pruitt@cliffpruitt.com> 2019-07-26 12:18:01 -0400
commit 369daa530dd9db7bbba79b8c75012e0fa84c9f48 (patch)
tree 2f1acc6f4148dd0f39664c7a6ab703d3f5069e08 /activesupport
parent 0cdaa38dd190cae1f13f0f22ab3d1002b6a82081 (diff)
downloadrails-369daa530dd9db7bbba79b8c75012e0fa84c9f48.tar.gz
rails-369daa530dd9db7bbba79b8c75012e0fa84c9f48.tar.bz2
rails-369daa530dd9db7bbba79b8c75012e0fa84c9f48.zip
Handle US-ASCII strings with invalid characters in transliterate
US-ASCII is a subset of UTF-8 so we can temporarily convert US-ASCII strings to UTF-8 to perform the transliteration. After we've converted characters to ASCII representations, we can set the encoding back to US-ASCII to return the same encoding we accepted.
Diffstat (limited to 'activesupport')
-rw-r--r--activesupport/lib/active_support/inflector/transliterate.rb17
-rw-r--r--activesupport/test/transliterate_test.rb28
2 files changed, 26 insertions, 19 deletions
diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb
index 61651ba101..a6f57d73ac 100644
--- a/activesupport/lib/active_support/inflector/transliterate.rb
+++ b/activesupport/lib/active_support/inflector/transliterate.rb
@@ -61,13 +61,26 @@ module ActiveSupport
# supported and will raise an ArgumentError.
def transliterate(string, replacement = "?", locale: nil)
raise ArgumentError, "Can only transliterate strings. Received #{string.class.name}" unless string.is_a?(String)
- raise ArgumentError, "Can not transliterate strings with ASCII-8BIT encoding" if string.encoding == ::Encoding::ASCII_8BIT
- I18n.transliterate(
+ allowed_encodings = [Encoding::UTF_8, Encoding::US_ASCII, Encoding::GB18030]
+ raise ArgumentError, "Can not transliterate strings with #{string.encoding} encoding" unless allowed_encodings.include?(string.encoding)
+
+ input_encoding = string.encoding
+
+ # US-ASCII is a subset of UTF-8, so we force the encoding to UTF-8 when US-ASCII is given.
+ # This way we can handle invalid bytes in the same way as we do for UTF-8.
+ string.force_encoding(Encoding::UTF_8) if string.encoding == Encoding::US_ASCII
+
+ transliterated = I18n.transliterate(
ActiveSupport::Multibyte::Unicode.tidy_bytes(string).unicode_normalize(:nfc),
replacement: replacement,
locale: locale
)
+
+ # If we were given US-ASCII we give back US-ASCII
+ transliterated.force_encoding(Encoding::US_ASCII) if input_encoding == Encoding::US_ASCII
+
+ transliterated
end
# Replaces special characters in a string so that it may be used as part of
diff --git a/activesupport/test/transliterate_test.rb b/activesupport/test/transliterate_test.rb
index 4101e4878a..47830946bf 100644
--- a/activesupport/test/transliterate_test.rb
+++ b/activesupport/test/transliterate_test.rb
@@ -67,7 +67,9 @@ class TransliterateTest < ActiveSupport::TestCase
# Valid US-ASCII Works
def test_transliterate_handles_strings_with_valid_us_ascii_encodings
string = String.new("A", encoding: Encoding::US_ASCII)
- assert_equal "A", ActiveSupport::Inflector.transliterate(string)
+ transcoded = ActiveSupport::Inflector.transliterate(string)
+ assert_equal "A", transcoded
+ assert_equal Encoding::US_ASCII, transcoded.encoding
end
# Valid GB18030 Works
@@ -76,20 +78,19 @@ class TransliterateTest < ActiveSupport::TestCase
assert_equal "A", ActiveSupport::Inflector.transliterate(string)
end
- # All other encodings raise exceptions
+ # All other encodings raise argument errors
def test_transliterate_handles_strings_with_incompatible_encodings
incompatible_encodings = Encoding.list - [
Encoding::UTF_8,
Encoding::US_ASCII,
- Encoding::GB18030,
+ Encoding::GB18030
]
- # This Raises an argument error
- incompatible_encodings -= [Encoding::ASCII_8BIT]
incompatible_encodings.each do |encoding|
string = String.new("", encoding: encoding)
- exception = assert_raises Encoding::CompatibilityError do
+ exception = assert_raises ArgumentError do
ActiveSupport::Inflector.transliterate(string)
end
+ assert_equal "Can not transliterate strings with #{encoding} encoding", exception.message
end
end
@@ -102,9 +103,10 @@ class TransliterateTest < ActiveSupport::TestCase
# Invalid raises exception
def test_transliterate_handles_strings_with_invalid_us_ascii_bytes
string = String.new("\255", encoding: Encoding::US_ASCII)
- exception = assert_raises Encoding::CompatibilityError do
- ActiveSupport::Inflector.transliterate(string)
- end
+ # exception = assert_raises Encoding::CompatibilityError do
+ # ActiveSupport::Inflector.transliterate(string)
+ # end
+ assert_equal "?", ActiveSupport::Inflector.transliterate(string)
end
# Invalid GB18030 raises exception
@@ -114,12 +116,4 @@ class TransliterateTest < ActiveSupport::TestCase
ActiveSupport::Inflector.transliterate(string)
end
end
-
- def test_transliterate_handles_ascci_8bit_strings
- ascii_8bit_string = "A".b
- exception = assert_raises ArgumentError do
- ActiveSupport::Inflector.transliterate(ascii_8bit_string)
- end
- assert_equal "Can not transliterate strings with ASCII-8BIT encoding", exception.message
- end
end