about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Cliff Pruitt <cliff.pruitt@cliffpruitt.com> 2019-07-16 14:41:51 -0400
committer Cliff Pruitt <cliff.pruitt@cliffpruitt.com> 2019-07-16 14:51:57 -0400
commit 05f9e3ef92114c2df978009073d0b47fe1323eb7 (patch)
tree 2bfb7197f1bc81ed23628c6e2828631fd3cd8c26
parent 85b422bd7fcb93c6a3d13f78fca55b85f69865a1 (diff)
download rails-05f9e3ef92114c2df978009073d0b47fe1323eb7.tar.gz
rails-05f9e3ef92114c2df978009073d0b47fe1323eb7.tar.bz2
rails-05f9e3ef92114c2df978009073d0b47fe1323eb7.zip
Make UTF-8 string requirement explicit for `transliterate`
As noted in #34062, `String#parameterize` raises an `Encoding::CompatibilityError` if the string is not UTF-8 encoded. The error is raised as a result of passing the string to `.unicode_normalize`. This PR raises a higher-level `ArgumentError` if the provided string is not UTF-8 and updates the documentation to note the encoding requirement.
-rw-r--r-- activesupport/lib/active_support/inflector/transliterate.rb 9
-rw-r--r-- activesupport/test/transliterate_test.rb 8
2 files changed, 15 insertions, 2 deletions
diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb
index ec6e9ccb59..ea7161a6ba 100644
--- a/activesupport/lib/active_support/inflector/transliterate.rb
+++ b/activesupport/lib/active_support/inflector/transliterate.rb
@@ -5,8 +5,9 @@ require "active_support/i18n"
module ActiveSupport
module Inflector
- # Replaces non-ASCII characters with an ASCII approximation, or if none
- # exists, a replacement character which defaults to "?".
+ # Replaces non-ASCII characters in a UTF-8 encoded string with an ASCII
+ # approximation, or if none exists, a replacement character which
+ # defaults to "?".
#
# transliterate('Ærøskøbing')
# # => "AEroskobing"
@@ -56,8 +57,12 @@ module ActiveSupport
#
# transliterate('Jürgen', locale: :de)
# # => "Juergen"
+ #
+ # This method requires that `string` be UTF-8 encoded. Passing an argument
+ # with a different string encoding will raise an ArgumentError.
def transliterate(string, replacement = "?", locale: nil)
raise ArgumentError, "Can only transliterate strings. Received #{string.class.name}" unless string.is_a?(String)
+ raise ArgumentError, "Can only transliterate UTF-8 strings. Received string with encoding #{string.encoding}" unless string.encoding == ::Encoding::UTF_8
I18n.transliterate(
ActiveSupport::Multibyte::Unicode.tidy_bytes(string).unicode_normalize(:nfc),
diff --git a/activesupport/test/transliterate_test.rb b/activesupport/test/transliterate_test.rb
index 9e29a93ea0..525b4a8559 100644
--- a/activesupport/test/transliterate_test.rb
+++ b/activesupport/test/transliterate_test.rb
@@ -57,4 +57,12 @@ class TransliterateTest < ActiveSupport::TestCase
end
assert_equal "Can only transliterate strings. Received Object", exception.message
end
+
+ def test_transliterate_handles_non_unicode_strings
+ ascii_8bit_string = "A".b
+ exception = assert_raises ArgumentError do
+ assert_equal "A", ActiveSupport::Inflector.transliterate(ascii_8bit_string)
+ end
+ assert_equal "Can only transliterate UTF-8 strings. Received string with encoding ASCII-8BIT", exception.message
+ end
end