From 05f9e3ef92114c2df978009073d0b47fe1323eb7 Mon Sep 17 00:00:00 2001
From: Cliff Pruitt
Date: Tue, 16 Jul 2019 14:41:51 -0400
Subject: Make UTF-8 string requirement explicit for `transliterate`

It's noted in #34062 that String#parameterize will raise an
`Encoding::CompatibilityError` if the string is not UTF-8 encoded. The
error is raised as a result of passing the string to `.unicode_normalize`.

This PR raises a higher level `ArgumentError` if the provided string is not
UTF-8 and updates documentation to note the encoding requirement.
---
 activesupport/lib/active_support/inflector/transliterate.rb | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'activesupport/lib')

diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb
index ec6e9ccb59..ea7161a6ba 100644
--- a/activesupport/lib/active_support/inflector/transliterate.rb
+++ b/activesupport/lib/active_support/inflector/transliterate.rb
@@ -5,8 +5,9 @@ require "active_support/i18n"
 
 module ActiveSupport
   module Inflector
-    # Replaces non-ASCII characters with an ASCII approximation, or if none
-    # exists, a replacement character which defaults to "?".
+    # Replaces non-ASCII characters in a UTF-8 encoded string with an ASCII
+    # approximation, or if none exists, a replacement character which
+    # defaults to "?".
     #
     #   transliterate('Ærøskøbing')
     #   # => "AEroskobing"
@@ -56,8 +57,12 @@ module ActiveSupport
     #
     #   transliterate('Jürgen', locale: :de)
     #   # => "Juergen"
+    #
+    # This method requires that `string` be UTF-8 encoded. Passing an argument
+    # with a different string encoding will raise an ArgumentError.
     def transliterate(string, replacement = "?", locale: nil)
       raise ArgumentError, "Can only transliterate strings. Received #{string.class.name}" unless string.is_a?(String)
+      raise ArgumentError, "Can only transliterate UTF-8 strings. Received string with encoding #{string.encoding}" unless string.encoding == ::Encoding::UTF_8
 
       I18n.transliterate(
         ActiveSupport::Multibyte::Unicode.tidy_bytes(string).unicode_normalize(:nfc),
--
cgit v1.2.3