diff options
Diffstat (limited to 'activesupport/lib/active_support/inflector')
3 files changed, 26 insertions, 4 deletions
diff --git a/activesupport/lib/active_support/inflector/inflections.rb b/activesupport/lib/active_support/inflector/inflections.rb index 88cdd99dbd..efee74a1df 100644 --- a/activesupport/lib/active_support/inflector/inflections.rb +++ b/activesupport/lib/active_support/inflector/inflections.rb @@ -2,7 +2,6 @@ require "concurrent/map" require "active_support/i18n" -require "active_support/deprecation" module ActiveSupport module Inflector @@ -230,7 +229,6 @@ module ActiveSupport end private - def define_acronym_regex_patterns @acronym_regex = @acronyms.empty? ? /(?=a)b/ : /#{@acronyms.values.join("|")}/ @acronyms_camelize_regex = /^(?:#{@acronym_regex}(?=\b|[A-Z_])|\w)/ diff --git a/activesupport/lib/active_support/inflector/methods.rb b/activesupport/lib/active_support/inflector/methods.rb index ee193add6f..18f3f53879 100644 --- a/activesupport/lib/active_support/inflector/methods.rb +++ b/activesupport/lib/active_support/inflector/methods.rb @@ -359,7 +359,6 @@ module ActiveSupport end private - # Mounts a regular expression, returned as a string to ease interpolation, # that will match part by part the given constant. # diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb index ec6e9ccb59..0751f8a3ad 100644 --- a/activesupport/lib/active_support/inflector/transliterate.rb +++ b/activesupport/lib/active_support/inflector/transliterate.rb @@ -56,14 +56,39 @@ module ActiveSupport # # transliterate('Jürgen', locale: :de) # # => "Juergen" + # + # Transliteration is restricted to UTF-8, US-ASCII and GB18030 strings + # Other encodings will raise an ArgumentError. def transliterate(string, replacement = "?", locale: nil) raise ArgumentError, "Can only transliterate strings. Received #{string.class.name}" unless string.is_a?(String) - I18n.transliterate( + allowed_encodings = [Encoding::UTF_8, Encoding::US_ASCII, Encoding::GB18030] + raise ArgumentError, "Can not transliterate strings with #{string.encoding} encoding" unless allowed_encodings.include?(string.encoding) + + input_encoding = string.encoding + + # US-ASCII is a subset of UTF-8 so we'll force encoding as UTF-8 if + # US-ASCII is given. This way we can let tidy_bytes handle the string + # in the same way as we do for UTF-8 + string.force_encoding(Encoding::UTF_8) if string.encoding == Encoding::US_ASCII + + # GB18030 is Unicode compatible but is not a direct mapping so needs to be + # transcoded. Using invalid/undef :replace will result in loss of data in + # the event of invalid characters, but since tidy_bytes will replace + # invalid/undef with a "?" we're safe to do the same beforehand + string.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace) if string.encoding == Encoding::GB18030 + + transliterated = I18n.transliterate( ActiveSupport::Multibyte::Unicode.tidy_bytes(string).unicode_normalize(:nfc), replacement: replacement, locale: locale ) + + # Restore the string encoding of the input if it was not UTF-8. + # Apply invalid/undef :replace as tidy_bytes does + transliterated.encode!(input_encoding, invalid: :replace, undef: :replace) if input_encoding != transliterated.encoding + + transliterated end # Replaces special characters in a string so that it may be used as part of |