diff options
Diffstat (limited to 'activesupport/lib/active_support/inflector/transliterate.rb')
-rw-r--r-- | activesupport/lib/active_support/inflector/transliterate.rb | 65 |
1 files changed, 48 insertions, 17 deletions
diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb index edea142e82..0d2a17970f 100644 --- a/activesupport/lib/active_support/inflector/transliterate.rb +++ b/activesupport/lib/active_support/inflector/transliterate.rb @@ -1,10 +1,10 @@ -# encoding: utf-8 -require 'active_support/core_ext/string/multibyte' -require 'active_support/i18n' +# frozen_string_literal: true + +require "active_support/core_ext/string/multibyte" +require "active_support/i18n" module ActiveSupport module Inflector - # Replaces non-ASCII characters with an ASCII approximation, or if none # exists, a replacement character which defaults to "?". # @@ -59,29 +59,60 @@ module ActiveSupport # transliterate('Jürgen') # # => "Juergen" def transliterate(string, replacement = "?") - I18n.transliterate(ActiveSupport::Multibyte::Unicode.normalize( - ActiveSupport::Multibyte::Unicode.tidy_bytes(string), :c), - :replacement => replacement) + raise ArgumentError, "Can only transliterate strings. Received #{string.class.name}" unless string.is_a?(String) + + I18n.transliterate( + ActiveSupport::Multibyte::Unicode.tidy_bytes(string).unicode_normalize(:nfc), + replacement: replacement + ) end # Replaces special characters in a string so that it may be used as part of # a 'pretty' URL. # # parameterize("Donald E. Knuth") # => "donald-e-knuth" - # parameterize("^trés|Jolie-- ") # => "tres-jolie" - def parameterize(string, sep = '-') - # replace accented chars with their ascii equivalents + # parameterize("^très|Jolie-- ") # => "tres-jolie" + # + # To use a custom separator, override the +separator+ argument. + # + # parameterize("Donald E. Knuth", separator: '_') # => "donald_e_knuth" + # parameterize("^très|Jolie__ ", separator: '_') # => "tres_jolie" + # + # To preserve the case of the characters in a string, use the +preserve_case+ argument. + # + # parameterize("Donald E. Knuth", preserve_case: true) # => "Donald-E-Knuth" + # parameterize("^très|Jolie-- ", preserve_case: true) # => "tres-Jolie" + # + # It preserves dashes and underscores unless they are used as separators: + # + # parameterize("^très|Jolie__ ") # => "tres-jolie__" + # parameterize("^très|Jolie-- ", separator: "_") # => "tres_jolie--" + # parameterize("^très_Jolie-- ", separator: ".") # => "tres_jolie--" + # + def parameterize(string, separator: "-", preserve_case: false) + # Replace accented chars with their ASCII equivalents. parameterized_string = transliterate(string) - # Turn unwanted chars into the separator - parameterized_string.gsub!(/[^a-z0-9\-_]+/i, sep) - unless sep.nil? || sep.empty? - re_sep = Regexp.escape(sep) + + # Turn unwanted chars into the separator. + parameterized_string.gsub!(/[^a-z0-9\-_]+/i, separator) + + unless separator.nil? || separator.empty? + if separator == "-" + re_duplicate_separator = /-{2,}/ + re_leading_trailing_separator = /^-|-$/i + else + re_sep = Regexp.escape(separator) + re_duplicate_separator = /#{re_sep}{2,}/ + re_leading_trailing_separator = /^#{re_sep}|#{re_sep}$/i + end # No more than one of the separator in a row. - parameterized_string.gsub!(/#{re_sep}{2,}/, sep) + parameterized_string.gsub!(re_duplicate_separator, separator) # Remove leading/trailing separator. - parameterized_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '') + parameterized_string.gsub!(re_leading_trailing_separator, "") end - parameterized_string.downcase + + parameterized_string.downcase! unless preserve_case + parameterized_string end end end |