aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/inflector/transliterate.rb
diff options
context:
space:
mode:
authorXavier Noria <fxn@hashref.com>2010-05-01 02:23:47 -0700
committerXavier Noria <fxn@hashref.com>2010-05-01 02:23:47 -0700
commitefba1d4227514a6ce4880910a6531b0a6c3c75aa (patch)
treedaff4155b8c19b915125ac0b0da458279358e743 /activesupport/lib/active_support/inflector/transliterate.rb
parent81807e0fe2879e08563c91ee6809ab6d1d0bd081 (diff)
parent6c280f3398966ffba45069500ff43d632513fe44 (diff)
downloadrails-efba1d4227514a6ce4880910a6531b0a6c3c75aa.tar.gz
rails-efba1d4227514a6ce4880910a6531b0a6c3c75aa.tar.bz2
rails-efba1d4227514a6ce4880910a6531b0a6c3c75aa.zip
Merge commit 'rails/master'
Conflicts: railties/guides/source/index.html.erb
Diffstat (limited to 'activesupport/lib/active_support/inflector/transliterate.rb')
-rw-r--r--activesupport/lib/active_support/inflector/transliterate.rb94
1 files changed, 56 insertions, 38 deletions
diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb
index 9c99dcfb01..5ec87372d0 100644
--- a/activesupport/lib/active_support/inflector/transliterate.rb
+++ b/activesupport/lib/active_support/inflector/transliterate.rb
@@ -3,45 +3,62 @@ require 'active_support/core_ext/string/multibyte'
module ActiveSupport
module Inflector
- extend self
- # Unicode code point => code point(s) of its ASCII approximation
- ASCII_APPROXIMATIONS = {
- 198 => [65, 69], # Æ => AE
- 208 => 68, # Ð => D
- 216 => 79, # Ø => O
- 222 => [84, 104], # Þ => Th
- 223 => [115, 115], # ß => ss
- 230 => [97, 101], # æ => ae
- 240 => 100, # ð => d
- 248 => 111, # ø => o
- 254 => [116, 104], # þ => th
- 272 => 68, # Đ => D
- 273 => 100, # đ => d
- 294 => 72, # Ħ => H
- 295 => 104, # ħ => h
- 305 => 105, # ı => i
- 306 => [73, 74], # Ĳ => IJ
- 307 => [105, 106], # ĳ => ij
- 312 => 107, # ĸ => k
- 319 => 76, # Ŀ => L
- 320 => 108, # ŀ => l
- 321 => 76, # Ł => L
- 322 => 108, # ł => l
- 329 => 110, # ŉ => n
- 330 => [78, 71], # Ŋ => NG
- 331 => [110, 103], # ŋ => ng
- 338 => [79, 69], # Œ => OE
- 339 => [111, 101], # œ => oe
- 358 => 84, # Ŧ => T
- 359 => 116 # ŧ => t
- }
-
- # Replaces accented characters with an ASCII approximation, or deletes them if none exists.
- def transliterate(string)
- ActiveSupport::Multibyte::Chars.new(string).tidy_bytes.normalize(:d).unpack("U*").map do |char|
- ASCII_APPROXIMATIONS[char] || (char if char < 128)
- end.compact.flatten.pack("U*")
+ # Replaces non-ASCII characters with an ASCII approximation, or if none
+ # exists, a replacement character which defaults to "?".
+ #
+ # transliterate("Ærøskøbing")
+ # # => "AEroskobing"
+ #
+ # Default approximations are provided for Western/Latin characters,
+ # e.g. "ø", "ñ", "é", "ß", etc.
+ #
+ # This method is I18n aware, so you can set up custom approximations for a
+ # locale. This can be useful, for example, to transliterate German's "ü"
+ # and "ö" to "ue" and "oe", or to add support for transliterating Russian
+ # to ASCII.
+ #
+ # In order to make your custom transliterations available, you must set
+ # them as the <tt>i18n.transliterate.rule</tt> i18n key:
+ #
+ # # Store the transliterations in locales/de.yml
+ # i18n:
+ #   transliterate:
+ #     rule:
+ #       ü: "ue"
+ #       ö: "oe"
+ #
+ # # Or set them using Ruby
+ # I18n.backend.store_translations(:de, :i18n => {
+ # :transliterate => {
+ # :rule => {
+ # "ü" => "ue",
+ # "ö" => "oe"
+ # }
+ # }
+ # })
+ #
+ # The value for <tt>i18n.transliterate.rule</tt> can be a simple Hash that maps
+ # characters to ASCII approximations as shown above, or, for more complex
+ # requirements, a Proc:
+ #
+ # I18n.backend.store_translations(:de, :i18n => {
+ # :transliterate => {
+ # :rule => lambda {|string| MyTransliterator.transliterate(string)}
+ # }
+ # })
+ #
+ # Now you can have different transliterations for each locale:
+ #
+ # I18n.locale = :en
+ # transliterate("Jürgen")
+ # # => "Jurgen"
+ #
+ # I18n.locale = :de
+ # transliterate("Jürgen")
+ # # => "Juergen"
+ def transliterate(string, replacement = "?") # +replacement+ stands in for characters with no ASCII approximation
+ I18n.transliterate(Multibyte::Chars.normalize( # NOTE(review): form :c is presumably canonical composition -- confirm
+ Multibyte::Chars.tidy_bytes(string), :c), :replacement => replacement) # tidy_bytes is applied to the raw input before normalizing
end
# Replaces special characters in a string so that it may be used as part of a 'pretty' URL.
@@ -73,5 +90,6 @@ module ActiveSupport
end
parameterized_string.downcase
end
+
end
end