diff options
author | Norman Clarke <norman@njclarke.com> | 2010-04-12 12:44:25 -0300 |
---|---|---|
committer | Jeremy Kemper <jeremy@bitsweat.net> | 2010-04-12 23:19:39 -0700 |
commit | dceef0828a23e8298dd9a9aab1a33c49e84f17d6 (patch) | |
tree | c3fcba59013a1f543df7cfffda35c0ef4688b010 /activesupport/lib | |
parent | 36f3634a6afbaf36015abb531d6bea6360654b81 (diff) | |
download | rails-dceef0828a23e8298dd9a9aab1a33c49e84f17d6.tar.gz rails-dceef0828a23e8298dd9a9aab1a33c49e84f17d6.tar.bz2 rails-dceef0828a23e8298dd9a9aab1a33c49e84f17d6.zip |
Improve reliability of Inflector.transliterate. [#4374 state:resolved]
Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
Diffstat (limited to 'activesupport/lib')
-rw-r--r-- | activesupport/lib/active_support/inflector/transliterate.rb | 61 |
1 files changed, 37 insertions, 24 deletions
diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb index ca591abc7d..9c99dcfb01 100644 --- a/activesupport/lib/active_support/inflector/transliterate.rb +++ b/activesupport/lib/active_support/inflector/transliterate.rb @@ -1,32 +1,47 @@ # encoding: utf-8 -require 'iconv' -require 'kconv' require 'active_support/core_ext/string/multibyte' module ActiveSupport module Inflector extend self - - # Replaces accented characters with their ascii equivalents. - def transliterate(string) - Iconv.iconv('ascii//ignore//translit', 'utf-8', string).to_s - end - if RUBY_VERSION >= '1.9' - undef_method :transliterate - def transliterate(string) - proxy = ActiveSupport::Multibyte.proxy_class.new(string) - proxy.normalize(:kd).gsub(/[^\x00-\x7F]+/, '') - end + # Unicode codepoint => ASCII approximation codepoint(s) + ASCII_APPROXIMATIONS = { + 198 => [65, 69], # Æ => AE + 208 => 68, # Ð => D + 216 => 79, # Ø => O + 222 => [84, 104], # Þ => Th + 223 => [115, 115], # ß => ss + 230 => [97, 101], # æ => ae + 240 => 100, # ð => d + 248 => 111, # ø => o + 254 => [116, 104], # þ => th + 272 => 68, # Đ => D + 273 => 100, # đ => d + 294 => 72, # Ħ => H + 295 => 104, # ħ => h + 305 => 105, # ı => i + 306 => [73, 74], # Ĳ => IJ + 307 => [105, 106], # ĳ => ij + 312 => 107, # ĸ => k + 319 => 76, # Ŀ => L + 320 => 108, # ŀ => l + 321 => 76, # Ł => L + 322 => 108, # ł => l + 329 => 110, # ŉ => n + 330 => [78, 71], # Ŋ => NG + 331 => [110, 103], # ŋ => ng + 338 => [79, 69], # Œ => OE + 339 => [111, 101], # œ => oe + 358 => 84, # Ŧ => T + 359 => 116 # ŧ => t + } - # The iconv transliteration code doesn't function correctly - # on some platforms, but it's very fast where it does function. - elsif "foo" != (Inflector.transliterate("föö") rescue nil) - undef_method :transliterate - def transliterate(string) - string.mb_chars.normalize(:kd). 
# Decompose accented characters - gsub(/[^\x00-\x7F]+/, '') # Remove anything non-ASCII entirely (e.g. diacritics). - end + # Replaces accented characters with an ASCII approximation, or deletes them if none exists. + def transliterate(string) + ActiveSupport::Multibyte::Chars.new(string).tidy_bytes.normalize(:d).unpack("U*").map do |char| + ASCII_APPROXIMATIONS[char] || (char if char < 128) + end.compact.flatten.pack("U*") end # Replaces special characters in a string so that it may be used as part of a 'pretty' URL. @@ -45,8 +60,6 @@ module ActiveSupport # <%= link_to(@person.name, person_path(@person)) %> # # => <a href="/person/1-donald-e-knuth">Donald E. Knuth</a> def parameterize(string, sep = '-') - # remove malformed utf8 characters - string = string.toutf8 unless string.is_utf8? # replace accented chars with their ascii equivalents parameterized_string = transliterate(string) # Turn unwanted chars into the separator @@ -59,6 +72,6 @@ module ActiveSupport parameterized_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '') end parameterized_string.downcase - end + end end end |