aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/inflector
diff options
context:
space:
mode:
authorNorman Clarke <norman@njclarke.com>2010-04-12 12:44:25 -0300
committerJeremy Kemper <jeremy@bitsweat.net>2010-04-12 23:19:39 -0700
commitdceef0828a23e8298dd9a9aab1a33c49e84f17d6 (patch)
treec3fcba59013a1f543df7cfffda35c0ef4688b010 /activesupport/lib/active_support/inflector
parent36f3634a6afbaf36015abb531d6bea6360654b81 (diff)
downloadrails-dceef0828a23e8298dd9a9aab1a33c49e84f17d6.tar.gz
rails-dceef0828a23e8298dd9a9aab1a33c49e84f17d6.tar.bz2
rails-dceef0828a23e8298dd9a9aab1a33c49e84f17d6.zip
Improve reliability of Inflector.transliterate. [#4374 state:resolved]
Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
Diffstat (limited to 'activesupport/lib/active_support/inflector')
-rw-r--r--activesupport/lib/active_support/inflector/transliterate.rb61
1 files changed, 37 insertions, 24 deletions
diff --git a/activesupport/lib/active_support/inflector/transliterate.rb b/activesupport/lib/active_support/inflector/transliterate.rb
index ca591abc7d..9c99dcfb01 100644
--- a/activesupport/lib/active_support/inflector/transliterate.rb
+++ b/activesupport/lib/active_support/inflector/transliterate.rb
@@ -1,32 +1,47 @@
# encoding: utf-8
-require 'iconv'
-require 'kconv'
require 'active_support/core_ext/string/multibyte'
module ActiveSupport
module Inflector
extend self
-
- # Replaces accented characters with their ascii equivalents.
- def transliterate(string)
- Iconv.iconv('ascii//ignore//translit', 'utf-8', string).to_s
- end
- if RUBY_VERSION >= '1.9'
- undef_method :transliterate
- def transliterate(string)
- proxy = ActiveSupport::Multibyte.proxy_class.new(string)
- proxy.normalize(:kd).gsub(/[^\x00-\x7F]+/, '')
- end
+ # Unicode codepoint => ASCII approximation codepoint(s)
+ ASCII_APPROXIMATIONS = {
+ 198 => [65, 69], # Æ => AE
+ 208 => 68, # Ð => D
+ 216 => 79, # Ø => O
+ 222 => [84, 104], # Þ => Th
+ 223 => [115, 115], # ß => ss
+ 230 => [97, 101], # æ => ae
+ 240 => 100, # ð => d
+ 248 => 111, # ø => o
+ 254 => [116, 104], # þ => th
+ 272 => 68, # Đ => D
+ 273 => 100, # đ => d
+ 294 => 72, # Ħ => H
+ 295 => 104, # ħ => h
+ 305 => 105, # ı => i
+ 306 => [73, 74], # Ĳ => IJ
+ 307 => [105, 106], # ĳ => ij
+ 312 => 107, # ĸ => k
+ 319 => 76, # Ŀ => L
+ 320 => 108, # ŀ => l
+ 321 => 76, # Ł => L
+ 322 => 108, # ł => l
+ 329 => 110, # ŉ => n
+ 330 => [78, 71], # Ŋ => NG
+ 331 => [110, 103], # ŋ => ng
+ 338 => [79, 69], # Œ => OE
+ 339 => [111, 101], # œ => oe
+ 358 => 84, # Ŧ => T
+ 359 => 116 # ŧ => t
+ }
- # The iconv transliteration code doesn't function correctly
- # on some platforms, but it's very fast where it does function.
- elsif "foo" != (Inflector.transliterate("föö") rescue nil)
- undef_method :transliterate
- def transliterate(string)
- string.mb_chars.normalize(:kd). # Decompose accented characters
- gsub(/[^\x00-\x7F]+/, '') # Remove anything non-ASCII entirely (e.g. diacritics).
- end
+ # Replaces non-ASCII characters with an ASCII approximation, or deletes them if none exists.
+ def transliterate(string)
+ ActiveSupport::Multibyte::Chars.new(string).tidy_bytes.normalize(:d).unpack("U*").map do |char|
+ ASCII_APPROXIMATIONS[char] || (char if char < 128)
+ end.compact.flatten.pack("U*")
end
# Replaces special characters in a string so that it may be used as part of a 'pretty' URL.
@@ -45,8 +60,6 @@ module ActiveSupport
# <%= link_to(@person.name, person_path(@person)) %>
# # => <a href="/person/1-donald-e-knuth">Donald E. Knuth</a>
def parameterize(string, sep = '-')
- # remove malformed utf8 characters
- string = string.toutf8 unless string.is_utf8?
# replace accented chars with their ascii equivalents
parameterized_string = transliterate(string)
# Turn unwanted chars into the separator
@@ -59,6 +72,6 @@ module ActiveSupport
parameterized_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '')
end
parameterized_string.downcase
- end
+ end
end
end