From dceef0828a23e8298dd9a9aab1a33c49e84f17d6 Mon Sep 17 00:00:00 2001 From: Norman Clarke Date: Mon, 12 Apr 2010 12:44:25 -0300 Subject: Improve reliability of Inflector.transliterate. [#4374 state:resolved] Signed-off-by: Jeremy Kemper --- activesupport/test/inflector_test_cases.rb | 5 ++- activesupport/test/transliterate_test.rb | 50 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 activesupport/test/transliterate_test.rb (limited to 'activesupport/test') diff --git a/activesupport/test/inflector_test_cases.rb b/activesupport/test/inflector_test_cases.rb index 29f87ac302..59515dad32 100644 --- a/activesupport/test/inflector_test_cases.rb +++ b/activesupport/test/inflector_test_cases.rb @@ -188,7 +188,10 @@ module InflectorTestCases StringToParameterizedAndNormalized = { "Malmö" => "malmo", "Garçons" => "garcons", - "Ops\331" => "ops" + "Ops\331" => "opsu", + "Ærøskøbing" => "aeroskobing", + "Aßlar" => "asslar", + "Japanese: 日本語" => "japanese" } UnderscoreToHuman = { diff --git a/activesupport/test/transliterate_test.rb b/activesupport/test/transliterate_test.rb new file mode 100644 index 0000000000..d689b6be73 --- /dev/null +++ b/activesupport/test/transliterate_test.rb @@ -0,0 +1,50 @@ +# encoding: utf-8 +require 'abstract_unit' +require 'active_support/inflector/transliterate' + +class TransliterateTest < Test::Unit::TestCase + + APPROXIMATIONS = { + "À"=>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE", + "Ç"=>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I", + "Î"=>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O", + "Õ"=>"O", "Ö"=>"O", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U", "Ü"=>"U", + "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a", "ã"=>"a", + "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e", "ê"=>"e", + "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d", "ñ"=>"n", + "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o", "ù"=>"u", + "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y", "Ā"=>"A", + "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C", "ć"=>"c", + "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c", "Ď"=>"D", + "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E", "ĕ"=>"e", + "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e", "Ĝ"=>"G", + "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G", "ģ"=>"g", + "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i", "Ī"=>"I", + "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I", "ı"=>"i", + "IJ"=>"IJ", "ij"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k", "ĸ"=>"k", + "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l", "Ŀ"=>"L", + "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N", "ņ"=>"n", + "Ň"=>"N", "ň"=>"n", "ʼn"=>"n", "Ŋ"=>"NG", "ŋ"=>"ng", "Ō"=>"O", "ō"=>"o", + "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE", "œ"=>"oe", "Ŕ"=>"R", + "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r", "Ś"=>"S", "ś"=>"s", + "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S", "š"=>"s", "Ţ"=>"T", + "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t", "Ũ"=>"U", "ũ"=>"u", + "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U", "ů"=>"u", "Ű"=>"U", + "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w", "Ŷ"=>"Y", "ŷ"=>"y", + "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z", "Ž"=>"Z", "ž"=>"z" + } + + def test_transliterate_should_not_change_ascii_chars + (0..127).each do |byte| + char = [byte].pack("U") + assert_equal char, ActiveSupport::Inflector.transliterate(char) + end + end + + def test_should_convert_accented_chars_to_approximate_ascii_chars + APPROXIMATIONS.each do |given, expected| + assert_equal expected, ActiveSupport::Inflector.transliterate(given) + end + end + +end -- cgit v1.2.3