Add ActiveSupport::Multibyte. Provides String#chars which lets you deal with strings as a sequence of chars, not of bytes. Closes #6242 [Julian Tarkhanov, Manfred Stienstra & Jan Behrens]

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@5223 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
author: Michael Koziarski <michael@koziarski.com> 2006-10-03 23:45:32 +0000
committer: Michael Koziarski <michael@koziarski.com> 2006-10-03 23:45:32 +0000
commit: f238d495b70a264abdb864fe8107e02766b285b4 (patch)
tree: cfe1f5df118b46d1426cfc87326c26c8fbe63a85 /activesupport/test/multibyte_chars_test.rb
parent: 8cb0079feabe011b7edd1c65114efdb7047a02ec (diff)
download: rails-f238d495b70a264abdb864fe8107e02766b285b4.tar.gz
rails-f238d495b70a264abdb864fe8107e02766b285b4.tar.bz2
rails-f238d495b70a264abdb864fe8107e02766b285b4.zip
1 files changed, 163 insertions, 0 deletions
diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb
new file mode 100644
index 0000000000..e5ad9d26ee
--- /dev/null
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -0,0 +1,163 @@
+require File.dirname(__FILE__) + '/abstract_unit'
+
+$KCODE = 'UTF8'
+
+class CharsTest < Test::Unit::TestCase
+  
+  def setup
+    @s = {
+      :utf8 => "Abcd Блå ﬃ блa  埋",
+      :ascii => "asci ias c iia s",
+      :bytes => "\270\236\010\210\245"
+    }
+  end
+  
+  def test_sanity
+    @s.each do |t, s|
+      assert s.respond_to?(:chars), "All string should have the chars method (#{t})"
+      assert s.respond_to?(:to_s), "All string should have the to_s method (#{t})"
+      assert_kind_of ActiveSupport::Multibyte::Chars, s.chars, "#chars should return an instance of Chars (#{t})"
+    end
+  end
+  
+  def test_comparability
+    @s.each do |t, s|
+      assert_equal s, s.chars.to_s, "Chars#to_s should return enclosed string unchanged"
+    end
+    assert_nothing_raised do
+      assert_equal "a", "a", "Normal string comparisons should be unaffected"
+      assert_not_equal "a", "b", "Normal string comparisons should be unaffected"
+      assert_not_equal "a".chars, "b".chars, "Chars objects should be comparable"
+      assert_equal "a".chars, "A".downcase.chars, "Chars objects should be comparable to each other"
+      assert_equal "a".chars, "A".downcase, "Chars objects should be comparable to strings coming from elsewhere"
+    end
+    
+    assert !@s[:utf8].eql?(@s[:utf8].chars), "Strict comparison is not supported"
+    assert_equal @s[:utf8], @s[:utf8].chars, "Chars should be compared by their enclosed string"
+
+    other_string = @s[:utf8].dup
+    assert_equal other_string, @s[:utf8].chars, "Chars should be compared by their enclosed string"
+    assert_equal other_string.chars, @s[:utf8].chars, "Chars should be compared by their enclosed string"
+    
+    strings = ['builder'.chars, 'armor'.chars, 'zebra'.chars]
+    strings.sort!
+    assert_equal ['armor', 'builder', 'zebra'], strings, "Chars should be sortable based on their enclosed string"
+
+    # This leads to a SystemStackError (stack level too deep) if the comparison is not wired properly
+    assert_raise(NameError) do
+      Chars
+    end
+  end
+  
+  def test_utf8?
+    assert @s[:utf8].is_utf8?, "UTF-8 strings are UTF-8"
+    assert @s[:ascii].is_utf8?, "All ASCII strings are also valid UTF-8"
+    assert !@s[:bytes].is_utf8?, "This bytestring isn't UTF-8"
+  end
+  
+  # The tests for the following methods are defined here because these methods can only be defined on the
+  # Chars class, for various reasons.
+  
+  def test_gsub
+    assert_equal 'éxa', 'éda'.chars.gsub(/d/, 'x')
+    with_kcode('none') do
+      assert_equal 'éxa', 'éda'.chars.gsub(/d/, 'x')
+    end
+  end
+  
+  def test_split
+    word = "eﬃcient"
+    chars = ["e", "ﬃ", "c", "i", "e", "n", "t"]
+    assert_equal chars, word.split(//)
+    assert_equal chars, word.chars.split(//)
+    assert_kind_of ActiveSupport::Multibyte::Chars, word.chars.split(//).first, "Split should return Chars instances"
+  end
+  
+  def test_regexp
+    with_kcode('none') do
+      assert_equal 12, (@s[:utf8].chars =~ /ﬃ/),
+        "Regex matching should be bypassed to String"
+    end
+    with_kcode('UTF8') do
+      assert_equal 9, (@s[:utf8].chars =~ /ﬃ/),
+        "Regex matching should be unicode aware"
+    end
+  end
+  
+  def test_pragma
+    with_kcode('UTF8') do
+      assert " ".chars.send(:utf8_pragma?), "UTF8 pragma should be on because KCODE is UTF8"
+    end
+    with_kcode('none') do
+      assert !" ".chars.send(:utf8_pragma?), "UTF8 pragma should be off"
+    end
+  end
+  
+  def test_handler_setting
+    handler = ''.chars.handler
+    
+    ActiveSupport::Multibyte::Chars.handler = :first
+    assert_equal :first, ''.chars.handler
+    ActiveSupport::Multibyte::Chars.handler = :second
+    assert_equal :second, ''.chars.handler
+    assert_raise(NoMethodError) do
+      ''.chars.handler.split
+    end
+    
+    ActiveSupport::Multibyte::Chars.handler = handler
+  end
+  
+  def test_method_chaining
+    assert_kind_of ActiveSupport::Multibyte::Chars, ''.chars.downcase
+    assert_kind_of ActiveSupport::Multibyte::Chars, ''.chars.strip, "Strip should return a Chars object"
+    assert_kind_of ActiveSupport::Multibyte::Chars, ''.chars.downcase.strip, "The Chars object should be " +
+        "forwarded down the call path for chaining"
+    assert_equal 'foo', "  FOO   ".chars.normalize.downcase.strip, "The Chars that results from the " +
+      " operations should be comparable to the string value of the result"
+  end
+  
+  def test_passthrough_on_kcode
+    # The easiest way to check if the passthrough is in place is through #size
+    with_kcode('nonce') do
+      assert_equal 26, @s[:utf8].chars.size
+    end
+    with_kcode('UTF8') do
+      assert_equal 17, @s[:utf8].chars.size
+    end
+  end
+    
+  def test_destructiveness  
+    # Note that we're testing the destructiveness here and not the correct behaviour of the methods
+    str = 'ac'
+    str.chars.insert(1, 'b')
+    assert_equal 'abc', str, 'Insert should be destructive for a string'
+    
+    str = 'ac'
+    str.chars.reverse!
+    assert_equal 'ca', str, 'reverse! should be destructive for a string'
+  end
+  
+  def test_resilience
+    assert_nothing_raised do
+      assert_equal 1, @s[:bytes].chars.size, "There's only one valid utf-8 byte in the string"
+    end
+    assert_nothing_raised do
+      assert_equal "\010", @s[:bytes].chars.reverse, "There's only one valid utf-8 byte in the string"
+    end
+    assert_nothing_raised do
+      @s[:bytes].chars.reverse!
+      assert_equal "\010", @s[:bytes], "There's only one valid utf-8 byte in the string"
+    end
+  end
+  
+  protected
+
+  def with_kcode(kcode)
+    old_kcode, $KCODE = $KCODE, kcode
+    begin
+      yield
+    ensure
+      $KCODE = old_kcode
+    end
+  end
+end
author	Michael Koziarski <michael@koziarski.com>	2006-10-03 23:45:32 +0000
committer	Michael Koziarski <michael@koziarski.com>	2006-10-03 23:45:32 +0000
commit	f238d495b70a264abdb864fe8107e02766b285b4 (patch)
tree	cfe1f5df118b46d1426cfc87326c26c8fbe63a85 /activesupport/test/multibyte_chars_test.rb
parent	8cb0079feabe011b7edd1c65114efdb7047a02ec (diff)
download	rails-f238d495b70a264abdb864fe8107e02766b285b4.tar.gz rails-f238d495b70a264abdb864fe8107e02766b285b4.tar.bz2 rails-f238d495b70a264abdb864fe8107e02766b285b4.zip