aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/test/multibyte_chars_test.rb
diff options
context:
space:
mode:
author Michael Koziarski <michael@koziarski.com> 2006-10-03 23:45:32 +0000
committer Michael Koziarski <michael@koziarski.com> 2006-10-03 23:45:32 +0000
commit f238d495b70a264abdb864fe8107e02766b285b4 (patch)
tree cfe1f5df118b46d1426cfc87326c26c8fbe63a85 /activesupport/test/multibyte_chars_test.rb
parent 8cb0079feabe011b7edd1c65114efdb7047a02ec (diff)
download rails-f238d495b70a264abdb864fe8107e02766b285b4.tar.gz
rails-f238d495b70a264abdb864fe8107e02766b285b4.tar.bz2
rails-f238d495b70a264abdb864fe8107e02766b285b4.zip
Add ActiveSupport::Multibyte. Provides String#chars which lets you deal with strings as a sequence of chars, not of bytes. Closes #6242 [Julian Tarkhanov, Manfred Stienstra & Jan Behrens]
git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@5223 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
Diffstat (limited to 'activesupport/test/multibyte_chars_test.rb')
-rw-r--r-- activesupport/test/multibyte_chars_test.rb 163
1 files changed, 163 insertions, 0 deletions
diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb
new file mode 100644
index 0000000000..e5ad9d26ee
--- /dev/null
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -0,0 +1,163 @@
+require File.dirname(__FILE__) + '/abstract_unit'
+
+$KCODE = 'UTF8' # file-wide setting; individual tests change it temporarily through with_kcode
+
+class CharsTest < Test::Unit::TestCase
+
+ def setup
+ @s = {
+ :utf8 => "Abcd Блå ffi блa 埋",
+ :ascii => "asci ias c iia s",
+ :bytes => "\270\236\010\210\245"
+ }
+ end
+
+ def test_sanity
+ @s.each do |t, s|
+ assert s.respond_to?(:chars), "All string should have the chars method (#{t})"
+ assert s.respond_to?(:to_s), "All string should have the to_s method (#{t})"
+ assert_kind_of ActiveSupport::Multibyte::Chars, s.chars, "#chars should return an instance of Chars (#{t})"
+ end
+ end
+
+ def test_comparability
+ @s.each do |t, s|
+ assert_equal s, s.chars.to_s, "Chars#to_s should return enclosed string unchanged"
+ end
+ assert_nothing_raised do
+ assert_equal "a", "a", "Normal string comparisons should be unaffected"
+ assert_not_equal "a", "b", "Normal string comparisons should be unaffected"
+ assert_not_equal "a".chars, "b".chars, "Chars objects should be comparable"
+ assert_equal "a".chars, "A".downcase.chars, "Chars objects should be comparable to each other"
+ assert_equal "a".chars, "A".downcase, "Chars objects should be comparable to strings coming from elsewhere"
+ end
+
+ assert !@s[:utf8].eql?(@s[:utf8].chars), "Strict comparison is not supported"
+ assert_equal @s[:utf8], @s[:utf8].chars, "Chars should be compared by their enclosed string"
+
+ other_string = @s[:utf8].dup
+ assert_equal other_string, @s[:utf8].chars, "Chars should be compared by their enclosed string"
+ assert_equal other_string.chars, @s[:utf8].chars, "Chars should be compared by their enclosed string"
+
+ strings = ['builder'.chars, 'armor'.chars, 'zebra'.chars]
+ strings.sort!
+ assert_equal ['armor', 'builder', 'zebra'], strings, "Chars should be sortable based on their enclosed string"
+
+ # This leads to a StackLevelTooDeep exception if the comparison is not wired properly
+ assert_raise(NameError) do
+ Chars
+ end
+ end
+
+ def test_utf8?
+ assert @s[:utf8].is_utf8?, "UTF-8 strings are UTF-8"
+ assert @s[:ascii].is_utf8?, "All ASCII strings are also valid UTF-8"
+ assert !@s[:bytes].is_utf8?, "This bytestring isn't UTF-8"
+ end
+
+ # The test for the following methods are defined here because they can only be defined on the Chars class for
+ # various reasons
+
+ def test_gsub
+ assert_equal 'éxa', 'éda'.chars.gsub(/d/, 'x')
+ with_kcode('none') do
+ assert_equal 'éxa', 'éda'.chars.gsub(/d/, 'x')
+ end
+ end
+
+ def test_split
+ word = "efficient"
+ chars = ["e", "ffi", "c", "i", "e", "n", "t"]
+ assert_equal chars, word.split(//)
+ assert_equal chars, word.chars.split(//)
+ assert_kind_of ActiveSupport::Multibyte::Chars, word.chars.split(//).first, "Split should return Chars instances"
+ end
+
+ def test_regexp
+ with_kcode('none') do
+ assert_equal 12, (@s[:utf8].chars =~ /ffi/),
+ "Regex matching should be bypassed to String"
+ end
+ with_kcode('UTF8') do
+ assert_equal 9, (@s[:utf8].chars =~ /ffi/),
+ "Regex matching should be unicode aware"
+ end
+ end
+
+ def test_pragma
+ with_kcode('UTF8') do
+ assert " ".chars.send(:utf8_pragma?), "UTF8 pragma should be on because KCODE is UTF8"
+ end
+ with_kcode('none') do
+ assert !" ".chars.send(:utf8_pragma?), "UTF8 pragma should be off"
+ end
+ end
+
+ def test_handler_setting
+ handler = ''.chars.handler
+
+ ActiveSupport::Multibyte::Chars.handler = :first
+ assert_equal :first, ''.chars.handler
+ ActiveSupport::Multibyte::Chars.handler = :second
+ assert_equal :second, ''.chars.handler
+ assert_raise(NoMethodError) do
+ ''.chars.handler.split
+ end
+
+ ActiveSupport::Multibyte::Chars.handler = handler
+ end
+
+ def test_method_chaining
+ assert_kind_of ActiveSupport::Multibyte::Chars, ''.chars.downcase
+ assert_kind_of ActiveSupport::Multibyte::Chars, ''.chars.strip, "Strip should return a Chars object"
+ assert_kind_of ActiveSupport::Multibyte::Chars, ''.chars.downcase.strip, "The Chars object should be " +
+ "forwarded down the call path for chaining"
+ assert_equal 'foo', " FOO ".chars.normalize.downcase.strip, "The Chars that results from the " +
+ " operations should be comparable to the string value of the result"
+ end
+
+ def test_passthrough_on_kcode
+ # The easiest way to check if the passthrough is in place is through #size
+ with_kcode('nonce') do
+ assert_equal 26, @s[:utf8].chars.size
+ end
+ with_kcode('UTF8') do
+ assert_equal 17, @s[:utf8].chars.size
+ end
+ end
+
+ def test_destructiveness
+ # Note that we're testing the destructiveness here and not the correct behaviour of the methods
+ str = 'ac'
+ str.chars.insert(1, 'b')
+ assert_equal 'abc', str, 'Insert should be destructive for a string'
+
+ str = 'ac'
+ str.chars.reverse!
+ assert_equal 'ca', str, 'reverse! should be destructive for a string'
+ end
+
+ def test_resilience
+ assert_nothing_raised do
+ assert_equal 1, @s[:bytes].chars.size, "There's only one valid utf-8 byte in the string"
+ end
+ assert_nothing_raised do
+ assert_equal "\010", @s[:bytes].chars.reverse, "There's only one valid utf-8 byte in the string"
+ end
+ assert_nothing_raised do
+ @s[:bytes].chars.reverse!
+ assert_equal "\010", @s[:bytes], "There's only one valid utf-8 byte in the string"
+ end
+ end
+
+ protected
+
+ def with_kcode(kcode)
+ old_kcode, $KCODE = $KCODE, kcode
+ begin
+ yield
+ ensure
+ $KCODE = old_kcode
+ end
+ end
+end