From c95002c284add2da69845f2a9407c5dd6592cb62 Mon Sep 17 00:00:00 2001 From: Jeremy Kemper Date: Fri, 21 Dec 2007 11:21:43 +0000 Subject: Multibyte: String#chars returns self for Ruby 1.9 git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@8460 5ecf4fe2-1ee6-0310-87b1-e25e094e27de --- .../lib/active_support/core_ext/string/unicode.rb | 83 +++++++++++++--------- .../lib/active_support/multibyte/chars.rb | 10 +-- activesupport/test/multibyte_chars_test.rb | 13 +++- activesupport/test/multibyte_conformance.rb | 6 +- activesupport/test/multibyte_handler_test.rb | 6 +- 5 files changed, 75 insertions(+), 43 deletions(-) (limited to 'activesupport') diff --git a/activesupport/lib/active_support/core_ext/string/unicode.rb b/activesupport/lib/active_support/core_ext/string/unicode.rb index dd19fe5428..eab1c1d246 100644 --- a/activesupport/lib/active_support/core_ext/string/unicode.rb +++ b/activesupport/lib/active_support/core_ext/string/unicode.rb @@ -1,40 +1,59 @@ module ActiveSupport #:nodoc: module CoreExtensions #:nodoc: module String #:nodoc: - # Define methods for handling unicode data. - module Unicode - # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the - # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all - # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the - # string overrides can also be called through the +chars+ proxy. - # - # name = 'Claus Müller' - # name.reverse #=> "rell??M sualC" - # name.length #=> 13 - # - # name.chars.reverse.to_s #=> "rellüM sualC" - # name.chars.length #=> 12 - # - # - # All the methods on the chars proxy which normally return a string will return a Chars object. This allows - # method chaining on the result of any of these methods. - # - # name.chars.reverse.length #=> 12 - # - # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between - # String and Char work like expected. The bang! methods change the internal string representation in the Chars - # object. Interoperability problems can be resolved easily with a +to_s+ call. - # - # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and - # ActiveSupport::Multibyte::Handlers::UTF8Handler - def chars - ActiveSupport::Multibyte::Chars.new(self) + if RUBY_VERSION < '1.9' + # Define methods for handling unicode data. + module Unicode + # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the + # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all + # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the + # string overrides can also be called through the +chars+ proxy. + # + # name = 'Claus Müller' + # name.reverse #=> "rell??M sualC" + # name.length #=> 13 + # + # name.chars.reverse.to_s #=> "rellüM sualC" + # name.chars.length #=> 12 + # + # + # All the methods on the chars proxy which normally return a string will return a Chars object. This allows + # method chaining on the result of any of these methods. + # + # name.chars.reverse.length #=> 12 + # + # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between + # String and Char work like expected. The bang! methods change the internal string representation in the Chars + # object. Interoperability problems can be resolved easily with a +to_s+ call. + # + # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and + # ActiveSupport::Multibyte::Handlers::UTF8Handler + def chars + ActiveSupport::Multibyte::Chars.new(self) + end + + # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have + # them), returns false otherwise. + def is_utf8? + ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self) + end end + else + module Unicode #:nodoc: + def chars + self + end - # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have - # them), returns false otherwise. - def is_utf8? - ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self) + def is_utf8? + case encoding + when Encoding::UTF_8 + valid_encoding? + when Encoding::ASCII_8BIT + dup.force_encoding('UTF-8').valid_encoding? + else + false + end + end end end end diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb index 2427f8c692..65114415eb 100644 --- a/activesupport/lib/active_support/multibyte/chars.rb +++ b/activesupport/lib/active_support/multibyte/chars.rb @@ -119,14 +119,8 @@ module ActiveSupport::Multibyte #:nodoc: # +utf8_pragma+ checks if it can send this string to the handlers. It makes sure @string isn't nil and $KCODE is # set to 'UTF8'. - if RUBY_VERSION < '1.9' - def utf8_pragma? - !@string.nil? && ($KCODE == 'UTF8') - end - else - def utf8_pragma? - false - end + def utf8_pragma? + !@string.nil? && ($KCODE == 'UTF8') end end end diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb index e8493f4708..4afb63b949 100644 --- a/activesupport/test/multibyte_chars_test.rb +++ b/activesupport/test/multibyte_chars_test.rb @@ -1,6 +1,15 @@ require 'abstract_unit' -$KCODE = 'UTF8' if RUBY_VERSION < '1.9' +if RUBY_VERSION >= '1.9' + class CharsTest < Test::Unit::TestCase + def test_chars_returns_self + str = 'abc' + assert_equal str.object_id, str.chars.object_id + end + end +else + +$KCODE = 'UTF8' class CharsTest < Test::Unit::TestCase @@ -175,3 +184,5 @@ class CharsTest < Test::Unit::TestCase end end end + +end diff --git a/activesupport/test/multibyte_conformance.rb b/activesupport/test/multibyte_conformance.rb index fdcfda383f..05fb9ef7a7 100644 --- a/activesupport/test/multibyte_conformance.rb +++ b/activesupport/test/multibyte_conformance.rb @@ -1,7 +1,9 @@ require 'abstract_unit' require 'open-uri' -$KCODE = 'UTF8' if RUBY_VERSION < '1.9' +if RUBY_VERSION < '1.9' + +$KCODE = 'UTF8' UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::UNICODE_VERSION}/ucd" UNIDATA_FILE = '/NormalizationTest.txt' @@ -140,3 +142,5 @@ class ConformanceTestPure < Test::Unit::TestCase @handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler end end + +end diff --git a/activesupport/test/multibyte_handler_test.rb b/activesupport/test/multibyte_handler_test.rb index f61176886b..a52392b8bd 100644 --- a/activesupport/test/multibyte_handler_test.rb +++ b/activesupport/test/multibyte_handler_test.rb @@ -1,6 +1,8 @@ require 'abstract_unit' -$KCODE = 'UTF8' if RUBY_VERSION < '1.9' +if RUBY_VERSION < '1.9' + +$KCODE = 'UTF8' class String # Unicode Inspect returns the codepoints of the string in hex @@ -365,3 +367,5 @@ class UTF8HandlingTestPure < Test::Unit::TestCase @handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler end end + +end -- cgit v1.2.3