From c95002c284add2da69845f2a9407c5dd6592cb62 Mon Sep 17 00:00:00 2001
From: Jeremy Kemper <jeremy@bitsweat.net>
Date: Fri, 21 Dec 2007 11:21:43 +0000
Subject: Multibyte: String#chars returns self for Ruby 1.9

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@8460 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
---
 .../lib/active_support/core_ext/string/unicode.rb  | 83 +++++++++++++---------
 .../lib/active_support/multibyte/chars.rb          | 10 +--
 activesupport/test/multibyte_chars_test.rb         | 13 +++-
 activesupport/test/multibyte_conformance.rb        |  6 +-
 activesupport/test/multibyte_handler_test.rb       |  6 +-
 5 files changed, 75 insertions(+), 43 deletions(-)

(limited to 'activesupport')

diff --git a/activesupport/lib/active_support/core_ext/string/unicode.rb b/activesupport/lib/active_support/core_ext/string/unicode.rb
index dd19fe5428..eab1c1d246 100644
--- a/activesupport/lib/active_support/core_ext/string/unicode.rb
+++ b/activesupport/lib/active_support/core_ext/string/unicode.rb
@@ -1,40 +1,59 @@
 module ActiveSupport #:nodoc:
   module CoreExtensions #:nodoc:
     module String #:nodoc:
-      # Define methods for handling unicode data.
-      module Unicode
-        # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the
-        # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all
-        # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
-        # string overrides can also be called through the +chars+ proxy.
-        #
-        #   name = 'Claus Müller'
-        #   name.reverse #=> "rell??M sualC"
-        #   name.length #=> 13
-        #
-        #   name.chars.reverse.to_s #=> "rellüM sualC"
-        #   name.chars.length #=> 12
-        #   
-        #
-        # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
-        # method chaining on the result of any of these methods.
-        #
-        #   name.chars.reverse.length #=> 12
-        #
-        # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
-        # String and Char work like expected. The bang! methods change the internal string representation in the Chars
-        # object. Interoperability problems can be resolved easily with a +to_s+ call.
-        #
-        # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
-        # ActiveSupport::Multibyte::Handlers::UTF8Handler
-        def chars
-          ActiveSupport::Multibyte::Chars.new(self)
+      if RUBY_VERSION < '1.9'
+        # Define methods for handling unicode data.
+        module Unicode
+          # +chars+ is a Unicode-safe proxy for string methods. It creates and returns an instance of the
+          # ActiveSupport::Multibyte::Chars class which encapsulates the original string. Unicode-safe versions of all
+          # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
+          # string overrides can also be called through the +chars+ proxy.
+          #
+          #   name = 'Claus Müller'
+          #   name.reverse #=> "rell??M sualC"
+          #   name.length #=> 13
+          #
+          #   name.chars.reverse.to_s #=> "rellüM sualC"
+          #   name.chars.length #=> 12
+          #
+          #
+          # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
+          # method chaining on the result of any of these methods.
+          #
+          #   name.chars.reverse.length #=> 12
+          #
+          # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
+          # String and Chars work as expected. The bang! methods change the internal string representation in the Chars
+          # object. Interoperability problems can be resolved easily with a +to_s+ call.
+          #
+          # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
+          # ActiveSupport::Multibyte::Handlers::UTF8Handler
+          def chars
+            ActiveSupport::Multibyte::Chars.new(self)
+          end
+
+          # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
+          # them), returns false otherwise. The check is delegated to UTF8Handler.consumes?.
+          def is_utf8?
+            ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
+          end
         end
+      else
+        module Unicode #:nodoc:
+          def chars # Ruby 1.9 branch: no Chars proxy is built; the receiver itself is returned
+            self
+          end
 
-        # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
-        # them), returns false otherwise.
-        def is_utf8?
-          ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
+          def is_utf8? # true when the string's bytes form valid UTF-8, judged from its declared encoding
+            case encoding
+              when Encoding::UTF_8 # already tagged UTF-8: only the byte sequence needs verifying
+                valid_encoding?
+              when Encoding::ASCII_8BIT, Encoding::US_ASCII # binary or US-ASCII (a UTF-8 subset): re-check bytes as UTF-8
+                dup.force_encoding('UTF-8').valid_encoding? # dup so the receiver's own encoding is left untouched
+              else # any other declared encoding is not treated as UTF-8
+                false
+            end
+          end
         end
       end
     end
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 2427f8c692..65114415eb 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -119,14 +119,8 @@ module ActiveSupport::Multibyte #:nodoc:
       
       # +utf8_pragma+ checks if it can send this string to the handlers. It makes sure @string isn't nil and $KCODE is
       # set to 'UTF8'.
-      if RUBY_VERSION < '1.9'
-        def utf8_pragma?
-          !@string.nil? && ($KCODE == 'UTF8')
-        end
-      else
-        def utf8_pragma?
-          false
-        end
+      def utf8_pragma? # NOTE(review): the always-false Ruby 1.9 variant is removed here; confirm Chars is unused on 1.9
+        !@string.nil? && ($KCODE == 'UTF8') # true only when @string is non-nil and $KCODE equals 'UTF8'
       end
   end
 end
diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb
index e8493f4708..4afb63b949 100644
--- a/activesupport/test/multibyte_chars_test.rb
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -1,6 +1,15 @@
 require 'abstract_unit'
 
-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION >= '1.9'
+  class CharsTest < Test::Unit::TestCase # variant of the suite defined only when RUBY_VERSION >= '1.9'
+    def test_chars_returns_self
+      str = 'abc'
+      assert_equal str.object_id, str.chars.object_id # equal object_ids: chars handed back the receiver itself
+    end
+  end
+else
+
+$KCODE = 'UTF8'
 
 class CharsTest < Test::Unit::TestCase
   
@@ -175,3 +184,5 @@ class CharsTest < Test::Unit::TestCase
     end
   end
 end
+
+end
diff --git a/activesupport/test/multibyte_conformance.rb b/activesupport/test/multibyte_conformance.rb
index fdcfda383f..05fb9ef7a7 100644
--- a/activesupport/test/multibyte_conformance.rb
+++ b/activesupport/test/multibyte_conformance.rb
@@ -1,7 +1,9 @@
 require 'abstract_unit'
 require 'open-uri'
 
-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION < '1.9'
+
+$KCODE = 'UTF8'
 
 UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::UNICODE_VERSION}/ucd"
 UNIDATA_FILE = '/NormalizationTest.txt'
@@ -140,3 +142,5 @@ class ConformanceTestPure < Test::Unit::TestCase
     @handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler
   end
 end
+
+end
diff --git a/activesupport/test/multibyte_handler_test.rb b/activesupport/test/multibyte_handler_test.rb
index f61176886b..a52392b8bd 100644
--- a/activesupport/test/multibyte_handler_test.rb
+++ b/activesupport/test/multibyte_handler_test.rb
@@ -1,6 +1,8 @@
 require 'abstract_unit'
 
-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION < '1.9'
+
+$KCODE = 'UTF8'
 
 class String
   # Unicode Inspect returns the codepoints of the string in hex
@@ -365,3 +367,5 @@ class UTF8HandlingTestPure < Test::Unit::TestCase
     @handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler
   end
 end
+
+end
-- 
cgit v1.2.3