about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- activesupport/lib/active_support/core_ext/string/unicode.rb 83
-rw-r--r-- activesupport/lib/active_support/multibyte/chars.rb 10
-rw-r--r-- activesupport/test/multibyte_chars_test.rb 13
-rw-r--r-- activesupport/test/multibyte_conformance.rb 6
-rw-r--r-- activesupport/test/multibyte_handler_test.rb 6
5 files changed, 75 insertions, 43 deletions
diff --git a/activesupport/lib/active_support/core_ext/string/unicode.rb b/activesupport/lib/active_support/core_ext/string/unicode.rb
index dd19fe5428..eab1c1d246 100644
--- a/activesupport/lib/active_support/core_ext/string/unicode.rb
+++ b/activesupport/lib/active_support/core_ext/string/unicode.rb
@@ -1,40 +1,59 @@
module ActiveSupport #:nodoc:
module CoreExtensions #:nodoc:
module String #:nodoc:
- # Define methods for handling unicode data.
- module Unicode
- # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the
- # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all
- # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
- # string overrides can also be called through the +chars+ proxy.
- #
- # name = 'Claus Müller'
- # name.reverse #=> "rell??M sualC"
- # name.length #=> 13
- #
- # name.chars.reverse.to_s #=> "rellüM sualC"
- # name.chars.length #=> 12
- #
- #
- # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
- # method chaining on the result of any of these methods.
- #
- # name.chars.reverse.length #=> 12
- #
- # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
- # String and Char work like expected. The bang! methods change the internal string representation in the Chars
- # object. Interoperability problems can be resolved easily with a +to_s+ call.
- #
- # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
- # ActiveSupport::Multibyte::Handlers::UTF8Handler
- def chars
- ActiveSupport::Multibyte::Chars.new(self)
+ if RUBY_VERSION < '1.9'
+ # Define methods for handling unicode data.
+ module Unicode
+ # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the
+ # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all
+ # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
+ # string overrides can also be called through the +chars+ proxy.
+ #
+ # name = 'Claus Müller'
+ # name.reverse #=> "rell??M sualC"
+ # name.length #=> 13
+ #
+ # name.chars.reverse.to_s #=> "rellüM sualC"
+ # name.chars.length #=> 12
+ #
+ #
+ # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
+ # method chaining on the result of any of these methods.
+ #
+ # name.chars.reverse.length #=> 12
+ #
+ # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
+ # String and Char work like expected. The bang! methods change the internal string representation in the Chars
+ # object. Interoperability problems can be resolved easily with a +to_s+ call.
+ #
+ # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
+ # ActiveSupport::Multibyte::Handlers::UTF8Handler
+ def chars
+ ActiveSupport::Multibyte::Chars.new(self)
+ end
+
+ # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
+ # them), returns false otherwise.
+ def is_utf8?
+ ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
+ end
end
+ else
+ module Unicode #:nodoc:
+ def chars
+ self
+ end
- # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
- # them), returns false otherwise.
- def is_utf8?
- ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
+ def is_utf8?
+ case encoding
+ when Encoding::UTF_8
+ valid_encoding?
+ when Encoding::ASCII_8BIT
+ dup.force_encoding('UTF-8').valid_encoding?
+ else
+ false
+ end
+ end
end
end
end
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 2427f8c692..65114415eb 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -119,14 +119,8 @@ module ActiveSupport::Multibyte #:nodoc:
# +utf8_pragma+ checks if it can send this string to the handlers. It makes sure @string isn't nil and $KCODE is
# set to 'UTF8'.
- if RUBY_VERSION < '1.9'
- def utf8_pragma?
- !@string.nil? && ($KCODE == 'UTF8')
- end
- else
- def utf8_pragma?
- false
- end
+ def utf8_pragma?
+ !@string.nil? && ($KCODE == 'UTF8')
end
end
end
diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb
index e8493f4708..4afb63b949 100644
--- a/activesupport/test/multibyte_chars_test.rb
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -1,6 +1,15 @@
require 'abstract_unit'
-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION >= '1.9'
+ class CharsTest < Test::Unit::TestCase
+ def test_chars_returns_self
+ str = 'abc'
+ assert_equal str.object_id, str.chars.object_id
+ end
+ end
+else
+
+$KCODE = 'UTF8'
class CharsTest < Test::Unit::TestCase
@@ -175,3 +184,5 @@ class CharsTest < Test::Unit::TestCase
end
end
end
+
+end
diff --git a/activesupport/test/multibyte_conformance.rb b/activesupport/test/multibyte_conformance.rb
index fdcfda383f..05fb9ef7a7 100644
--- a/activesupport/test/multibyte_conformance.rb
+++ b/activesupport/test/multibyte_conformance.rb
@@ -1,7 +1,9 @@
require 'abstract_unit'
require 'open-uri'
-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION < '1.9'
+
+$KCODE = 'UTF8'
UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::UNICODE_VERSION}/ucd"
UNIDATA_FILE = '/NormalizationTest.txt'
@@ -140,3 +142,5 @@ class ConformanceTestPure < Test::Unit::TestCase
@handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler
end
end
+
+end
diff --git a/activesupport/test/multibyte_handler_test.rb b/activesupport/test/multibyte_handler_test.rb
index f61176886b..a52392b8bd 100644
--- a/activesupport/test/multibyte_handler_test.rb
+++ b/activesupport/test/multibyte_handler_test.rb
@@ -1,6 +1,8 @@
require 'abstract_unit'
-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION < '1.9'
+
+$KCODE = 'UTF8'
class String
# Unicode Inspect returns the codepoints of the string in hex
@@ -365,3 +367,5 @@ class UTF8HandlingTestPure < Test::Unit::TestCase
@handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler
end
end
+
+end