about summary refs log tree commit diff stats
path: root/activesupport/lib/active_support/core_ext
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/lib/active_support/core_ext')
-rw-r--r-- activesupport/lib/active_support/core_ext/string.rb | 6
-rw-r--r-- activesupport/lib/active_support/core_ext/string/multibyte.rb | 81
-rw-r--r-- activesupport/lib/active_support/core_ext/string/unicode.rb | 66
3 files changed, 85 insertions, 68 deletions
diff --git a/activesupport/lib/active_support/core_ext/string.rb b/activesupport/lib/active_support/core_ext/string.rb
index 7ff2f11eff..16c544a577 100644
--- a/activesupport/lib/active_support/core_ext/string.rb
+++ b/activesupport/lib/active_support/core_ext/string.rb
@@ -1,9 +1,11 @@
+# encoding: utf-8
+
require 'active_support/core_ext/string/inflections'
require 'active_support/core_ext/string/conversions'
require 'active_support/core_ext/string/access'
require 'active_support/core_ext/string/starts_ends_with'
require 'active_support/core_ext/string/iterators'
-require 'active_support/core_ext/string/unicode'
+require 'active_support/core_ext/string/multibyte'
require 'active_support/core_ext/string/xchar'
require 'active_support/core_ext/string/filters'
require 'active_support/core_ext/string/behavior'
@@ -15,6 +17,6 @@ class String #:nodoc:
include ActiveSupport::CoreExtensions::String::Inflections
include ActiveSupport::CoreExtensions::String::StartsEndsWith
include ActiveSupport::CoreExtensions::String::Iterators
- include ActiveSupport::CoreExtensions::String::Unicode
include ActiveSupport::CoreExtensions::String::Behavior
+ include ActiveSupport::CoreExtensions::String::Multibyte
end
diff --git a/activesupport/lib/active_support/core_ext/string/multibyte.rb b/activesupport/lib/active_support/core_ext/string/multibyte.rb
new file mode 100644
index 0000000000..5a2dc36f72
--- /dev/null
+++ b/activesupport/lib/active_support/core_ext/string/multibyte.rb
@@ -0,0 +1,81 @@
+# encoding: utf-8
+
+module ActiveSupport #:nodoc:
+ module CoreExtensions #:nodoc:
+ module String #:nodoc:
+ # Implements multibyte methods for easier access to multibyte characters in a String instance.
+ module Multibyte
+ unless '1.9'.respond_to?(:force_encoding)
+ # +mb_chars+ is a multibyte safe proxy method for string methods.
+ #
+ # In Ruby 1.8 and older it creates and returns an instance of the ActiveSupport::Multibyte::Chars class which
+ # encapsulates the original string. Unicode safe versions of all the String methods are defined on this proxy
+ # class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsulated string.
+ #
+ # name = 'Claus Müller'
+ # name.reverse #=> "rell??M sualC"
+ # name.length #=> 13
+ #
+ # name.mb_chars.reverse.to_s #=> "rellüM sualC"
+ # name.mb_chars.length #=> 12
+ #
+ # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware so we don't need
+ # a proxy class any more. This means that +mb_chars+ makes it easier to write code that runs on multiple Ruby
+ # versions.
+ #
+ # == Method chaining
+ #
+ # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows
+ # method chaining on the result of any of these methods.
+ #
+ # name.mb_chars.reverse.length #=> 12
+ #
+ # == Interoperability and configuration
+ #
+ # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
+ # String and Chars work as expected. The bang! methods change the internal string representation in the Chars
+ # object. Interoperability problems can be resolved easily with a +to_s+ call.
+ #
+ # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars. For
+ # information about how to change the default Multibyte behaviour, see ActiveSupport::Multibyte.
+ def mb_chars
+ if ActiveSupport::Multibyte.proxy_class.wants?(self)
+ ActiveSupport::Multibyte.proxy_class.new(self)
+ else
+ self
+ end
+ end
+
+ # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
+ # them), returns false otherwise.
+ def is_utf8?
+ ActiveSupport::Multibyte::Chars.consumes?(self)
+ end
+
+ unless '1.8.7 and later'.respond_to?(:chars)
+ alias chars mb_chars
+ end
+ else
+ # In Ruby 1.9 and newer +mb_chars+ returns self. In Ruby 1.8 and older +mb_chars+ creates and returns a
+ # Unicode safe proxy for string operations. This makes it easier to write code that runs on multiple Ruby
+ # versions.
+ def mb_chars
+ self
+ end
+
+ # Returns true if the string has valid UTF-8 encoding.
+ def is_utf8?
+ case encoding
+ when Encoding::UTF_8
+ valid_encoding?
+ when Encoding::ASCII_8BIT, Encoding::US_ASCII
+ dup.force_encoding(Encoding::UTF_8).valid_encoding?
+ else
+ false
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/activesupport/lib/active_support/core_ext/string/unicode.rb b/activesupport/lib/active_support/core_ext/string/unicode.rb
deleted file mode 100644
index 666f7bcb65..0000000000
--- a/activesupport/lib/active_support/core_ext/string/unicode.rb
+++ /dev/null
@@ -1,66 +0,0 @@
-module ActiveSupport #:nodoc:
- module CoreExtensions #:nodoc:
- module String #:nodoc:
- # Define methods for handling unicode data.
- module Unicode
- def self.append_features(base)
- if '1.8.7 and later'.respond_to?(:chars)
- base.class_eval { remove_method :chars }
- end
- super
- end
-
- unless '1.9'.respond_to?(:force_encoding)
- # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the
- # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all
- # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
- # string overrides can also be called through the +chars+ proxy.
- #
- # name = 'Claus Müller'
- # name.reverse # => "rell??M sualC"
- # name.length # => 13
- #
- # name.chars.reverse.to_s # => "rellüM sualC"
- # name.chars.length # => 12
- #
- #
- # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
- # method chaining on the result of any of these methods.
- #
- # name.chars.reverse.length # => 12
- #
- # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
- # String and Char work like expected. The bang! methods change the internal string representation in the Chars
- # object. Interoperability problems can be resolved easily with a +to_s+ call.
- #
- # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
- # ActiveSupport::Multibyte::Handlers::UTF8Handler.
- def chars
- ActiveSupport::Multibyte::Chars.new(self)
- end
-
- # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
- # them), returns false otherwise.
- def is_utf8?
- ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
- end
- else
- def chars #:nodoc:
- self
- end
-
- def is_utf8? #:nodoc:
- case encoding
- when Encoding::UTF_8
- valid_encoding?
- when Encoding::ASCII_8BIT
- dup.force_encoding('UTF-8').valid_encoding?
- else
- false
- end
- end
- end
- end
- end
- end
-end