about summary refs log tree commit diff stats
path: root/activesupport/lib/active_support/core_ext/string/multibyte.rb
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/lib/active_support/core_ext/string/multibyte.rb')
-rw-r--r-- activesupport/lib/active_support/core_ext/string/multibyte.rb 81
1 files changed, 81 insertions, 0 deletions
diff --git a/activesupport/lib/active_support/core_ext/string/multibyte.rb b/activesupport/lib/active_support/core_ext/string/multibyte.rb
new file mode 100644
index 0000000000..5a2dc36f72
--- /dev/null
+++ b/activesupport/lib/active_support/core_ext/string/multibyte.rb
@@ -0,0 +1,81 @@
+# encoding: utf-8
+
+module ActiveSupport #:nodoc:
+ module CoreExtensions #:nodoc:
+ module String #:nodoc:
+ # Implements multibyte methods for easier access to multibyte characters in a String instance.
+ module Multibyte
+ unless '1.9'.respond_to?(:force_encoding)
+ # +mb_chars+ is a multibyte-safe proxy method for string methods.
+ #
+ # In Ruby 1.8 and older it creates and returns an instance of the ActiveSupport::Multibyte::Chars class which
+ # encapsulates the original string. Unicode-safe versions of all the String methods are defined on this proxy
+ # class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsulated string.
+ #
+ # name = 'Claus Müller'
+ # name.reverse #=> "rell??M sualC"
+ # name.length #=> 13
+ #
+ # name.mb_chars.reverse.to_s #=> "rellüM sualC"
+ # name.mb_chars.length #=> 12
+ #
+ # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding-aware, so we don't need
+ # a proxy class any more. This means that +mb_chars+ makes it easier to write code that runs on multiple Ruby
+ # versions.
+ #
+ # == Method chaining
+ #
+ # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows
+ # method chaining on the result of any of these methods.
+ #
+ # name.mb_chars.reverse.length #=> 12
+ #
+ # == Interoperability and configuration
+ #
+ # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
+ # String and Chars work as expected. The bang! methods change the internal string representation in the Chars
+ # object. Interoperability problems can be resolved easily with a +to_s+ call.
+ #
+ # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars. For
+ # information about how to change the default Multibyte behaviour, see ActiveSupport::Multibyte.
+ def mb_chars
+ if ActiveSupport::Multibyte.proxy_class.wants?(self) # the configured proxy class decides whether to wrap
+ ActiveSupport::Multibyte.proxy_class.new(self)
+ else
+ self # the proxy class declined this string, so hand it back unwrapped
+ end
+ end
+
+ # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
+ # them); returns false otherwise. The decision is delegated to ActiveSupport::Multibyte::Chars.consumes?.
+ def is_utf8?
+ ActiveSupport::Multibyte::Chars.consumes?(self)
+ end
+
+ unless '1.8.7 and later'.respond_to?(:chars) # only when String has no +chars+ of its own (the literal hints at pre-1.8.7)
+ alias chars mb_chars # expose the proxy accessor under the name +chars+ as well
+ end
+ else
+ # In Ruby 1.9 and newer +mb_chars+ returns +self+. In Ruby 1.8 and older +mb_chars+ creates and returns a
+ # Unicode-safe proxy for string operations; this makes it easier to write code that runs on multiple Ruby
+ # versions.
+ def mb_chars
+ self
+ end
+
+ # Returns true if the string's bytes are valid UTF-8; any encoding other than UTF-8, binary or US-ASCII yields false.
+ def is_utf8?
+ case encoding
+ when Encoding::UTF_8
+ valid_encoding? # already tagged UTF-8: just validate the bytes
+ when Encoding::ASCII_8BIT, Encoding::US_ASCII
+ dup.force_encoding(Encoding::UTF_8).valid_encoding? # re-tag a copy as UTF-8 so the receiver itself is not modified
+ else
+ false # every other encoding is reported as not UTF-8 without inspecting the bytes
+ end
+ end
+ end
+ end
+ end
+ end
+end