diff options
Diffstat (limited to 'activesupport/lib/active_support/core_ext/string')
-rw-r--r-- | activesupport/lib/active_support/core_ext/string/multibyte.rb | 81 | ||||
-rw-r--r-- | activesupport/lib/active_support/core_ext/string/unicode.rb | 66 |
2 files changed, 81 insertions, 66 deletions
diff --git a/activesupport/lib/active_support/core_ext/string/multibyte.rb b/activesupport/lib/active_support/core_ext/string/multibyte.rb new file mode 100644 index 0000000000..5a2dc36f72 --- /dev/null +++ b/activesupport/lib/active_support/core_ext/string/multibyte.rb @@ -0,0 +1,81 @@ +# encoding: utf-8 + +module ActiveSupport #:nodoc: + module CoreExtensions #:nodoc: + module String #:nodoc: + # Implements multibyte methods for easier access to multibyte characters in a String instance. + module Multibyte + unless '1.9'.respond_to?(:force_encoding) + # +mb_chars+ is a multibyte safe proxy method for string methods. + # + # In Ruby 1.8 and older it creates and returns an instance of the ActiveSupport::Multibyte::Chars class which + # encapsulates the original string. Unicode safe versions of all the String methods are defined on this proxy + # class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsulated string. + # + # name = 'Claus Müller' + # name.reverse #=> "rell??M sualC" + # name.length #=> 13 + # + # name.mb_chars.reverse.to_s #=> "rellüM sualC" + # name.mb_chars.length #=> 12 + # + # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware so we don't need + # a proxy class any more. This means that +mb_chars+ makes it easier to write code that runs on multiple Ruby + # versions. + # + # == Method chaining + # + # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows + # method chaining on the result of any of these methods. + # + # name.mb_chars.reverse.length #=> 12 + # + # == Interoperability and configuration + # + # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between + # String and Chars work as expected. The bang! methods change the internal string representation in the Chars + # object. Interoperability problems can be resolved easily with a +to_s+ call. 
+ # + # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars. For + # information about how to change the default Multibyte behaviour, see ActiveSupport::Multibyte. + def mb_chars + if ActiveSupport::Multibyte.proxy_class.wants?(self) + ActiveSupport::Multibyte.proxy_class.new(self) + else + self + end + end + + # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have + # them), returns false otherwise. + def is_utf8? + ActiveSupport::Multibyte::Chars.consumes?(self) + end + + unless '1.8.7 and later'.respond_to?(:chars) + alias chars mb_chars + end + else + # In Ruby 1.9 and newer +mb_chars+ returns self. In Ruby 1.8 and older +mb_chars+ creates and returns a + # Unicode safe proxy for string operations; this makes it easier to write code that runs on multiple Ruby + # versions. + def mb_chars + self + end + + # Returns true if the string has valid UTF-8 encoding. + def is_utf8? + case encoding + when Encoding::UTF_8 + valid_encoding? + when Encoding::ASCII_8BIT, Encoding::US_ASCII + dup.force_encoding(Encoding::UTF_8).valid_encoding? + else + false + end + end + end + end + end + end +end diff --git a/activesupport/lib/active_support/core_ext/string/unicode.rb b/activesupport/lib/active_support/core_ext/string/unicode.rb deleted file mode 100644 index 666f7bcb65..0000000000 --- a/activesupport/lib/active_support/core_ext/string/unicode.rb +++ /dev/null @@ -1,66 +0,0 @@ -module ActiveSupport #:nodoc: - module CoreExtensions #:nodoc: - module String #:nodoc: - # Define methods for handling unicode data. - module Unicode - def self.append_features(base) - if '1.8.7 and later'.respond_to?(:chars) - base.class_eval { remove_method :chars } - end - super - end - - unless '1.9'.respond_to?(:force_encoding) - # +chars+ is a Unicode safe proxy for string methods. 
It creates and returns an instance of the - # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all - # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the - # string overrides can also be called through the +chars+ proxy. - # - # name = 'Claus Müller' - # name.reverse # => "rell??M sualC" - # name.length # => 13 - # - # name.chars.reverse.to_s # => "rellüM sualC" - # name.chars.length # => 12 - # - # - # All the methods on the chars proxy which normally return a string will return a Chars object. This allows - # method chaining on the result of any of these methods. - # - # name.chars.reverse.length # => 12 - # - # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between - # String and Char work like expected. The bang! methods change the internal string representation in the Chars - # object. Interoperability problems can be resolved easily with a +to_s+ call. - # - # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and - # ActiveSupport::Multibyte::Handlers::UTF8Handler. - def chars - ActiveSupport::Multibyte::Chars.new(self) - end - - # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have - # them), returns false otherwise. - def is_utf8? - ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self) - end - else - def chars #:nodoc: - self - end - - def is_utf8? #:nodoc: - case encoding - when Encoding::UTF_8 - valid_encoding? - when Encoding::ASCII_8BIT - dup.force_encoding('UTF-8').valid_encoding? - else - false - end - end - end - end - end - end -end |