aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/core_ext/string/unicode.rb
blob: ba16d4d86670e46ac93d8f5afceebd330c2046ae (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
module ActiveSupport #:nodoc:
  module CoreExtensions #:nodoc:
    module String #:nodoc:
      unless '1.9'.respond_to?(:force_encoding)
        # Define methods for handling unicode data.
        module Unicode
          def self.append_features(base)
            if '1.8.7'.respond_to?(:chars)
              base.class_eval { remove_method :chars }
            end
            super
          end

          # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the
          # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all
          # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
          # string overrides can also be called through the +chars+ proxy.
          #
          #   name = 'Claus Müller'
          #   name.reverse  #=> "rell??M sualC"
          #   name.length   #=> 13
          #
          #   name.chars.reverse.to_s   #=> "rellüM sualC"
          #   name.chars.length         #=> 12
          #   
          #
          # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
          # method chaining on the result of any of these methods.
          #
          #   name.chars.reverse.length #=> 12
          #
          # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
          # String and Char work like expected. The bang! methods change the internal string representation in the Chars
          # object. Interoperability problems can be resolved easily with a +to_s+ call.
          #
          # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
          # ActiveSupport::Multibyte::Handlers::UTF8Handler.
          def chars
            ActiveSupport::Multibyte::Chars.new(self)
          end

          # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
          # them), returns false otherwise.
          def is_utf8?
            ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
          end
        end
      else
        module Unicode #:nodoc:
          def chars
            self
          end

          def is_utf8?
            case encoding
              when Encoding::UTF_8
                valid_encoding?
              when Encoding::ASCII_8BIT
                dup.force_encoding('UTF-8').valid_encoding?
              else
                false
            end
          end
        end
      end
    end
  end
end