aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/multibyte/utils.rb
blob: 8e47763d396eac9617e42a5f2e1ba3f5755fbda7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# encoding: utf-8

module ActiveSupport #:nodoc:
  module Multibyte #:nodoc:
    if Kernel.const_defined?(:Encoding)
      # Returns the entry of VALID_CHARACTER (a regular expression matching a
      # valid character) for the default external encoding.
      def self.valid_character
        encoding_name = Encoding.default_external.to_s
        VALID_CHARACTER[encoding_name]
      end
    else
      # Returns the entry of VALID_CHARACTER for the encoding selected through
      # $KCODE. Only 'UTF8' and 'SJIS' are recognised; any other value yields nil.
      def self.valid_character
        if 'UTF8' == $KCODE
          VALID_CHARACTER['UTF-8']
        elsif 'SJIS' == $KCODE
          VALID_CHARACTER['Shift_JIS']
        end
      end
    end

    if 'string'.respond_to?(:valid_encoding?)
      # Returns true when the string is valid in its own encoding, false otherwise.
      # Delegates to String#valid_encoding?.
      def self.verify(string)
        string.valid_encoding?
      end
    else
      # Returns true when every character of the string matches the expression
      # returned by valid_character, false otherwise. When no expression is
      # available for the current encoding the string is considered valid.
      def self.verify(string)
        pattern = valid_character
        return true unless pattern

        string.split(//).all? { |char| pattern.match(char) }
      end
    end

    # Checks the encoding of the string with verify and raises an EncodingError
    # when it is not valid. Returns nil for a valid string.
    def self.verify!(string)
      return if verify(string)

      raise EncodingError, "Found characters with invalid encoding"
    end

    if 'string'.respond_to?(:force_encoding)
      # Removes all invalid characters from the string.
      #
      # Note: this method is a no-op in Ruby 1.9; the string passed in is
      # returned unchanged.
      def self.clean(string)
        string
      end
    else
      # Removes all invalid characters from the string: returns a new string
      # built from the characters that match the expression returned by
      # valid_character. When no expression is available for the current
      # encoding the original string is returned as is.
      def self.clean(string)
        pattern = valid_character
        return string unless pattern

        string.split(//).select { |char| pattern.match(char) }.join
      end
    end
  end
end