aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/test/multibyte_utils_test.rb
blob: 1dff944922b96f2f2a75328feebae3910c6169a4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# encoding: utf-8

require 'abstract_unit'
require 'multibyte_test_helpers'
require 'active_support/core_ext/kernel/reporting'

class MultibyteUtilsTest < ActiveSupport::TestCase
  include MultibyteTestHelpers

  test "valid_character returns an expression for the current encoding" do
    with_encoding('None') do
      assert_nil ActiveSupport::Multibyte.valid_character
    end
    with_encoding('UTF8') do
      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'], ActiveSupport::Multibyte.valid_character
    end
    with_encoding('SJIS') do
      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['Shift_JIS'], ActiveSupport::Multibyte.valid_character
    end
  end

  test "verify verifies ASCII strings are properly encoded" do
    with_encoding('None') do
      examples.each do |example|
        assert ActiveSupport::Multibyte.verify(example)
      end
    end
  end

  test "verify verifies UTF-8 strings are properly encoded" do
    with_encoding('UTF8') do
      assert ActiveSupport::Multibyte.verify(example('valid UTF-8'))
      assert !ActiveSupport::Multibyte.verify(example('invalid UTF-8'))
    end
  end

  test "verify verifies Shift-JIS strings are properly encoded" do
    with_encoding('SJIS') do
      assert ActiveSupport::Multibyte.verify(example('valid Shift-JIS'))
      assert !ActiveSupport::Multibyte.verify(example('invalid Shift-JIS'))
    end
  end

  test "verify! raises an exception when it finds an invalid character" do
    with_encoding('UTF8') do
      assert_raises(ActiveSupport::Multibyte::EncodingError) do
        ActiveSupport::Multibyte.verify!(example('invalid UTF-8'))
      end
    end
  end

  test "verify! doesn't raise an exception when the encoding is valid" do
    with_encoding('UTF8') do
      assert_nothing_raised do
        ActiveSupport::Multibyte.verify!(example('valid UTF-8'))
      end
    end
  end

  if RUBY_VERSION < '1.9'
    test "clean leaves ASCII strings intact" do
      with_encoding('None') do
        [
          'word', "\270\236\010\210\245"
        ].each do |string|
          assert_equal string, ActiveSupport::Multibyte.clean(string)
        end
      end
    end

    test "clean cleans invalid characters from UTF-8 encoded strings" do
      with_encoding('UTF8') do
        cleaned_utf8 = [8].pack('C*')
        assert_equal example('valid UTF-8'), ActiveSupport::Multibyte.clean(example('valid UTF-8'))
        assert_equal cleaned_utf8, ActiveSupport::Multibyte.clean(example('invalid UTF-8'))
      end
    end

    test "clean cleans invalid characters from Shift-JIS encoded strings" do
      with_encoding('SJIS') do
        cleaned_sjis = [184, 0, 136, 165].pack('C*')
        assert_equal example('valid Shift-JIS'), ActiveSupport::Multibyte.clean(example('valid Shift-JIS'))
        assert_equal cleaned_sjis, ActiveSupport::Multibyte.clean(example('invalid Shift-JIS'))
      end
    end
  else
    test "clean is a no-op" do
      with_encoding('UTF8') do
        assert_equal example('invalid Shift-JIS'), ActiveSupport::Multibyte.clean(example('invalid Shift-JIS'))
      end
    end
  end

  private

  STRINGS = {
    'valid ASCII'       => [65, 83, 67, 73, 73].pack('C*'),
    'invalid ASCII'     => [128].pack('C*'),
    'valid UTF-8'       => [227, 129, 147, 227, 129, 171, 227, 129, 161, 227, 130, 143].pack('C*'),
    'invalid UTF-8'     => [184, 158, 8, 136, 165].pack('C*'),
    'valid Shift-JIS'   => [131, 122, 129, 91, 131, 128].pack('C*'),
    'invalid Shift-JIS' => [184, 158, 8, 0, 255, 136, 165].pack('C*')
  }

  if Kernel.const_defined?(:Encoding)
    def example(key)
      STRINGS[key].force_encoding(Encoding.default_external)
    end

    def examples
      STRINGS.values.map { |s| s.force_encoding(Encoding.default_external) }
    end
  else
    def example(key)
      STRINGS[key]
    end

    def examples
      STRINGS.values
    end
  end

  if 'string'.respond_to?(:encoding)
    KCODE_TO_ENCODING = Hash.new(Encoding::BINARY).
      update('UTF8' => Encoding::UTF_8, 'SJIS' => Encoding::Shift_JIS)

    def with_encoding(enc)
      before = Encoding.default_external
      silence_warnings { Encoding.default_external = KCODE_TO_ENCODING[enc] }

      yield

      silence_warnings { Encoding.default_external = before }
    end
  else
    alias with_encoding with_kcode
  end
end