# encoding: utf-8
require 'abstract_unit'
require 'multibyte_test_helpers'
# Tests for the module-level helpers of ActiveSupport::Multibyte:
# valid_character, verify, verify! and clean.
#
# Every test runs inside with_encoding, which selects the encoding under test
# ('None', 'UTF8' or 'SJIS'), and pulls its input from the STRINGS fixtures
# through the example/examples helpers defined at the bottom of the class.
class MultibyteUtilsTest < ActiveSupport::TestCase
  include MultibyteTestHelpers

  test "valid_character returns an expression for the current encoding" do
    with_encoding('None') do
      assert_nil ActiveSupport::Multibyte.valid_character
    end

    with_encoding('UTF8') do
      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'], ActiveSupport::Multibyte.valid_character
    end

    with_encoding('SJIS') do
      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['Shift_JIS'], ActiveSupport::Multibyte.valid_character
    end
  end

  # With the 'None' encoding every fixture, including the ones labelled
  # invalid for other encodings, is expected to verify.
  test "verify verifies ASCII strings are properly encoded" do
    with_encoding('None') do
      examples.each do |example|
        assert ActiveSupport::Multibyte.verify(example)
      end
    end
  end

  test "verify verifies UTF-8 strings are properly encoded" do
    with_encoding('UTF8') do
      assert ActiveSupport::Multibyte.verify(example('valid UTF-8'))
      assert !ActiveSupport::Multibyte.verify(example('invalid UTF-8'))
    end
  end

  test "verify verifies Shift-JIS strings are properly encoded" do
    with_encoding('SJIS') do
      assert ActiveSupport::Multibyte.verify(example('valid Shift-JIS'))
      assert !ActiveSupport::Multibyte.verify(example('invalid Shift-JIS'))
    end
  end

  test "verify! raises an exception when it finds an invalid character" do
    with_encoding('UTF8') do
      assert_raises(ActiveSupport::Multibyte::EncodingError) do
        ActiveSupport::Multibyte.verify!(example('invalid UTF-8'))
      end
    end
  end

  test "verify! doesn't raise an exception when the encoding is valid" do
    with_encoding('UTF8') do
      assert_nothing_raised do
        ActiveSupport::Multibyte.verify!(example('valid UTF-8'))
      end
    end
  end

  # The expectations for clean depend on the interpreter version: before
  # Ruby 1.9 it is expected to strip invalid bytes, from 1.9 on it is expected
  # to return its input unchanged.
  if RUBY_VERSION < '1.9'
    test "clean leaves ASCII strings intact" do
      with_encoding('None') do
        ['word', "\270\236\010\210\245"].each do |string|
          assert_equal string, ActiveSupport::Multibyte.clean(string)
        end
      end
    end

    test "clean cleans invalid characters from UTF-8 encoded strings" do
      with_encoding('UTF8') do
        # Byte 8 is the only byte of the 'invalid UTF-8' fixture expected to
        # remain after cleaning.
        cleaned_utf8 = [8].pack('C*')
        assert_equal example('valid UTF-8'), ActiveSupport::Multibyte.clean(example('valid UTF-8'))
        assert_equal cleaned_utf8, ActiveSupport::Multibyte.clean(example('invalid UTF-8'))
      end
    end

    test "clean cleans invalid characters from Shift-JIS encoded strings" do
      with_encoding('SJIS') do
        # Bytes of the 'invalid Shift-JIS' fixture expected to remain after
        # cleaning.
        cleaned_sjis = [184, 0, 136, 165].pack('C*')
        assert_equal example('valid Shift-JIS'), ActiveSupport::Multibyte.clean(example('valid Shift-JIS'))
        assert_equal cleaned_sjis, ActiveSupport::Multibyte.clean(example('invalid Shift-JIS'))
      end
    end
  else
    test "clean is a no-op" do
      with_encoding('UTF8') do
        assert_equal example('invalid Shift-JIS'), ActiveSupport::Multibyte.clean(example('invalid Shift-JIS'))
      end
    end
  end

  private

  # Raw byte fixtures, keyed by a human-readable label. They are built with
  # pack('C*') so the source file itself never contains invalid byte
  # sequences. Both the hash and its strings are frozen; the helpers below
  # hand out copies.
  STRINGS = {
    'valid ASCII' => [65, 83, 67, 73, 73].pack('C*'),
    'invalid ASCII' => [128].pack('C*'),
    'valid UTF-8' => [227, 129, 147, 227, 129, 171, 227, 129, 161, 227, 130, 143].pack('C*'),
    'invalid UTF-8' => [184, 158, 8, 136, 165].pack('C*'),
    'valid Shift-JIS' => [131, 122, 129, 91, 131, 128].pack('C*'),
    'invalid Shift-JIS' => [184, 158, 8, 0, 255, 136, 165].pack('C*')
  }.each_value { |bytes| bytes.freeze }.freeze

  if Kernel.const_defined?(:Encoding)
    # Returns a copy of the fixture stored under +key+, tagged with the
    # current Encoding.default_external.
    #
    # force_encoding changes its receiver in place, so the fixture is
    # duplicated first to keep the shared STRINGS entries untouched.
    # Raises (via Hash#fetch) when +key+ is not a known fixture label.
    def example(key)
      STRINGS.fetch(key).dup.force_encoding(Encoding.default_external)
    end

    # Returns copies of all fixtures, each tagged with the current
    # Encoding.default_external.
    def examples
      STRINGS.values.map { |bytes| bytes.dup.force_encoding(Encoding.default_external) }
    end
  else
    # Returns a copy of the fixture stored under +key+. Without the Encoding
    # constant there is no per-string encoding to set.
    # Raises (via Hash#fetch) when +key+ is not a known fixture label.
    def example(key)
      STRINGS.fetch(key).dup
    end

    # Returns copies of all fixtures.
    def examples
      STRINGS.values.map { |bytes| bytes.dup }
    end
  end

  if 'string'.respond_to?(:encoding)
    # Maps the KCODE-style names used by the tests to Encoding objects; any
    # name not listed (such as 'None') falls back to Encoding::BINARY.
    KCODE_TO_ENCODING = Hash.new(Encoding::BINARY).
      update('UTF8' => Encoding::UTF_8, 'SJIS' => Encoding::Shift_JIS).freeze

    # Runs the given block with Encoding.default_external switched to the
    # encoding registered for +enc+ in KCODE_TO_ENCODING.
    #
    # The previous default is restored in an ensure clause, so a failing
    # assertion or any other exception raised by the block cannot leak the
    # temporary encoding into later tests.
    def with_encoding(enc)
      before = Encoding.default_external
      # silence_warnings is defined outside this file; presumably it mutes the
      # warning Ruby prints when default_external is reassigned -- confirm.
      silence_warnings { Encoding.default_external = KCODE_TO_ENCODING[enc] }
      begin
        yield
      ensure
        silence_warnings { Encoding.default_external = before }
      end
    end
  else
    # with_kcode is not defined in this file; presumably it comes from
    # MultibyteTestHelpers -- confirm.
    alias with_encoding with_kcode
  end
end
|