1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
require File.dirname(__FILE__) + '/abstract_unit'
require 'open-uri'
$KCODE = 'UTF8'
UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::UNICODE_VERSION}/ucd"
UNIDATA_FILE = '/NormalizationTest.txt'
CACHE_DIR = File.dirname(__FILE__) + '/cache'
class Downloader
def self.download(from, to)
unless File.exist?(to)
$stderr.puts "Downloading #{from} to #{to}"
unless File.exists?(File.dirname(to))
system "mkdir -p #{File.dirname(to)}"
end
open(from) do |source|
File.open(to, 'w') do |target|
source.each_line do |l|
target.write l
end
end
end
end
end
end
class String
# Unicode Inspect returns the codepoints of the string in hex
def ui
"#{self} " + ("[%s]" % unpack("U*").map{|cp| cp.to_s(16) }.join(' '))
end unless ''.respond_to?(:ui)
end
Dir.mkdir(CACHE_DIR) unless File.exists?(CACHE_DIR)
Downloader.download(UNIDATA_URL + UNIDATA_FILE, CACHE_DIR + UNIDATA_FILE)
module ConformanceTest
def test_normalizations_C
each_line_of_norm_tests do |*cols|
col1, col2, col3, col4, col5, comment = *cols
# CONFORMANCE:
# 1. The following invariants must be true for all conformant implementations
#
# NFC
# c2 == NFC(c1) == NFC(c2) == NFC(c3)
assert_equal col2.ui, @handler.normalize(col1, :c).ui, "Form C - Col 2 has to be NFC(1) - #{comment}"
assert_equal col2.ui, @handler.normalize(col2, :c).ui, "Form C - Col 2 has to be NFC(2) - #{comment}"
assert_equal col2.ui, @handler.normalize(col3, :c).ui, "Form C - Col 2 has to be NFC(3) - #{comment}"
#
# c4 == NFC(c4) == NFC(c5)
assert_equal col4.ui, @handler.normalize(col4, :c).ui, "Form C - Col 4 has to be C(4) - #{comment}"
assert_equal col4.ui, @handler.normalize(col5, :c).ui, "Form C - Col 4 has to be C(5) - #{comment}"
end
end
def test_normalizations_D
each_line_of_norm_tests do |*cols|
col1, col2, col3, col4, col5, comment = *cols
#
# NFD
# c3 == NFD(c1) == NFD(c2) == NFD(c3)
assert_equal col3.ui, @handler.normalize(col1, :d).ui, "Form D - Col 3 has to be NFD(1) - #{comment}"
assert_equal col3.ui, @handler.normalize(col2, :d).ui, "Form D - Col 3 has to be NFD(2) - #{comment}"
assert_equal col3.ui, @handler.normalize(col3, :d).ui, "Form D - Col 3 has to be NFD(3) - #{comment}"
# c5 == NFD(c4) == NFD(c5)
assert_equal col5.ui, @handler.normalize(col4, :d).ui, "Form D - Col 5 has to be NFD(4) - #{comment}"
assert_equal col5.ui, @handler.normalize(col5, :d).ui, "Form D - Col 5 has to be NFD(5) - #{comment}"
end
end
def test_normalizations_KC
each_line_of_norm_tests do | *cols |
col1, col2, col3, col4, col5, comment = *cols
#
# NFKC
# c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
assert_equal col4.ui, @handler.normalize(col1, :kc).ui, "Form D - Col 4 has to be NFKC(1) - #{comment}"
assert_equal col4.ui, @handler.normalize(col2, :kc).ui, "Form D - Col 4 has to be NFKC(2) - #{comment}"
assert_equal col4.ui, @handler.normalize(col3, :kc).ui, "Form D - Col 4 has to be NFKC(3) - #{comment}"
assert_equal col4.ui, @handler.normalize(col4, :kc).ui, "Form D - Col 4 has to be NFKC(4) - #{comment}"
assert_equal col4.ui, @handler.normalize(col5, :kc).ui, "Form D - Col 4 has to be NFKC(5) - #{comment}"
end
end
def test_normalizations_KD
each_line_of_norm_tests do | *cols |
col1, col2, col3, col4, col5, comment = *cols
#
# NFKD
# c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
assert_equal col5.ui, @handler.normalize(col1, :kd).ui, "Form KD - Col 5 has to be NFKD(1) - #{comment}"
assert_equal col5.ui, @handler.normalize(col2, :kd).ui, "Form KD - Col 5 has to be NFKD(2) - #{comment}"
assert_equal col5.ui, @handler.normalize(col3, :kd).ui, "Form KD - Col 5 has to be NFKD(3) - #{comment}"
assert_equal col5.ui, @handler.normalize(col4, :kd).ui, "Form KD - Col 5 has to be NFKD(4) - #{comment}"
assert_equal col5.ui, @handler.normalize(col5, :kd).ui, "Form KD - Col 5 has to be NFKD(5) - #{comment}"
end
end
protected
def each_line_of_norm_tests(&block)
lines = 0
max_test_lines = 0 # Don't limit below 38, because that's the header of the testfile
File.open(File.dirname(__FILE__) + '/cache' + UNIDATA_FILE, 'r') do | f |
until f.eof? || (max_test_lines > 38 and lines > max_test_lines)
lines += 1
line = f.gets.chomp!
next if (line.empty? || line =~ /^\#/)
cols, comment = line.split("#")
cols = cols.split(";").map{|e| e.strip}.reject{|e| e.empty? }
next unless cols.length == 5
# codepoints are in hex in the test suite, pack wants them as integers
cols.map!{|c| c.split.map{|codepoint| codepoint.to_i(16)}.pack("U*") }
cols << comment
yield(*cols)
end
end
end
end
begin
require_library_or_gem('utf8proc_native')
require 'active_record/multibyte/handlers/utf8_handler_proc'
class ConformanceTestProc < Test::Unit::TestCase
include ConformanceTest
def setup
@handler = ::ActiveSupport::Multibyte::Handlers::UTF8HandlerProc
end
end
rescue LoadError
end
class ConformanceTestPure < Test::Unit::TestCase
include ConformanceTest
def setup
@handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler
end
end
|