Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/test/jdk/java/text/Normalizer/DataValidationTest.java
41149 views
1
/*
2
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*/
23
/*
24
* test
25
* bug 4221795
26
* summary Confirm *.icu data using ICU4J Normalizer
27
*/
28
29
import java.io.BufferedReader;
30
import java.io.FileInputStream;
31
import java.io.InputStreamReader;
32
import java.nio.charset.Charset;
33
import java.nio.charset.CharsetDecoder;
34
import java.util.BitSet;
35
import java.util.StringTokenizer;
36
37
import com.ibm.icu.text.Normalizer;
38
import com.ibm.icu.impl.NormalizerImpl;
39
40
/**
41
* This is not a test program but a data validation utility.
42
* Two datafiles for Normalizer, unorm.icu and uprops.icu under
43
* sun/text/resouces, are generated using generators in ICU4C 3.2 on a
44
* BIG-ENDIAN machine. Before using them with java.text.Normalizer and
45
* sun.text.Normalizer, you may want to check these test datafile's validation.
46
* You can test datafiles using Normalizer in ICU4J 3.2. Download ICU4J 3.2 and
47
* run this test program with -cp <ICU4J 3.2>.
48
*/
49
public class DataValidationTest {
50
51
//
52
// Options to be used with com.ibm.icu.text.Normalizer
53
//
54
55
/*
56
* Default Unicode 3.2.0 normalization.
57
*
58
* - With Corrigendum 4 fix
59
* (Different from Mustang's Normalizer.)
60
* - With Public Review Issue #29 fix
61
* (Different from Mustang's Normalizer.)
62
*/
63
private static final int UNICODE_3_2_0 = Normalizer.UNICODE_3_2;
64
65
/*
66
* *Incomplete* Unicode 3.2.0 normalization for IDNA/StringPrep.
67
*
68
* - With Corrigendum 4 fix
69
* - Without Public Review Issue #29 fix
70
*
71
* ICU4J's Normalizer itself doesn't support normalization for Unicode 3.2.0
72
* without Corrigendum 4 fix, which is necessary for IDNA/StringPrep. It is
73
* done in StringPrep. Therefore, we don't test the normlaization in this
74
* test program. We merely test normalization for Unicode 3.2.0 without
75
* Public Review Issue #29 fix with this test program.
76
*/
77
private static final int UNICODE_3_2_0_BEFORE_PRI_29 =
78
Normalizer.UNICODE_3_2 |
79
NormalizerImpl.BEFORE_PRI_29;
80
81
/*
82
* Default normalization.
83
*
84
* - Unicode 4.0.1
85
* (Different from Mustang's Normalizer.)
86
* - With Corrigendum 4 fix
87
* - With Public Review Issue #29 fix
88
* (Different from Mustang's Normalizer.)
89
*
90
* Because Public Review Issue #29 is fixed in Unicode 4.1.0. I think that
91
* IUC4J 3.2 should not support it. But it actually supports PRI #29 fix
92
* as default....
93
*/
94
private static final int UNICODE_LATEST = 0x00;
95
96
/*
97
* Normalization without Public Review Issue #29 fix.
98
*
99
* - Unicode 4.0.1
100
* - Without Corrigendum 4 fix
101
* - Without Public Review Issue #29 fix
102
*/
103
static final int UNICODE_LATEST_BEFORE_PRI_29 =
104
NormalizerImpl.BEFORE_PRI_29;
105
106
//
107
// Conformance test datafiles
108
//
109
110
/*
111
* Conformance test datafile for normalization for Unicode 3.2.0 with
112
* Corrigendum 4 corrections. This is NOT an original Conformace test
113
* data. Some inconvenient test cases are commented out.
114
* About corrigendum 4, please refer
115
* http://www.unicode.org/versions/corrigendum4.html
116
*
117
* ICU4J 3.2's Normalizer itself doesn't support normalization for Unicode
118
* 3.2.0 without Corrigendum 4 corrections. StringPrep helps it. So, we
119
* don't test the normalization with this test program.
120
*/
121
static final String DATA_3_2_0 = "NormalizationTest-3.2.0.Corrigendum4.txt";
122
123
/*
124
* Conformance test datafile for the latest Unicode which is supported
125
* by J2SE.
126
*/
127
static final String DATA_LATEST = "NormalizationTest-Latest.txt";
128
129
/*
130
* Decorder
131
*/
132
static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
133
134
/*
135
* List to pick up characters which are not listed in Part1
136
*/
137
static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);
138
139
/*
140
* Shortcuts
141
*/
142
static final Normalizer.Mode NFC = com.ibm.icu.text.Normalizer.NFC;
143
static final Normalizer.Mode NFD = com.ibm.icu.text.Normalizer.NFD;
144
static final Normalizer.Mode NFKC = com.ibm.icu.text.Normalizer.NFKC;
145
static final Normalizer.Mode NFKD = com.ibm.icu.text.Normalizer.NFKD;
146
static final Normalizer.Mode[] modes = {NFC, NFD, NFKC, NFKD};
147
148
149
public static void main(String[] args) throws Exception {
150
test(DATA_3_2_0, UNICODE_3_2_0);
151
test(DATA_3_2_0, UNICODE_3_2_0_BEFORE_PRI_29);
152
test(DATA_LATEST, UNICODE_LATEST);
153
// This test started failing since ICU4J 3.6.
154
// test(DATA_LATEST, UNICODE_LATEST_BEFORE_PRI_29);
155
156
/* Unconformity test */
157
// test(DATA_3_2_0, UNICODE_LATEST);
158
// test(DATA_LATEST, UNICODE_3_2);
159
}
160
161
private static void test(String filename, int unicodeVer) throws Exception {
162
163
FileInputStream fis = new FileInputStream(filename);
164
BufferedReader in =
165
new BufferedReader(new InputStreamReader(fis, decoder));
166
167
System.out.println("\nStart testing with " + filename +
168
" for options: " +
169
(((unicodeVer & Normalizer.UNICODE_3_2) != 0) ?
170
"Unicode 3.2.0" : "the latest Unicode") + ", " +
171
(((unicodeVer & NormalizerImpl.BEFORE_PRI_29) != 0) ?
172
"with" : "without") + " PRI #29 fix");
173
174
int lineNo = 0;
175
String text;
176
String[] columns = new String[6];
177
boolean part1test = false;
178
179
while ((text = in.readLine()) != null) {
180
lineNo ++;
181
182
char c = text.charAt(0);
183
if (c == '#') {
184
continue;
185
} else if (c == '@') {
186
if (text.startsWith("@Part")) {
187
System.out.println("# Testing data in " + text);
188
189
if (text.startsWith("@Part1 ")) {
190
part1test = true;
191
} else {
192
part1test = false;
193
}
194
195
continue;
196
}
197
}
198
199
prepareColumns(columns, text, filename, lineNo, part1test);
200
201
testNFC(columns, unicodeVer, filename, lineNo);
202
testNFD(columns, unicodeVer, filename, lineNo);
203
testNFKC(columns, unicodeVer, filename, lineNo);
204
testNFKD(columns, unicodeVer, filename, lineNo);
205
}
206
207
in.close();
208
fis.close();
209
210
if (unicodeVer == UNICODE_LATEST) {
211
System.out.println("# Testing characters which are not listed in Part1");
212
testRemainingChars(filename, unicodeVer);
213
}
214
}
215
216
/*
217
* Test for NFC
218
*
219
* c2 == NFC(c1) == NFC(c2) == NFC(c3)
220
* c4 == NFC(c4) == NFC(c5)
221
*/
222
private static void testNFC(String[] c, int unicodeVer,
223
String file, int line) throws Exception {
224
test(2, c, 1, 3, NFC, unicodeVer, file, line);
225
test(4, c, 4, 5, NFC, unicodeVer, file, line);
226
}
227
228
/*
229
* Test for NFD
230
*
231
* c3 == NFD(c1) == NFD(c2) == NFD(c3)
232
* c5 == NFD(c4) == NFD(c5)
233
*/
234
private static void testNFD(String[] c, int unicodeVer,
235
String file, int line) throws Exception {
236
test(3, c, 1, 3, NFD, unicodeVer, file, line);
237
test(5, c, 4, 5, NFD, unicodeVer, file, line);
238
}
239
240
/*
241
* Test for NFKC
242
*
243
* c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
244
*/
245
private static void testNFKC(String[] c, int unicodeVer,
246
String file, int line) throws Exception {
247
test(4, c, 1, 5, NFKC, unicodeVer, file, line);
248
}
249
250
/*
251
* Test for NFKD
252
*
253
* c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
254
*/
255
private static void testNFKD(String[] c, int unicodeVer,
256
String file, int line) throws Exception {
257
test(5, c, 1, 5, NFKD, unicodeVer, file, line);
258
}
259
260
/*
261
* Test for characters which aren't listed in Part1
262
*
263
* X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
264
*/
265
private static void testRemainingChars(String file,
266
int unicodeVer) throws Exception {
267
for (int i = Character.MIN_CODE_POINT;
268
i <= Character.MAX_CODE_POINT;
269
i++) {
270
if (!charList.get(i)) {
271
String from = String.valueOf(Character.toChars(i));
272
String to;
273
274
for (int j = 0; j < modes.length; j++) {
275
Normalizer.Mode mode = modes[j];
276
277
to = Normalizer.normalize(from, mode, unicodeVer);
278
if (!from.equals(to)) {
279
error(mode, from, from, to, file, -1);
280
// } else {
281
// okay(mode, from, from, to, file, -1);
282
}
283
284
if (!Normalizer.isNormalized(from, mode, unicodeVer)) {
285
error(mode, from, file, -1);
286
// } else {
287
// okay(mode, from, file, -1);
288
}
289
}
290
}
291
}
292
}
293
294
/*
295
* Test normalize() and isNormalized()
296
*/
297
private static void test(int col, String[] c,
298
int FROM, int TO,
299
Normalizer.Mode mode, int unicodeVer,
300
String file, int line) throws Exception {
301
for (int i = FROM; i <= TO; i++) {
302
String got = Normalizer.normalize(c[i], mode, unicodeVer);
303
if (!c[col].equals(got)) {
304
error(mode, c[i], c[col], got, file, line);
305
// } else {
306
// okay(mode, c[i], c[col], got, file, line);
307
}
308
309
/*
310
* If the original String equals its normalized String, it means
311
* that the original String is normalizerd. Thus, isNormalized()
312
* should return true. And, vice versa!
313
*/
314
if (c[col].equals(c[i])) {
315
if (!Normalizer.isNormalized(c[i], mode, unicodeVer)) {
316
error(mode, c[i], file, line);
317
// } else {
318
// okay(mode, c[i], file, line);
319
}
320
} else {
321
if (Normalizer.isNormalized(c[i], mode, unicodeVer)) {
322
error(mode, c[i], file, line);
323
// } else {
324
// okay(mode, c[i], file, line);
325
}
326
}
327
}
328
}
329
330
/*
331
* Generate an array of String from a line of conformance datafile.
332
*/
333
private static void prepareColumns(String[] col, String text,
334
String file, int line,
335
boolean part1test) throws Exception {
336
int index = text.indexOf('#');
337
if (index != -1) {
338
text = text.substring(0, index);
339
}
340
341
StringTokenizer st = new StringTokenizer(text, ";");
342
int tokenCount = st.countTokens();
343
if (tokenCount < 5) {
344
throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file);
345
}
346
347
StringBuffer sb = new StringBuffer();
348
for (int i = 1; i <= 5; i++) {
349
StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");
350
351
while (tst.hasMoreTokens()) {
352
int code = Integer.parseInt(tst.nextToken(), 16);
353
sb.append(Character.toChars(code));
354
}
355
356
col[i] = sb.toString();
357
sb.setLength(0);
358
}
359
360
if (part1test) {
361
charList.set(col[1].codePointAt(0));
362
}
363
}
364
365
/*
366
* Show an error message when normalize() didn't return the expected value.
367
* (An exception is sometimes convenient. Therefore, it is commented out
368
* for the moment.)
369
*/
370
private static void error(Normalizer.Mode mode,
371
String from, String to, String got,
372
String file, int line) throws Exception {
373
System.err.println("\t" + toString(mode) + ": normalize(" +
374
toHexString(from) + ") doesn't equal <" + toHexString(to) +
375
"> at line " + line + " in " + file + ". Got <" +
376
toHexString(got) + ">.");
377
// throw new RuntimeException("Normalization(" + toString(mode) + ") failed");
378
}
379
380
/*
381
* Show an error message when isNormalize() didn't return the expected value.
382
* (An exception is sometimes convenient. Therefore, it is commented out
383
* for the moment.)
384
*/
385
private static void error(Normalizer.Mode mode, String orig,
386
String file, int line) throws Exception {
387
System.err.println("\t" + toString(mode) + ": isNormalized(" +
388
toHexString(orig) + ") returned the wrong value at line " + line +
389
" in " + file + ".");
390
// throw new RuntimeException("Normalization(" + toString(mode) +") failed");
391
}
392
393
/*
394
* (For debugging)
395
* Shows a message when normalize() returned the expected value.
396
*/
397
private static void okay(Normalizer.Mode mode,
398
String from, String to, String got,
399
String file, int line) {
400
System.out.println("\t" + toString(mode) + ": normalize(" +
401
toHexString(from) + ") equals <" + toHexString(to) +
402
"> at line " + line + " in " + file + ". Got <" +
403
toHexString(got) + ">.");
404
}
405
406
/*
407
* (For debugging)
408
* Shows a message when isNormalized() returned the expected value.
409
*/
410
private static void okay(Normalizer.Mode mode, String orig,
411
String file, int line) {
412
System.out.println("\t" + toString(mode) + ": isNormalized(" +
413
toHexString(orig) + ") returned the correct value at line " +
414
line + " in " + file + ".");
415
}
416
417
/*
418
* Returns a spece-delimited hex String
419
*/
420
private static String toHexString(String s) {
421
StringBuffer sb = new StringBuffer(" ");
422
423
for (int i = 0; i < s.length(); i++) {
424
sb.append(Integer.toHexString(s.charAt(i)));
425
sb.append(' ');
426
}
427
428
return sb.toString();
429
}
430
431
/*
432
* Returns the name of Normalizer.Mode
433
*/
434
private static String toString(Normalizer.Mode mode) {
435
if (mode == Normalizer.NFC) {
436
return "NFC";
437
} else if (mode == Normalizer.NFD) {
438
return "NFD";
439
} else if (mode == Normalizer.NFKC) {
440
return "NFKC";
441
} else if (mode == Normalizer.NFKD) {
442
return "NFKD";
443
}
444
445
return "unknown";
446
}
447
}
448
449