Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/test/jdk/java/text/Normalizer/ConformanceTest.java
41149 views
1
/*
2
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*/
23
/*
24
* @test
25
* @bug 4221795 6565620 6959267 7070436 7198195 8032446 8174270 8221431 8239383
26
* @summary Confirm Normalizer's fundamental behavior
27
* @library /lib/testlibrary/java/lang
28
* @modules java.base/sun.text java.base/jdk.internal.icu.text
29
* @compile -XDignore.symbol.file ConformanceTest.java
30
* @run main/timeout=3000 ConformanceTest
31
*/
32
33
import java.io.BufferedReader;
34
import java.io.File;
35
import java.io.FileInputStream;
36
import java.io.InputStreamReader;
37
import java.nio.charset.Charset;
38
import java.nio.charset.CharsetDecoder;
39
import java.util.BitSet;
40
import java.util.StringTokenizer;
41
42
import jdk.internal.icu.text.NormalizerBase;
43
44
/*
45
* Conformance test for java.text.Normalizer and sun.text.Normalizer.
46
*/
47
public class ConformanceTest {
48
49
//
50
// Options to be used with sun.text.Normalizer
51
//
52
53
/*
54
* Default Unicode 3.2.0 normalization. (Provided for IDNA/StringPrep)
55
*
56
* - Without Corrigendum 4 fix
57
* (Different from ICU4J 3.2's Normalizer.)
58
* - Without Public Review Issue #29 fix
59
* (Different from ICU4J 3.2's Normalizer.)
60
*/
61
private static final int UNICODE_3_2_0 = sun.text.Normalizer.UNICODE_3_2;
62
63
/*
64
* Original Unicode 3.2.0 normalization. (Provided for testing only)
65
*
66
* - With Corrigendum 4 fix
67
* - With Public Review Issue #29 fix
68
*/
69
private static final int UNICODE_3_2_0_ORIGINAL =
70
NormalizerBase.UNICODE_3_2;
71
72
/*
73
* Default normalization. In JDK 6,
74
* - Unicode 4.0.0
75
* - With Corrigendum 4 fix
76
* - Without Public Review Issue #29 fix
77
*
78
* In JDK 7,
79
* - Unicode 5.1.0
80
* (Different from ICU4J 3.2's Normalizer.)
81
* - With Corrigendum 4 fix
82
* - With Public Review Issue #29 fix
83
*
84
* In JDK 8,
85
* - Unicode 6.1.0
86
* - With Corrigendum 4 fix
87
* - With Public Review Issue #29 fix
88
*
89
* When we support Unicode 4.1.0 or later, we need to do normalization
90
* with Public Review Issue #29 fix. For more details of PRI #29, see
91
* http://unicode.org/review/pr-29.html .
92
*/
93
private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;
94
95
//
96
// Conformance test datafiles
97
//
98
99
/*
100
* Conformance test datafile for Unicode 3.2.0 with Corrigendum4
101
* corrections.
102
* This testdata is for sun.text.Normalize(UNICODE_3_2)
103
*
104
* This is NOT an original Conformance test data. Some inconvenient test
105
* cases are commented out. About corrigendum 4, please refer to
106
* http://www.unicode.org/review/resolved-pri.html#pri29
107
*
108
*/
109
static final String DATA_3_2_0_CORRIGENDUM =
110
"NormalizationTest-3.2.0.Corrigendum4.txt";
111
112
/*
113
* Conformance test datafile for Unicode 3.2.0 without Corrigendum4
114
* corrections. This is the original Conformance test data.
115
*
116
* This testdata is for sun.text.Normalize(UNICODE_3_2_IDNA)
117
*/
118
static final String DATA_3_2_0 = "NormalizationTest-3.2.0.txt";
119
120
/*
121
* Conformance test datafile for the latest Unicode which is supported
122
* by J2SE.
123
* Unicode 4.0.0 is the latest version in JDK 5.0 and JDK 6. Unicode 5.1.0
124
* in JDK 7, and 6.1.0 in JDK 8. This Unicode can be used via both
125
* java.text.Normalizer and sun.text.Normalizer.
126
*
127
* This testdata is for sun.text.Normalize(UNICODE_LATEST)
128
*/
129
static final String DATA_LATEST = "NormalizationTest.txt";
130
131
/*
132
* Conformance test datafile in ICU4J 3.2.
133
*/
134
static final String DATA_ICU = "ICUNormalizationTest.txt";
135
136
/*
137
* Decoder
138
*/
139
static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
140
141
/*
142
* List to pick up characters which are not listed in Part1
143
*/
144
static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);
145
146
/*
147
* Shortcuts
148
*/
149
private static final java.text.Normalizer.Form NFC =
150
java.text.Normalizer.Form.NFC;
151
private static final java.text.Normalizer.Form NFD =
152
java.text.Normalizer.Form.NFD;
153
private static final java.text.Normalizer.Form NFKC =
154
java.text.Normalizer.Form.NFKC;
155
private static final java.text.Normalizer.Form NFKD =
156
java.text.Normalizer.Form.NFKD;
157
static final java.text.Normalizer.Form[] forms = {NFC, NFD, NFKC, NFKD};
158
159
160
static TestNormalizer normalizer;
161
162
public static void main(String[] args) throws Exception {
163
ConformanceTest ct = new ConformanceTest();
164
ct.test();
165
}
166
167
void test() throws Exception {
168
normalizer = new testJavaNormalizer();
169
test(DATA_LATEST, UNICODE_LATEST);
170
171
normalizer = new testSunNormalizer();
172
test(DATA_3_2_0_CORRIGENDUM, UNICODE_3_2_0);
173
test(DATA_LATEST, UNICODE_LATEST);
174
test(DATA_ICU, UNICODE_LATEST);
175
176
/* Unconformity test */
177
// test(DATA_3_2_0, UNICODE_LATEST);
178
// test(DATA_LATEST, UNICODE_3_2_0);
179
}
180
181
/*
182
* Main routine of conformance test
183
*/
184
private static void test(String filename, int unicodeVer) throws Exception {
185
186
File f = filename.equals(DATA_LATEST) ?
187
UCDFiles.NORMALIZATION_TEST.toFile() :
188
new File(System.getProperty("test.src", "."), filename);
189
FileInputStream fis = new FileInputStream(f);
190
BufferedReader in =
191
new BufferedReader(new InputStreamReader(fis, decoder));
192
193
System.out.println("\nStart testing for " + normalizer.name +
194
" with " + filename + " for options: " +
195
(((unicodeVer & NormalizerBase.UNICODE_3_2) != 0) ?
196
"Unicode 3.2.0" : "the latest Unicode"));
197
198
int lineNo = 0;
199
String text;
200
boolean part1test = false;
201
boolean part1testExists = false;
202
String[] columns = new String[6];
203
204
while ((text = in.readLine()) != null) {
205
lineNo ++;
206
207
char c = text.charAt(0);
208
if (c == '#') {
209
continue;
210
} else if (c == '@') {
211
if (text.startsWith("@Part")) {
212
System.out.println("# Testing data in " + text);
213
214
if (text.startsWith("@Part1 ")) {
215
part1test = true;
216
part1testExists = true;
217
} else {
218
part1test = false;
219
}
220
221
continue;
222
}
223
}
224
225
prepareColumns(columns, text, filename, lineNo, part1test);
226
227
testNFC(columns, unicodeVer, filename, lineNo);
228
testNFD(columns, unicodeVer, filename, lineNo);
229
testNFKC(columns, unicodeVer, filename, lineNo);
230
testNFKD(columns, unicodeVer, filename, lineNo);
231
}
232
233
in.close();
234
fis.close();
235
236
if (part1testExists) {
237
System.out.println("# Testing characters which are not listed in Part1");
238
testRemainingChars(filename, unicodeVer);
239
part1testExists = false;
240
}
241
}
242
243
/*
244
* Test for NFC
245
*
246
* c2 == NFC(c1) == NFC(c2) == NFC(c3)
247
* c4 == NFC(c4) == NFC(c5)
248
*/
249
private static void testNFC(String[] c, int unicodeVer,
250
String file, int line) throws Exception {
251
test(2, c, 1, 3, NFC, unicodeVer, file, line);
252
test(4, c, 4, 5, NFC, unicodeVer, file, line);
253
}
254
255
/*
256
* Test for NFD
257
*
258
* c3 == NFD(c1) == NFD(c2) == NFD(c3)
259
* c5 == NFD(c4) == NFD(c5)
260
*/
261
private static void testNFD(String[] c, int unicodeVer,
262
String file, int line) throws Exception {
263
test(3, c, 1, 3, NFD, unicodeVer, file, line);
264
test(5, c, 4, 5, NFD, unicodeVer, file, line);
265
}
266
267
/*
268
* Test for NFKC
269
*
270
* c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
271
*/
272
private static void testNFKC(String[] c, int unicodeVer,
273
String file, int line) throws Exception {
274
test(4, c, 1, 5, NFKC, unicodeVer, file, line);
275
}
276
277
/*
278
* Test for NFKD
279
*
280
* c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
281
*/
282
private static void testNFKD(String[] c, int unicodeVer,
283
String file, int line) throws Exception {
284
test(5, c, 1, 5, NFKD, unicodeVer, file, line);
285
}
286
287
/*
288
* Test for characters which aren't listed in Part1
289
*
290
* X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
291
*/
292
private static void testRemainingChars(String file,
293
int unicodeVer) throws Exception {
294
for (int i = Character.MIN_CODE_POINT;
295
i <= Character.MAX_CODE_POINT;
296
i++) {
297
if (!charList.get(i)) {
298
String from = String.valueOf(Character.toChars(i));
299
String to;
300
301
for (int j = 0; j < forms.length; j++) {
302
java.text.Normalizer.Form form = forms[j];
303
304
to = normalizer.normalize(from, form, unicodeVer);
305
if (!from.equals(to)) {
306
error(form, from, from, to, file, -1);
307
// } else {
308
// okay(form, from, from, to, file, -1);
309
}
310
311
if (!normalizer.isNormalized(from, form, unicodeVer)) {
312
error(form, from, file, -1);
313
// } else {
314
// okay(form, from, file, -1);
315
}
316
}
317
}
318
}
319
}
320
321
/*
322
* Test normalize() and isNormalized()
323
*/
324
private static void test(int col, String[] c,
325
int FROM, int TO,
326
java.text.Normalizer.Form form, int unicodeVer,
327
String file, int line) throws Exception {
328
for (int i = FROM; i <= TO; i++) {
329
String got = normalizer.normalize(c[i], form, unicodeVer);
330
if (!c[col].equals(got)) {
331
error(form, c[i], c[col], got, file, line);
332
// } else {
333
// okay(form, c[i], c[col], got, file, line);
334
}
335
336
/*
337
* If the original String equals its normalized String, it means
338
* that the original String is normalizerd. Thus, isNormalized()
339
* should return true. And, vice versa!
340
*/
341
if (c[col].equals(c[i])) {
342
if (!normalizer.isNormalized(c[i], form, unicodeVer)) {
343
error(form, c[i], file, line);
344
// } else {
345
// okay(form, c[i], file, line);
346
}
347
} else {
348
if (normalizer.isNormalized(c[i], form, unicodeVer)) {
349
error(form, c[i], file, line);
350
// } else {
351
// okay(form, c[i], file, line);
352
}
353
}
354
}
355
}
356
357
/*
358
* Generate an array of String from a line of conformance datafile.
359
*/
360
private static void prepareColumns(String[] cols, String text,
361
String file, int line,
362
boolean part1test) throws Exception {
363
int index = text.indexOf('#');
364
if (index != -1) {
365
text = text.substring(0, index);
366
}
367
368
StringTokenizer st = new StringTokenizer(text, ";");
369
int tokenCount = st.countTokens();
370
if (tokenCount < 5) {
371
throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file);
372
}
373
374
StringBuffer sb = new StringBuffer();
375
for (int i = 1; i <= 5; i++) {
376
StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");
377
378
while (tst.hasMoreTokens()) {
379
int code = Integer.parseInt(tst.nextToken(), 16);
380
sb.append(Character.toChars(code));
381
}
382
383
cols[i] = sb.toString();
384
sb.setLength(0);
385
}
386
387
if (part1test) {
388
charList.set(cols[1].codePointAt(0));
389
}
390
}
391
392
/*
393
* Show an error message when normalize() didn't return the expected value.
394
* (An exception is sometimes convenient. Therefore, it is commented out
395
* for the moment.)
396
*/
397
private static void error(java.text.Normalizer.Form form,
398
String from, String to, String got,
399
String file, int line) throws Exception {
400
System.err.println("-\t" + form.toString() + ": normalize(" +
401
toHexString(from) + ") doesn't equal <" + toHexString(to) +
402
"> at line " + line + " in " + file + ". Got [" +
403
toHexString(got) + "]");
404
throw new RuntimeException("Normalization(" + form.toString() + ") failed");
405
}
406
407
/*
408
* Show an error message when isNormalize() didn't return the expected
409
* value.
410
* (An exception is sometimes convenient. Therefore, it is commented out
411
* for the moment.)
412
*/
413
private static void error(java.text.Normalizer.Form form, String s,
414
String file, int line) throws Exception {
415
System.err.println("\t" + form.toString() + ": isNormalized(" +
416
toHexString(s) + ") returned the wrong value at line " + line +
417
" in " + file);
418
throw new RuntimeException("Normalization(" + form.toString() +") failed");
419
}
420
421
/*
422
* (For debugging)
423
* Shows a message when normalize() returned the expected value.
424
*/
425
private static void okay(java.text.Normalizer.Form form,
426
String from, String to, String got,
427
String file, int line) {
428
System.out.println("\t" + form.toString() + ": normalize(" +
429
toHexString(from) + ") equals <" + toHexString(to) +
430
"> at line " + line + " in " + file + ". Got [" +
431
toHexString(got) + "]");
432
}
433
434
/*
435
* (For debugging)
436
* Shows a message when isNormalized() returned the expected value.
437
*/
438
private static void okay(java.text.Normalizer.Form form, String s,
439
String file, int line) {
440
System.out.println("\t" + form.toString() + ": isNormalized(" +
441
toHexString(s) + ") returned the correct value at line " +
442
line + " in " + file);
443
}
444
445
/*
446
* Returns a spece-delimited hex String
447
*/
448
private static String toHexString(String s) {
449
StringBuffer sb = new StringBuffer(" ");
450
451
for (int i = 0; i < s.length(); i++) {
452
sb.append(Integer.toHexString(s.charAt(i)));
453
sb.append(' ');
454
}
455
456
return sb.toString();
457
}
458
459
/*
460
* Abstract class to call each Normalizer in java.text or sun.text.
461
*/
462
private abstract class TestNormalizer {
463
String name;
464
465
TestNormalizer(String str) {
466
name = str;
467
}
468
469
String getNormalizerName() {
470
return name;
471
}
472
473
abstract String normalize(CharSequence cs,
474
java.text.Normalizer.Form form,
475
int option);
476
477
abstract boolean isNormalized(CharSequence cs,
478
java.text.Normalizer.Form form,
479
int option);
480
}
481
482
/*
483
* For java.text.Normalizer
484
* - normalize(CharSequence, Normalizer.Form)
485
* - isNormalized(CharSequence, Normalizer.Form)
486
*/
487
private class testJavaNormalizer extends TestNormalizer {
488
testJavaNormalizer() {
489
super("java.text.Normalizer");
490
}
491
492
String normalize(CharSequence cs,
493
java.text.Normalizer.Form form,
494
int option) {
495
return java.text.Normalizer.normalize(cs, form);
496
}
497
498
boolean isNormalized(CharSequence cs,
499
java.text.Normalizer.Form form,
500
int option) {
501
return java.text.Normalizer.isNormalized(cs, form);
502
}
503
}
504
505
/*
506
* For sun.text.Normalizer
507
* - normalize(CharSequence, Normalizer.Form, int)
508
* - isNormalized(CharSequence, Normalizer.Form, int)
509
*/
510
private class testSunNormalizer extends TestNormalizer {
511
testSunNormalizer() {
512
super("sun.text.Normalizer");
513
}
514
515
String normalize(CharSequence cs,
516
java.text.Normalizer.Form form,
517
int option) {
518
return sun.text.Normalizer.normalize(cs, form, option);
519
}
520
521
boolean isNormalized(CharSequence cs,
522
java.text.Normalizer.Form form,
523
int option) {
524
return sun.text.Normalizer.isNormalized(cs, form, option);
525
}
526
}
527
}
528
529