CoCalc -- ConformanceTest.java

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/test/jdk/java/text/Normalizer/ConformanceTest.java
⁴¹¹⁴⁹ views
1
/*
2
 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.
8
 *
9
 * This code is distributed in the hope that it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
 * version 2 for more details (a copy is included in the LICENSE file that
13
 * accompanied this code).
14
 *
15
 * You should have received a copy of the GNU General Public License version
16
 * 2 along with this work; if not, write to the Free Software Foundation,
17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
 *
19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
 * or visit www.oracle.com if you need additional information or have any
21
 * questions.
22
 */
23
/*
24
 * @test
25
 * @bug  4221795 6565620 6959267 7070436 7198195 8032446 8174270 8221431 8239383
26
 * @summary Confirm Normalizer's fundamental behavior
27
 * @library /lib/testlibrary/java/lang
28
 * @modules java.base/sun.text java.base/jdk.internal.icu.text
29
 * @compile -XDignore.symbol.file ConformanceTest.java
30
 * @run main/timeout=3000 ConformanceTest
31
 */
32

33
import java.io.BufferedReader;
34
import java.io.File;
35
import java.io.FileInputStream;
36
import java.io.InputStreamReader;
37
import java.nio.charset.Charset;
38
import java.nio.charset.CharsetDecoder;
39
import java.util.BitSet;
40
import java.util.StringTokenizer;
41

42
import jdk.internal.icu.text.NormalizerBase;
43

44
/*
45
 * Conformance test for java.text.Normalizer and sun.text.Normalizer.
46
 */
47
public class ConformanceTest {
48

49
    //
50
    // Options to be used with sun.text.Normalizer
51
    //
52

53
    /*
54
     * Default Unicode 3.2.0 normalization. (Provided for IDNA/StringPrep)
55
     *
56
     *   - Without Corrigendum 4 fix
57
     *     (Different from ICU4J 3.2's Normalizer.)
58
     *   - Without Public Review Issue #29 fix
59
     *     (Different from ICU4J 3.2's Normalizer.)
60
     */
61
    private static final int UNICODE_3_2_0 = sun.text.Normalizer.UNICODE_3_2;
62

63
    /*
64
     * Original Unicode 3.2.0 normalization. (Provided for testing only)
65
     *
66
     *   - With Corrigendum 4 fix
67
     *   - With Public Review Issue #29 fix
68
     */
69
    private static final int UNICODE_3_2_0_ORIGINAL =
70
                                 NormalizerBase.UNICODE_3_2;
71

72
    /*
73
     * Default normalization. In JDK 6,
74
     *   - Unicode 4.0.0
75
     *   - With Corrigendum 4 fix
76
     *   - Without Public Review Issue #29 fix
77
     *
78
     * In JDK 7,
79
     *   - Unicode 5.1.0
80
     *     (Different from ICU4J 3.2's Normalizer.)
81
     *   - With Corrigendum 4 fix
82
     *   - With Public Review Issue #29 fix
83
     *
84
     * In JDK 8,
85
     *   - Unicode 6.1.0
86
     *   - With Corrigendum 4 fix
87
     *   - With Public Review Issue #29 fix
88
     *
89
     *  When we support Unicode 4.1.0 or later, we need to do normalization
90
     *  with Public Review Issue #29 fix. For more details of PRI #29, see
91
     *  http://unicode.org/review/pr-29.html .
92
     */
93
    private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;
94

95
    //
96
    // Conformance test datafiles
97
    //
98

99
    /*
100
     * Conformance test datafile for Unicode 3.2.0 with Corrigendum4
101
     * corrections.
102
     * This testdata is for sun.text.Normalize(UNICODE_3_2)
103
     *
104
     * This is NOT the original conformance test data. Some inconvenient test
105
     * cases are commented out. About corrigendum 4, please refer
106
     *   http://www.unicode.org/review/resolved-pri.html#pri29
107
     *
108
     */
109
    static final String DATA_3_2_0_CORRIGENDUM =
110
                            "NormalizationTest-3.2.0.Corrigendum4.txt";
111

112
    /*
113
     * Conformance test datafile for Unicode 3.2.0 without Corrigendum4
114
     * corrections. This is the original conformance test data.
115
     *
116
     * This testdata is for sun.text.Normalize(UNICODE_3_2_IDNA)
117
     */
118
    static final String DATA_3_2_0 = "NormalizationTest-3.2.0.txt";
119

120
    /*
121
     * Conformance test datafile for the latest Unicode which is supported
122
     * by J2SE.
123
     * Unicode 4.0.0 is the latest version in JDK 5.0 and JDK 6. Unicode 5.1.0
124
     * in JDK 7, and 6.1.0 in JDK 8. This Unicode can be used via both
125
     * java.text.Normalizer and sun.text.Normalizer.
126
     *
127
     * This testdata is for sun.text.Normalize(UNICODE_LATEST)
128
     */
129
    static final String DATA_LATEST = "NormalizationTest.txt";
130

131
    /*
132
     * Conformance test datafile in ICU4J 3.2.
133
     */
134
    static final String DATA_ICU = "ICUNormalizationTest.txt";
135

136
    /*
137
     * Decoder
138
     */
139
    static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
140

141
    /*
142
     * List to pick up characters which are not listed in Part1
143
     */
144
    static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);
145

146
    /*
147
     * Shortcuts
148
     */
149
    private static final java.text.Normalizer.Form NFC  =
150
        java.text.Normalizer.Form.NFC;
151
    private static final java.text.Normalizer.Form NFD  =
152
        java.text.Normalizer.Form.NFD;
153
    private static final java.text.Normalizer.Form NFKC =
154
        java.text.Normalizer.Form.NFKC;
155
    private static final java.text.Normalizer.Form NFKD =
156
        java.text.Normalizer.Form.NFKD;
157
    static final java.text.Normalizer.Form[] forms = {NFC, NFD, NFKC, NFKD};
158

159

160
    static TestNormalizer normalizer;
161

162
    public static void main(String[] args) throws Exception {
163
        ConformanceTest ct = new ConformanceTest();
164
        ct.test();
165
    }
166

167
    void test() throws Exception {
168
        normalizer = new testJavaNormalizer();
169
        test(DATA_LATEST, UNICODE_LATEST);
170

171
        normalizer = new testSunNormalizer();
172
        test(DATA_3_2_0_CORRIGENDUM, UNICODE_3_2_0);
173
        test(DATA_LATEST, UNICODE_LATEST);
174
        test(DATA_ICU, UNICODE_LATEST);
175

176
        /* Unconformity test */
177
//      test(DATA_3_2_0, UNICODE_LATEST);
178
//      test(DATA_LATEST, UNICODE_3_2_0);
179
    }
180

181
    /*
182
     * Main routine of conformance test
183
     */
184
    private static void test(String filename, int unicodeVer) throws Exception {
185

186
        File  f = filename.equals(DATA_LATEST) ?
187
            UCDFiles.NORMALIZATION_TEST.toFile() :
188
            new File(System.getProperty("test.src", "."), filename);
189
        FileInputStream fis = new FileInputStream(f);
190
        BufferedReader in =
191
            new BufferedReader(new InputStreamReader(fis, decoder));
192

193
        System.out.println("\nStart testing for " + normalizer.name +
194
            " with " + filename + " for options: " +
195
            (((unicodeVer & NormalizerBase.UNICODE_3_2) != 0) ?
196
                "Unicode 3.2.0" : "the latest Unicode"));
197

198
        int lineNo = 0;
199
        String text;
200
        boolean part1test = false;
201
        boolean part1testExists = false;
202
        String[] columns = new String[6];
203

204
        while ((text = in.readLine()) != null) {
205
            lineNo ++;
206

207
            char c = text.charAt(0);
208
            if (c == '#') {
209
                continue;
210
            } else if (c == '@') {
211
                if (text.startsWith("@Part")) {
212
                    System.out.println("# Testing data in " + text);
213

214
                    if (text.startsWith("@Part1 ")) {
215
                        part1test = true;
216
                        part1testExists = true;
217
                    } else {
218
                        part1test = false;
219
                    }
220

221
                    continue;
222
                }
223
            }
224

225
            prepareColumns(columns, text, filename, lineNo, part1test);
226

227
            testNFC(columns, unicodeVer, filename, lineNo);
228
            testNFD(columns, unicodeVer, filename, lineNo);
229
            testNFKC(columns, unicodeVer, filename, lineNo);
230
            testNFKD(columns, unicodeVer, filename, lineNo);
231
        }
232

233
        in.close();
234
        fis.close();
235

236
        if (part1testExists) {
237
            System.out.println("# Testing characters which are not listed in Part1");
238
            testRemainingChars(filename, unicodeVer);
239
            part1testExists = false;
240
        }
241
    }
242

243
    /*
244
     * Test for NFC
245
     *
246
     *   c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
247
     *   c4 ==  NFC(c4) ==  NFC(c5)
248
     */
249
    private static void testNFC(String[] c, int unicodeVer,
250
                                String file, int line) throws Exception {
251
        test(2, c, 1, 3, NFC, unicodeVer, file, line);
252
        test(4, c, 4, 5, NFC, unicodeVer, file, line);
253
    }
254

255
    /*
256
     * Test for NFD
257
     *
258
     *   c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
259
     *   c5 ==  NFD(c4) ==  NFD(c5)
260
     */
261
    private static void testNFD(String[] c, int unicodeVer,
262
                                String file, int line) throws Exception {
263
        test(3, c, 1, 3, NFD, unicodeVer, file, line);
264
        test(5, c, 4, 5, NFD, unicodeVer, file, line);
265
    }
266

267
    /*
268
     * Test for NFKC
269
     *
270
     *   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
271
     */
272
    private static void testNFKC(String[] c, int unicodeVer,
273
                                 String file, int line) throws Exception {
274
        test(4, c, 1, 5, NFKC, unicodeVer, file, line);
275
    }
276

277
    /*
278
     * Test for NFKD
279
     *
280
     *   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
281
     */
282
    private static void testNFKD(String[] c, int unicodeVer,
283
                                 String file, int line) throws Exception {
284
        test(5, c, 1, 5, NFKD, unicodeVer, file, line);
285
    }
286

287
    /*
288
     * Test for characters which aren't listed in Part1
289
     *
290
     *   X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
291
     */
292
    private static void testRemainingChars(String file,
293
                                           int unicodeVer) throws Exception {
294
        for (int i = Character.MIN_CODE_POINT;
295
             i <= Character.MAX_CODE_POINT;
296
             i++) {
297
            if (!charList.get(i)) {
298
                String from = String.valueOf(Character.toChars(i));
299
                String to;
300

301
                for (int j = 0; j < forms.length; j++) {
302
                    java.text.Normalizer.Form form = forms[j];
303

304
                    to = normalizer.normalize(from, form, unicodeVer);
305
                    if (!from.equals(to)) {
306
                        error(form, from, from, to, file, -1);
307
//                  } else {
308
//                      okay(form, from, from, to, file, -1);
309
                    }
310

311
                    if (!normalizer.isNormalized(from, form, unicodeVer)) {
312
                        error(form, from, file, -1);
313
//                  } else {
314
//                      okay(form, from, file, -1);
315
                    }
316
                }
317
            }
318
        }
319
    }
320

321
    /*
322
     * Test normalize() and isNormalized()
323
     */
324
    private static void test(int col, String[] c,
325
                             int FROM, int TO,
326
                             java.text.Normalizer.Form form, int unicodeVer,
327
                             String file, int line) throws Exception {
328
        for (int i = FROM; i <= TO; i++) {
329
            String got = normalizer.normalize(c[i], form, unicodeVer);
330
            if (!c[col].equals(got)) {
331
                error(form, c[i], c[col], got, file, line);
332
//          } else {
333
//              okay(form, c[i], c[col], got, file, line);
334
            }
335

336
            /*
337
             * If the original String equals its normalized String, it means
338
             * that the original String is normalizerd. Thus, isNormalized()
339
             * should return true. And, vice versa!
340
             */
341
            if (c[col].equals(c[i])) {
342
                if (!normalizer.isNormalized(c[i], form, unicodeVer)) {
343
                    error(form, c[i], file, line);
344
//              } else {
345
//                  okay(form, c[i], file, line);
346
                }
347
            } else {
348
                if (normalizer.isNormalized(c[i], form, unicodeVer)) {
349
                    error(form, c[i], file, line);
350
//              } else {
351
//                  okay(form, c[i], file, line);
352
                }
353
            }
354
        }
355
    }
356

357
    /*
358
     * Generate an array of String from a line of conformance datafile.
359
     */
360
    private static void prepareColumns(String[] cols, String text,
361
                                           String file, int line,
362
                                           boolean part1test) throws Exception {
363
        int index = text.indexOf('#');
364
        if (index != -1) {
365
            text = text.substring(0, index);
366
        }
367

368
        StringTokenizer st = new StringTokenizer(text, ";");
369
        int tokenCount = st.countTokens();
370
        if (tokenCount < 5) {
371
             throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file);
372
        }
373

374
        StringBuffer sb = new StringBuffer();
375
        for (int i = 1; i <= 5; i++) {
376
            StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");
377

378
            while (tst.hasMoreTokens()) {
379
                int code = Integer.parseInt(tst.nextToken(), 16);
380
                sb.append(Character.toChars(code));
381
            }
382

383
            cols[i] = sb.toString();
384
            sb.setLength(0);
385
        }
386

387
        if (part1test) {
388
            charList.set(cols[1].codePointAt(0));
389
        }
390
    }
391

392
    /*
393
     * Show an error message when normalize() didn't return the expected value.
394
     * (An exception is sometimes convenient. Therefore, it is commented out
395
     * for the moment.)
396
     */
397
    private static void error(java.text.Normalizer.Form form,
398
                              String from, String to, String got,
399
                              String file, int line) throws Exception {
400
        System.err.println("-\t" + form.toString() + ": normalize(" +
401
            toHexString(from) + ") doesn't equal <" + toHexString(to) +
402
            "> at line " + line + " in " + file + ". Got [" +
403
            toHexString(got) + "]");
404
        throw new RuntimeException("Normalization(" + form.toString() + ") failed");
405
    }
406

407
    /*
408
     * Show an error message when isNormalize() didn't return the expected
409
     * value.
410
     * (An exception is sometimes convenient. Therefore, it is commented out
411
     * for the moment.)
412
     */
413
    private static void error(java.text.Normalizer.Form form, String s,
414
                              String file, int line) throws Exception {
415
        System.err.println("\t" + form.toString() + ": isNormalized(" +
416
            toHexString(s) + ") returned the wrong value at line " + line +
417
            " in " + file);
418
        throw new RuntimeException("Normalization(" + form.toString() +") failed");
419
    }
420

421
    /*
422
     * (For debugging)
423
     * Shows a message when normalize() returned the expected value.
424
     */
425
    private static void okay(java.text.Normalizer.Form form,
426
                             String from, String to, String got,
427
                             String file, int line) {
428
        System.out.println("\t" + form.toString() + ": normalize(" +
429
            toHexString(from) + ") equals <" + toHexString(to) +
430
            "> at line " + line + " in " + file + ". Got [" +
431
            toHexString(got) + "]");
432
    }
433

434
    /*
435
     * (For debugging)
436
     * Shows a message when isNormalized() returned the expected value.
437
     */
438
    private static void okay(java.text.Normalizer.Form form, String s,
439
                             String file, int line) {
440
        System.out.println("\t" + form.toString() + ": isNormalized(" +
441
            toHexString(s) + ") returned the correct value at line " +
442
            line + " in " + file);
443
    }
444

445
    /*
446
     * Returns a spece-delimited hex String
447
     */
448
    private static String toHexString(String s) {
449
        StringBuffer sb = new StringBuffer(" ");
450

451
        for (int i = 0; i < s.length(); i++) {
452
            sb.append(Integer.toHexString(s.charAt(i)));
453
            sb.append(' ');
454
        }
455

456
        return sb.toString();
457
    }
458

459
    /*
460
     * Abstract class to call each Normalizer in java.text or sun.text.
461
     */
462
    private abstract class TestNormalizer {
463
        String name;
464

465
        TestNormalizer(String str) {
466
            name = str;
467
        }
468

469
        String getNormalizerName() {
470
            return name;
471
        }
472

473
        abstract String normalize(CharSequence cs,
474
                                  java.text.Normalizer.Form form,
475
                                  int option);
476

477
        abstract boolean isNormalized(CharSequence cs,
478
                                     java.text.Normalizer.Form form,
479
                                     int option);
480
    }
481

482
    /*
483
     * For java.text.Normalizer
484
     *   - normalize(CharSequence, Normalizer.Form)
485
     *   - isNormalized(CharSequence, Normalizer.Form)
486
     */
487
    private class testJavaNormalizer extends TestNormalizer {
488
        testJavaNormalizer() {
489
            super("java.text.Normalizer");
490
        }
491

492
        String normalize(CharSequence cs,
493
                         java.text.Normalizer.Form form,
494
                         int option) {
495
            return java.text.Normalizer.normalize(cs, form);
496
        }
497

498
        boolean isNormalized(CharSequence cs,
499
                             java.text.Normalizer.Form form,
500
                             int option) {
501
            return java.text.Normalizer.isNormalized(cs, form);
502
        }
503
    }
504

505
    /*
506
     * For sun.text.Normalizer
507
     *   - normalize(CharSequence, Normalizer.Form, int)
508
     *   - isNormalized(CharSequence, Normalizer.Form, int)
509
     */
510
    private class testSunNormalizer extends TestNormalizer {
511
        testSunNormalizer() {
512
            super("sun.text.Normalizer");
513
        }
514

515
        String normalize(CharSequence cs,
516
                         java.text.Normalizer.Form form,
517
                         int option) {
518
            return sun.text.Normalizer.normalize(cs, form, option);
519
        }
520

521
        boolean isNormalized(CharSequence cs,
522
                             java.text.Normalizer.Form form,
523
                             int option) {
524
            return sun.text.Normalizer.isNormalized(cs, form, option);
525
        }
526
    }
527
}
528

529
Product

Resources

Company