CoCalc -- DataValidationTest.java

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/test/jdk/java/text/Normalizer/DataValidationTest.java
⁴¹¹⁴⁹ views
/*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 * test
 * bug  4221795
 * summary Confirm *.icu data using ICU4J Normalizer
 */
28

29
import java.io.BufferedReader;
30
import java.io.FileInputStream;
31
import java.io.InputStreamReader;
32
import java.nio.charset.Charset;
33
import java.nio.charset.CharsetDecoder;
34
import java.util.BitSet;
35
import java.util.StringTokenizer;
36

37
import com.ibm.icu.text.Normalizer;
38
import com.ibm.icu.impl.NormalizerImpl;
39

40
/**
41
 * This is not a test program but a data validation utility.
42
 * Two datafiles for Normalizer, unorm.icu and uprops.icu under
43
 * sun/text/resouces, are generated using generators in ICU4C 3.2 on a
44
 * BIG-ENDIAN machine. Before using them with java.text.Normalizer and
45
 * sun.text.Normalizer, you may want to check these test datafile's validation.
46
 * You can test datafiles using Normalizer in ICU4J 3.2. Download ICU4J 3.2 and
47
 * run this test program with -cp <ICU4J 3.2>.
48
 */
49
public class DataValidationTest {
50

51
    //
52
    // Options to be used with com.ibm.icu.text.Normalizer
53
    //
54

55
    /*
56
     * Default Unicode 3.2.0 normalization.
57
     *
58
     *   - With Corrigendum 4 fix
59
     *     (Different from Mustang's Normalizer.)
60
     *   - With Public Review Issue #29 fix
61
     *     (Different from Mustang's Normalizer.)
62
     */
63
    private static final int UNICODE_3_2_0 = Normalizer.UNICODE_3_2;
64

65
    /*
66
     * *Incomplete* Unicode 3.2.0 normalization for IDNA/StringPrep.
67
     *
68
     *   - With Corrigendum 4 fix
69
     *   - Without Public Review Issue #29 fix
70
     *
71
     * ICU4J's Normalizer itself doesn't support normalization for Unicode 3.2.0
72
     * without Corrigendum 4 fix, which is necessary for IDNA/StringPrep. It is
73
     * done in StringPrep. Therefore, we don't test the normlaization in this
74
     * test program. We merely test normalization for Unicode 3.2.0 without
75
     * Public Review Issue #29 fix with this test program.
76
     */
77
    private static final int UNICODE_3_2_0_BEFORE_PRI_29 =
78
                                 Normalizer.UNICODE_3_2 |
79
                                 NormalizerImpl.BEFORE_PRI_29;
80

81
    /*
82
     * Default normalization.
83
     *
84
     *   - Unicode 4.0.1
85
     *     (Different from Mustang's Normalizer.)
86
     *   - With Corrigendum 4 fix
87
     *   - With Public Review Issue #29 fix
88
     *     (Different from Mustang's Normalizer.)
89
     *
90
     * Because Public Review Issue #29 is fixed in Unicode 4.1.0. I think that
91
     * IUC4J 3.2 should not support it. But it actually supports PRI #29 fix
92
     * as default....
93
     */
94
    private static final int UNICODE_LATEST = 0x00;
95

96
    /*
97
     * Normalization without Public Review Issue #29 fix.
98
     *
99
     *   - Unicode 4.0.1
100
     *   - Without Corrigendum 4 fix
101
     *   - Without Public Review Issue #29 fix
102
     */
103
    static final int UNICODE_LATEST_BEFORE_PRI_29 =
104
                         NormalizerImpl.BEFORE_PRI_29;
105

106
    //
107
    // Conformance test datafiles
108
    //
109

110
    /*
111
     * Conformance test datafile for normalization for Unicode 3.2.0 with
112
     * Corrigendum 4 corrections. This is NOT an original Conformace test
113
     * data. Some inconvenient test cases are commented out.
114
     * About corrigendum 4, please refer
115
     *   http://www.unicode.org/versions/corrigendum4.html
116
     *
117
     * ICU4J 3.2's Normalizer itself doesn't support normalization for Unicode
118
     * 3.2.0 without Corrigendum 4 corrections. StringPrep helps it. So, we
119
     * don't test the normalization with this test program.
120
     */
121
    static final String DATA_3_2_0 = "NormalizationTest-3.2.0.Corrigendum4.txt";
122

123
    /*
124
     * Conformance test datafile for the latest Unicode which is supported
125
     * by J2SE.
126
     */
127
    static final String DATA_LATEST = "NormalizationTest-Latest.txt";
128

129
   /*
130
    * Decorder
131
    */
132
    static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
133

134
   /*
135
    * List to pick up characters which are not listed in Part1
136
    */
137
    static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);
138

139
   /*
140
    * Shortcuts
141
    */
142
    static final Normalizer.Mode NFC  = com.ibm.icu.text.Normalizer.NFC;
143
    static final Normalizer.Mode NFD  = com.ibm.icu.text.Normalizer.NFD;
144
    static final Normalizer.Mode NFKC = com.ibm.icu.text.Normalizer.NFKC;
145
    static final Normalizer.Mode NFKD = com.ibm.icu.text.Normalizer.NFKD;
146
    static final Normalizer.Mode[] modes = {NFC, NFD, NFKC, NFKD};
147

148

149
    public static void main(String[] args) throws Exception {
150
        test(DATA_3_2_0, UNICODE_3_2_0);
151
        test(DATA_3_2_0, UNICODE_3_2_0_BEFORE_PRI_29);
152
        test(DATA_LATEST, UNICODE_LATEST);
153
        // This test started failing since ICU4J 3.6.
154
//      test(DATA_LATEST, UNICODE_LATEST_BEFORE_PRI_29);
155

156
        /* Unconformity test */
157
//      test(DATA_3_2_0, UNICODE_LATEST);
158
//      test(DATA_LATEST, UNICODE_3_2);
159
    }
160

161
    private static void test(String filename, int unicodeVer) throws Exception {
162

163
        FileInputStream fis = new FileInputStream(filename);
164
        BufferedReader in =
165
            new BufferedReader(new InputStreamReader(fis, decoder));
166

167
        System.out.println("\nStart testing with " + filename +
168
            " for options: " +
169
            (((unicodeVer & Normalizer.UNICODE_3_2) != 0) ?
170
                "Unicode 3.2.0" : "the latest Unicode") + ", " +
171
            (((unicodeVer & NormalizerImpl.BEFORE_PRI_29) != 0) ?
172
                "with" : "without") + " PRI #29 fix");
173

174
        int lineNo = 0;
175
        String text;
176
        String[] columns = new String[6];
177
        boolean part1test = false;
178

179
        while ((text = in.readLine()) != null) {
180
            lineNo ++;
181

182
            char c = text.charAt(0);
183
            if (c == '#') {
184
                continue;
185
            } else if (c == '@') {
186
                if (text.startsWith("@Part")) {
187
                    System.out.println("# Testing data in " + text);
188

189
                    if (text.startsWith("@Part1 ")) {
190
                        part1test = true;
191
                    } else {
192
                        part1test = false;
193
                    }
194

195
                    continue;
196
                }
197
            }
198

199
            prepareColumns(columns, text, filename, lineNo, part1test);
200

201
            testNFC(columns, unicodeVer, filename, lineNo);
202
            testNFD(columns, unicodeVer, filename, lineNo);
203
            testNFKC(columns, unicodeVer, filename, lineNo);
204
            testNFKD(columns, unicodeVer, filename, lineNo);
205
        }
206

207
        in.close();
208
        fis.close();
209

210
        if (unicodeVer == UNICODE_LATEST) {
211
            System.out.println("# Testing characters which are not listed in Part1");
212
            testRemainingChars(filename, unicodeVer);
213
        }
214
    }
215

216
    /*
217
     * Test for NFC
218
     *
219
     *   c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
220
     *   c4 ==  NFC(c4) ==  NFC(c5)
221
     */
222
    private static void testNFC(String[] c, int unicodeVer,
223
                                String file, int line) throws Exception {
224
        test(2, c, 1, 3, NFC, unicodeVer, file, line);
225
        test(4, c, 4, 5, NFC, unicodeVer, file, line);
226
    }
227

228
    /*
229
     * Test for NFD
230
     *
231
     *   c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
232
     *   c5 ==  NFD(c4) ==  NFD(c5)
233
     */
234
    private static void testNFD(String[] c, int unicodeVer,
235
                                String file, int line) throws Exception {
236
        test(3, c, 1, 3, NFD, unicodeVer, file, line);
237
        test(5, c, 4, 5, NFD, unicodeVer, file, line);
238
    }
239

240
    /*
241
     * Test for NFKC
242
     *
243
     *   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
244
     */
245
    private static void testNFKC(String[] c, int unicodeVer,
246
                                 String file, int line) throws Exception {
247
        test(4, c, 1, 5, NFKC, unicodeVer, file, line);
248
    }
249

250
    /*
251
     * Test for NFKD
252
     *
253
     *   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
254
     */
255
    private static void testNFKD(String[] c, int unicodeVer,
256
                                 String file, int line) throws Exception {
257
        test(5, c, 1, 5, NFKD, unicodeVer, file, line);
258
    }
259

260
    /*
261
     * Test for characters which aren't listed in Part1
262
     *
263
     *   X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
264
     */
265
    private static void testRemainingChars(String file,
266
                                           int unicodeVer) throws Exception {
267
        for (int i = Character.MIN_CODE_POINT;
268
             i <= Character.MAX_CODE_POINT;
269
             i++) {
270
            if (!charList.get(i)) {
271
                String from = String.valueOf(Character.toChars(i));
272
                String to;
273

274
                for (int j = 0; j < modes.length; j++) {
275
                    Normalizer.Mode mode = modes[j];
276

277
                    to = Normalizer.normalize(from, mode, unicodeVer);
278
                    if (!from.equals(to)) {
279
                        error(mode, from, from, to, file, -1);
280
//                  } else {
281
//                      okay(mode, from, from, to, file, -1);
282
                    }
283

284
                    if (!Normalizer.isNormalized(from, mode, unicodeVer)) {
285
                        error(mode, from, file, -1);
286
//                  } else {
287
//                      okay(mode, from, file, -1);
288
                    }
289
                }
290
            }
291
        }
292
    }
293

294
    /*
295
     * Test normalize() and isNormalized()
296
     */
297
    private static void test(int col, String[] c,
298
                             int FROM, int TO,
299
                             Normalizer.Mode mode, int unicodeVer,
300
                             String file, int line) throws Exception {
301
        for (int i = FROM; i <= TO; i++) {
302
            String got = Normalizer.normalize(c[i], mode, unicodeVer);
303
            if (!c[col].equals(got)) {
304
                error(mode, c[i], c[col], got, file, line);
305
//          } else {
306
//              okay(mode, c[i], c[col], got, file, line);
307
            }
308

309
            /*
310
             * If the original String equals its normalized String, it means
311
             * that the original String is normalizerd. Thus, isNormalized()
312
             * should return true. And, vice versa!
313
             */
314
            if (c[col].equals(c[i])) {
315
                if (!Normalizer.isNormalized(c[i], mode, unicodeVer)) {
316
                    error(mode, c[i], file, line);
317
//              } else {
318
//                  okay(mode, c[i], file, line);
319
                }
320
            } else {
321
                if (Normalizer.isNormalized(c[i], mode, unicodeVer)) {
322
                    error(mode, c[i], file, line);
323
//              } else {
324
//                  okay(mode, c[i], file, line);
325
                }
326
            }
327
        }
328
    }
329

330
    /*
331
     * Generate an array of String from a line of conformance datafile.
332
     */
333
    private static void prepareColumns(String[] col, String text,
334
                                       String file, int line,
335
                                       boolean part1test) throws Exception {
336
        int index = text.indexOf('#');
337
        if (index != -1) {
338
            text = text.substring(0, index);
339
        }
340

341
        StringTokenizer st = new StringTokenizer(text, ";");
342
        int tokenCount = st.countTokens();
343
        if (tokenCount < 5) {
344
             throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file);
345
        }
346

347
        StringBuffer sb = new StringBuffer();
348
        for (int i = 1; i <= 5; i++) {
349
            StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");
350

351
            while (tst.hasMoreTokens()) {
352
                int code = Integer.parseInt(tst.nextToken(), 16);
353
                sb.append(Character.toChars(code));
354
            }
355

356
            col[i] = sb.toString();
357
            sb.setLength(0);
358
        }
359

360
        if (part1test) {
361
            charList.set(col[1].codePointAt(0));
362
        }
363
    }
364

365
    /*
366
     * Show an error message when normalize() didn't return the expected value.
367
     * (An exception is sometimes convenient. Therefore, it is commented out
368
     * for the moment.)
369
     */
370
    private static void error(Normalizer.Mode mode,
371
                              String from, String to, String got,
372
                              String file, int line) throws Exception {
373
        System.err.println("\t" + toString(mode) + ": normalize(" +
374
            toHexString(from) + ") doesn't equal <" + toHexString(to) +
375
            "> at line " + line + " in " + file + ". Got <" +
376
            toHexString(got) + ">.");
377
//      throw new RuntimeException("Normalization(" + toString(mode) + ") failed");
378
    }
379

380
    /*
381
     * Show an error message when isNormalize() didn't return the expected value.
382
     * (An exception is sometimes convenient. Therefore, it is commented out
383
     * for the moment.)
384
     */
385
    private static void error(Normalizer.Mode mode, String orig,
386
                              String file, int line) throws Exception {
387
        System.err.println("\t" + toString(mode) + ": isNormalized(" +
388
            toHexString(orig) + ") returned the wrong value at line " + line +
389
            " in " + file + ".");
390
//      throw new RuntimeException("Normalization(" + toString(mode) +") failed");
391
    }
392

393
    /*
394
     * (For debugging)
395
     * Shows a message when normalize() returned the expected value.
396
     */
397
    private static void okay(Normalizer.Mode mode,
398
                             String from, String to, String got,
399
                             String file, int line) {
400
        System.out.println("\t" + toString(mode) + ": normalize(" +
401
            toHexString(from) + ") equals <" + toHexString(to) +
402
            "> at line " + line + " in " + file + ". Got <" +
403
            toHexString(got) + ">.");
404
    }
405

406
    /*
407
     * (For debugging)
408
     * Shows a message when isNormalized() returned the expected value.
409
     */
410
    private static void okay(Normalizer.Mode mode, String orig,
411
                             String file, int line) {
412
        System.out.println("\t" + toString(mode) + ": isNormalized(" +
413
            toHexString(orig) + ") returned the correct value at line " +
414
            line + " in " + file + ".");
415
    }
416

417
    /*
418
     * Returns a spece-delimited hex String
419
     */
420
    private static String toHexString(String s) {
421
        StringBuffer sb = new StringBuffer(" ");
422

423
        for (int i = 0; i < s.length(); i++) {
424
            sb.append(Integer.toHexString(s.charAt(i)));
425
            sb.append(' ');
426
        }
427

428
        return sb.toString();
429
    }
430

431
   /*
432
    * Returns the name of Normalizer.Mode
433
    */
434
    private static String toString(Normalizer.Mode mode) {
435
        if (mode == Normalizer.NFC) {
436
            return "NFC";
437
        } else if (mode == Normalizer.NFD) {
438
            return "NFD";
439
        } else if (mode == Normalizer.NFKC) {
440
            return "NFKC";
441
        } else if (mode == Normalizer.NFKD) {
442
            return "NFKD";
443
        }
444

445
        return "unknown";
446
    }
447
}
448

449
Product

Resources

Company