Path: blob/master/test/jdk/java/text/Normalizer/ICUBasicTest.java
41149 views
/*
 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4221795 8032446 8174270
 * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's
 * src/com/ibm/icu/dev/test and modified.
 * @modules java.base/sun.text java.base/jdk.internal.icu.text
 * @library /java/text/testlib
 * @compile -XDignore.symbol.file ICUBasicTest.java
 * @run main/timeout=30 ICUBasicTest
 */

/*
 *******************************************************************************
 * Copyright (C) 1996-2004, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 *******************************************************************************
 */

import sun.text.Normalizer;
import jdk.internal.icu.text.NormalizerBase;

import static java.text.Normalizer.Form.*;

/**
 * Basic Unicode-normalization conformance tests imported from ICU4J 3.2
 * (src/com/ibm/icu/dev/test) and adapted to the JDK's internal normalizer.
 *
 * NOTE(review): the public {@code Test*} methods are presumably discovered
 * and invoked reflectively by the {@code IntlTest} harness from
 * /java/text/testlib (see {@code run(args)} in {@code main}) — confirm
 * against the testlib source. Renaming any {@code Test*} method would
 * silently drop it from the run.
 */
public class ICUBasicTest extends IntlTest {

    public static void main(String[] args) throws Exception {
        new ICUBasicTest().run(args);
    }

    /*
     * Normalization modes
     */
    private static final NormalizerBase.Mode NFCmode  = NormalizerBase.NFC;
    private static final NormalizerBase.Mode NFDmode  = NormalizerBase.NFD;
    private static final NormalizerBase.Mode NFKCmode = NormalizerBase.NFKC;
    private static final NormalizerBase.Mode NFKDmode = NormalizerBase.NFKD;
    private static final NormalizerBase.Mode NONEmode = NormalizerBase.NONE;

    /*
     * Normalization options
     */

    /* Normal Unicode versions */
    private static final int UNICODE_3_2_0  = Normalizer.UNICODE_3_2;
    private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;

    /*
     * Special cases for UAX #15 bug
     * see Unicode Public Review Issue #29
     * at http://www.unicode.org/review/resolved-pri.html#pri29
     *
     * Note:
     * PRI #29 is supported in Unicode 4.1.0. Therefore, expected results are
     * different for earlier Unicode versions.
     */
    public void TestComposition() {

        // Each case: (form, Unicode version option, input, expected output).
        // Input and expected strings are identical here: these PRI #29
        // sequences must NOT compose under either Unicode version option.
        final TestCompositionCase cases[] = new TestCompositionCase[] {
            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
        };

        String output;
        int i, length;  // 'length' is unused; retained from the ICU4J original

        for (i = 0; i < cases.length; ++i) {
            output = Normalizer.normalize(cases[i].input,
                                          cases[i].form, cases[i].options);
            if (!output.equals(cases[i].expect)) {
                errln("unexpected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            } else if (verbose) {
                logln("expected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            }
        }
    }

    /**
     * One composition test vector: normalize {@code input} with
     * {@code form}/{@code options} and expect {@code expect}.
     */
    private final static class TestCompositionCase {
        public java.text.Normalizer.Form form;
        public int options;
        public String input, expect;

        TestCompositionCase(java.text.Normalizer.Form form,
                            int options,
                            String input,
                            String expect) {
            this.form    = form;
            this.options = options;
            this.input   = input;
            this.expect  = expect;
        }
    }

    /*
     * Added in order to detect a regression.
     */
    public void TestCombiningMarks() {
        // Tibetan vowel signs: NFD must decompose U+0F73/U+0F75 and put the
        // combining marks into canonical order.
        String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
        String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
        String result   = NormalizerBase.normalize(src, NFD);

        if (!expected.equals(result)) {
            errln("Reordering of combining marks failed. Expected: " +
                  toHexString(expected) + " Got: " + toHexString(result));
        }
    }

    /*
     * Added in order to detect a regression.
     */
    public void TestBengali() throws Exception {
        // The input is already in NFC; normalization must leave it unchanged.
        String input  = "\u09bc\u09be\u09cd\u09be";
        String output = NormalizerBase.normalize(input, NFC);

        if (!input.equals(output)) {
            errln("ERROR in NFC of string");
        }
        return;
    }


    /*
     * Added in order to detect a regression.
     */
    /**
     * Test for a problem found by Verisign. Problem is that
     * characters at the start of a string are not put in canonical
     * order correctly by compose() if there is no starter.
     */
    public void TestVerisign() throws Exception {
        String[] inputs = {
            "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
            "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
        };
        // Expected result for BOTH NFD and NFC: these Hebrew-point sequences
        // have no composites, so composing equals decomposing (reordering only).
        String[] outputs = {
            "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
            "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
        };

        for (int i = 0; i < inputs.length; ++i) {
            String input  = inputs[i];
            String output = outputs[i];

            String result = NormalizerBase.normalize(input, NFD);
            if (!result.equals(output)) {
                errln("FAIL input: " + toHexString(input) + "\n" +
                      " decompose: " + toHexString(result) + "\n" +
                      "  expected: " + toHexString(output));
            }

            result = NormalizerBase.normalize(input, NFC);
            if (!result.equals(output)) {
                errln("FAIL input: " + toHexString(input) + "\n" +
                      "   compose: " + toHexString(result) + "\n" +
                      "  expected: " + toHexString(output));
            }
        }
    }

    /**
     * Test for a problem that showed up just before ICU 1.6 release
     * having to do with combining characters with an index of zero.
     * Such characters do not participate in any canonical
     * decompositions. However, having an index of zero means that
     * they all share one typeMask[] entry, that is, they all have to
     * map to the same canonical class, which is not the case, in
     * reality.
     */
    public void TestZeroIndex() throws Exception {
        String[] DATA = {
            // Expect col1 x COMPOSE_COMPAT => col2
            // Expect col2 x DECOMP => col3
            "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
            "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
            "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
        };

        for (int i = 0; i < DATA.length; i += 3) {
            String a = DATA[i];
            String b = NormalizerBase.normalize(a, NFKC);
            String exp = DATA[i+1];

            if (b.equals(exp)) {
                logln("Ok: " + toHexString(a) + " x COMPOSE_COMPAT => " +
                      toHexString(b));
            } else {
                errln("FAIL: " + toHexString(a) + " x COMPOSE_COMPAT => " +
                      toHexString(b) + ", expect " + toHexString(exp));
            }

            a = NormalizerBase.normalize(b, NFD);
            exp = DATA[i+2];
            if (a.equals(exp)) {
                logln("Ok: " + toHexString(b) + " x DECOMP => " +
                      toHexString(a));
            } else {
                errln("FAIL: " + toHexString(b) + " x DECOMP => " +
                      toHexString(a) + ", expect " + toHexString(exp));
            }
        }
    }

    /**
     * Make sure characters in the CompositionExclusion.txt list do not get
     * composed to.
     */
    public void TestCompositionExclusion() throws Exception {
        // This list is generated from CompositionExclusion.txt.
        // Update whenever the normalizer tables are updated. Note
        // that we test all characters listed, even those that can be
        // derived from the Unicode DB and are therefore commented
        // out.

        /*
         * kyuka's note:
         * Original data seemed to be based on Unicode 3.0.0(the initial
         * Composition Exclusions list) and seemed to have some mistakes.
         * Updated in order to correct mistakes and to support Unicode 4.0.0.
         * And, this table can be used also for Unicode 3.2.0.
         */
        // Each entry is either {single code point} or {rangeStart, rangeEnd}
        // (inclusive), given as UTF-16 strings (surrogate pairs for
        // supplementary characters).
        String[][] EXCLUDED_UNICODE_3_2_0 = {
            {"\u0340"},
            {"\u0341"},
            {"\u0343"},
            {"\u0344"},
            {"\u0374"},
            {"\u037E"},
            {"\u0387"},
            {"\u0958"},
            {"\u0959", "\u095F"},
            {"\u09DC"},
            {"\u09DD"},
            {"\u09DF"},
            {"\u0A33"},
            {"\u0A36"},
            {"\u0A59", "\u0A5B"},
            {"\u0A5E"},
            {"\u0B5C"},
            {"\u0B5D"},
            {"\u0F43"},
            {"\u0F4D"},
            {"\u0F52"},
            {"\u0F57"},
            {"\u0F5C"},
            {"\u0F69"},
            {"\u0F73"},
            {"\u0F75"},
            {"\u0F76"},
            {"\u0F78"},
            {"\u0F81"},
            {"\u0F93"},
            {"\u0F9D"},
            {"\u0FA2"},
            {"\u0FA7"},
            {"\u0FAC"},
            {"\u0FB9"},
            {"\u1F71"},
            {"\u1F73"},
            {"\u1F75"},
            {"\u1F77"},
            {"\u1F79"},
            {"\u1F7B"},
            {"\u1F7D"},
            {"\u1FBB"},
            {"\u1FBE"},
            {"\u1FC9"},
            {"\u1FCB"},
            {"\u1FD3"},
            {"\u1FDB"},
            {"\u1FE3"},
            {"\u1FEB"},
            {"\u1FEE"},
            {"\u1FEF"},
            {"\u1FF9"},
            {"\u1FFB"},
            {"\u1FFD"},
            {"\u2000"},
            {"\u2001"},
            {"\u2126"},
            {"\u212A"},
            {"\u212B"},
            {"\u2329"},
            {"\u232A"},
            {"\u2ADC"},
            {"\uF900", "\uFA0D"},
            {"\uFA10"},
            {"\uFA12"},
            {"\uFA15", "\uFA1E"},
            {"\uFA20"},
            {"\uFA22"},
            {"\uFA25"},
            {"\uFA26"},
            {"\uFA2A", "\uFA2D"},
            {"\uFA30", "\uFA6A"},
            {"\uFB1D"},
            {"\uFB1F"},
            {"\uFB2A", "\uFB36"},
            {"\uFB38", "\uFB3C"},
            {"\uFB3E"},
            {"\uFB40"},
            {"\uFB41"},
            {"\uFB43"},
            {"\uFB44"},
            {"\uFB46", "\uFB4E"},
            {"\uD834\uDD5E", "\uD834\uDD64"},
            {"\uD834\uDDBB", "\uD834\uDDC0"},
            {"\uD87E\uDC00", "\uD87E\uDE1D"}
        };

        // NOTE(review): declared but never read below — presumably a
        // placeholder for additional latest-Unicode exclusions; confirm
        // whether it was intended to be iterated as well.
        String[][] EXCLUDED_LATEST = {

        };

        for (int i = 0; i < EXCLUDED_UNICODE_3_2_0.length; ++i) {
            if (EXCLUDED_UNICODE_3_2_0[i].length == 1) {
                checkCompositionExclusion_320(EXCLUDED_UNICODE_3_2_0[i][0]);
            } else {
                // Two-element entry: expand the inclusive code-point range.
                int from, to;
                from = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][0], 0);
                to   = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][1], 0);

                for (int j = from; j <= to; j++) {
                    checkCompositionExclusion_320(String.valueOf(Character.toChars(j)));
                }
            }
        }
    }

    /**
     * Checks one excluded character: after NFKD then NFC, the result must
     * NOT round-trip back to the original character (it is excluded from
     * composition), under both the latest Unicode tables and Unicode 3.2.0.
     */
    private void checkCompositionExclusion_320(String s) throws Exception {
        String a = String.valueOf(s);
        String b = NormalizerBase.normalize(a, NFKD);
        String c = NormalizerBase.normalize(b, NFC);

        if (c.equals(a)) {
            errln("FAIL: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for the latest Unicode");
        } else if (verbose) {
            logln("Ok: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for the latest Unicode");
        }

        b = NormalizerBase.normalize(a, NFKD, Normalizer.UNICODE_3_2);
        c = NormalizerBase.normalize(b, NFC, Normalizer.UNICODE_3_2);
        if (c.equals(a)) {
            errln("FAIL: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for Unicode 3.2.0");
        } else if (verbose) {
            logln("Ok: " + toHexString(a) + " x DECOMP_COMPAT => " +
                  toHexString(b) + " x COMPOSE => " +
                  toHexString(c) + " for Unicode 3.2.0");
        }
    }

    public void TestTibetan() throws Exception {
        // U+0F77 is excluded from composition but decomposes canonically.
        String[][] decomp = {
            { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
        };
        String[][] compose = {
            { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
        };

        staticTest(NFD,  decomp,  1);
        staticTest(NFKD, decomp,  2);
        staticTest(NFC,  compose, 1);
        staticTest(NFKC, compose, 2);
    }

    public void TestExplodingBase() throws Exception {
        // \u017f - Latin small letter long s
        // \u0307 - combining dot above
        // \u1e61 - Latin small letter s with dot above
        // \u1e9b - Latin small letter long s with dot above
        String[][] canon = {
            // Input            Decomposed          Composed
            { "Tschu\u017f",    "Tschu\u017f",      "Tschu\u017f"  },
            { "Tschu\u1e9b",    "Tschu\u017f\u0307","Tschu\u1e9b"  },
        };
        String[][] compat = {
            // Input            Decomposed          Composed
            { "\u017f",         "s",                "s"            },
            { "\u1e9b",         "s\u0307",          "\u1e61"       },
        };

        staticTest(NFD,  canon,  1);
        staticTest(NFC,  canon,  2);
        staticTest(NFKD, compat, 1);
        staticTest(NFKC, compat, 2);
    }

    // Canonical normalization test vectors; column 1 is the NFD expectation,
    // column 2 the NFC expectation (see staticTest's outCol parameter).
    private String[][] canonTests = {
        // Input                Decomposed          Composed

        { "cat",                "cat",              "cat"               },
        { "\u00e0ardvark",      "a\u0300ardvark",   "\u00e0ardvark",    },

        // D-dot_above
        { "\u1e0a",             "D\u0307",          "\u1e0a"            },

        // D dot_above
        { "D\u0307",            "D\u0307",          "\u1e0a"            },

        // D-dot_below dot_above
        { "\u1e0c\u0307",       "D\u0323\u0307",    "\u1e0c\u0307"      },

        // D-dot_above dot_below
        { "\u1e0a\u0323",       "D\u0323\u0307",    "\u1e0c\u0307"      },

        // D dot_below dot_above
        { "D\u0307\u0323",      "D\u0323\u0307",    "\u1e0c\u0307"      },

        // D dot_below cedilla dot_above
        { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307", "\u1e10\u0323\u0307"},

        // D dot_above ogonek dot_below
        { "D\u0307\u0328\u0323","D\u0328\u0323\u0307", "\u1e0c\u0328\u0307"},

        // E-macron-grave
        { "\u1E14",             "E\u0304\u0300",    "\u1E14"            },

        // E-macron + grave
        { "\u0112\u0300",       "E\u0304\u0300",    "\u1E14"            },

        // E-grave + macron
        { "\u00c8\u0304",       "E\u0300\u0304",    "\u00c8\u0304"      },

        // angstrom_sign
        { "\u212b",             "A\u030a",          "\u00c5"            },

        // A-ring
        { "\u00c5",             "A\u030a",          "\u00c5"            },
        { "\u00c4ffin",         "A\u0308ffin",      "\u00c4ffin"        },
        { "\u00c4\uFB03n",      "A\u0308\uFB03n",   "\u00c4\uFB03n"     },

        //updated with 3.0
        { "\u00fdffin",         "y\u0301ffin",      "\u00fdffin"        },
        { "\u00fd\uFB03n",      "y\u0301\uFB03n",   "\u00fd\uFB03n"     },

        { "Henry IV",           "Henry IV",         "Henry IV"          },
        { "Henry \u2163",       "Henry \u2163",     "Henry \u2163"      },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",     "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",     "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Hankaku-Katakana)
        { "\uFF76\uFF9E",       "\uFF76\uFF9E",     "\uFF76\uFF9E"      },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\uFF9E",     "\u30AB\uFF9E"      },
        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\uFF76\u3099",     "\uFF76\u3099"      },

        { "A\u0300\u0316",      "A\u0316\u0300",    "\u00C0\u0316"      },

        { "\ud834\udd5e\ud834\udd57\ud834\udd65\ud834\udd5e",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65" },
    };

    // Compatibility normalization test vectors; column 1 is the NFKD
    // expectation, column 2 the NFKC expectation.
    private String[][] compatTests = {
        // Input                Decomposed          Composed

        { "cat",                "cat",              "cat"               },

        // Alef-Lamed vs. Alef, Lamed
        { "\uFB4f",             "\u05D0\u05DC",     "\u05D0\u05DC",     },

        { "\u00C4ffin",         "A\u0308ffin",      "\u00C4ffin"        },

        // ffi ligature -> f + f + i
        { "\u00C4\uFB03n",      "A\u0308ffin",      "\u00C4ffin"        },

        //updated for 3.0
        { "\u00fdffin",         "y\u0301ffin",      "\u00fdffin"        },

        // ffi ligature -> f + f + i
        { "\u00fd\uFB03n",      "y\u0301ffin",      "\u00fdffin"        },

        { "Henry IV",           "Henry IV",         "Henry IV"          },
        { "Henry \u2163",       "Henry IV",         "Henry IV"          },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",     "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",     "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\u30AB\u3099",     "\u30AC"            },

        /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
        // ka(Hankaku-Katakana) + ten(Hankaku)
        { "\uFF76\uFF9E",       "\u30AB\u3099",     "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\u3099",     "\u30AC"            },
    };

    public void TestNFD() throws Exception {
        staticTest(NFD, canonTests, 1);
    }

    public void TestNFC() throws Exception {
        staticTest(NFC, canonTests, 2);
    }

    public void TestNFKD() throws Exception {
        staticTest(NFKD, compatTests, 1);
    }

    public void TestNFKC() throws Exception {
        staticTest(NFKC, compatTests, 2);
    }

    /**
     * Normalizes column 0 of each row with {@code form} via the public
     * java.text.Normalizer API and compares against column {@code outCol}
     * (1 = decomposed expectation, 2 = composed expectation).
     */
    private void staticTest(java.text.Normalizer.Form form,
                            String[][] tests,
                            int outCol) throws Exception {
        for (int i = 0; i < tests.length; i++) {
            String input = tests[i][0];
            logln("Normalizing '" + input + "' (" + toHexString(input) + ")" );

            String expect = tests[i][outCol];
            String output = java.text.Normalizer.normalize(input, form);

            if (!output.equals(expect)) {
                errln("FAIL: case " + i
                    + " expected '" + expect + "' (" + toHexString(expect) + ")"
                    + " but got '" + output + "' (" + toHexString(output) + ")"
                );
            }
        }
    }

    // With Canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo
    private String[][] hangulCanon = {
        // Input                Decomposed          Composed
        { "\ud4db",             "\u1111\u1171\u11b6", "\ud4db"          },
        { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6", "\ud4db"          },
    };

    public void TestHangulCompose() throws Exception {
        logln("Canonical composition...");
        staticTest(NFC, hangulCanon, 2);
    }

    public void TestHangulDecomp() throws Exception {
        logln("Canonical decomposition...");
        staticTest(NFD, hangulCanon, 1);
    }

}