Path: blob/master/test/jdk/java/text/Normalizer/ConformanceTest.java
41149 views
/*1* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*/22/*23* @test24* @bug 4221795 6565620 6959267 7070436 7198195 8032446 8174270 8221431 823938325* @summary Confirm Normalizer's fundamental behavior26* @library /lib/testlibrary/java/lang27* @modules java.base/sun.text java.base/jdk.internal.icu.text28* @compile -XDignore.symbol.file ConformanceTest.java29* @run main/timeout=3000 ConformanceTest30*/3132import java.io.BufferedReader;33import java.io.File;34import java.io.FileInputStream;35import java.io.InputStreamReader;36import java.nio.charset.Charset;37import java.nio.charset.CharsetDecoder;38import java.util.BitSet;39import java.util.StringTokenizer;4041import jdk.internal.icu.text.NormalizerBase;4243/*44* Conformance test for java.text.Normalizer and sun.text.Normalizer.45*/46public class ConformanceTest {4748//49// Options to be used with sun.text.Normalizer50//5152/*53* Default Unicode 3.2.0 normalization. (Provided for IDNA/StringPrep)54*55* - Without Corrigendum 4 fix56* (Different from ICU4J 3.2's Normalizer.)57* - Without Public Review Issue #29 fix58* (Different from ICU4J 3.2's Normalizer.)59*/60private static final int UNICODE_3_2_0 = sun.text.Normalizer.UNICODE_3_2;6162/*63* Original Unicode 3.2.0 normalization. (Provided for testing only)64*65* - With Corrigendum 4 fix66* - With Public Revilew Issue #29 fix67*/68private static final int UNICODE_3_2_0_ORIGINAL =69NormalizerBase.UNICODE_3_2;7071/*72* Default normalization. In JDK 6,73* - Unicode 4.0.074* - With Corrigendum 4 fix75* - Without Public Review Issue #29 fix76*77* In JDK 7,78* - Unicode 5.1.079* (Different from ICU4J 3.2's Normalizer.)80* - With Corrigendum 4 fix81* - With Public Review Issue #29 fix82*83* In JDK 8,84* - Unicode 6.1.085* - With Corrigendum 4 fix86* - With Public Review Issue #29 fix87*88* When we support Unicode 4.1.0 or later, we need to do normalization89* with Public Review Issue #29 fix. For more details of PRI #29, see90* http://unicode.org/review/pr-29.html .91*/92private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;9394//95// Conformance test datafiles96//9798/*99* Conformance test datafile for Unicode 3.2.0 with Corrigendum4100* corrections.101* This testdata is for sun.text.Normalize(UNICODE_3_2)102*103* This is NOT an original Conformace test data. Some inconvenient test104* cases are commented out. About corrigendum 4, please refer105* http://www.unicode.org/review/resolved-pri.html#pri29106*107*/108static final String DATA_3_2_0_CORRIGENDUM =109"NormalizationTest-3.2.0.Corrigendum4.txt";110111/*112* Conformance test datafile for Unicode 3.2.0 without Corrigendum4113* corrections. This is the original Conformace test data.114*115* This testdata is for sun.text.Normalize(UNICODE_3_2_IDNA)116*/117static final String DATA_3_2_0 = "NormalizationTest-3.2.0.txt";118119/*120* Conformance test datafile for the latest Unicode which is supported121* by J2SE.122* Unicode 4.0.0 is the latest version in JDK 5.0 and JDK 6. Unicode 5.1.0123* in JDK 7, and 6.1.0 in JDK 8. This Unicode can be used via both124* java.text.Normalizer and sun.text.Normalizer.125*126* This testdata is for sun.text.Normalize(UNICODE_LATEST)127*/128static final String DATA_LATEST = "NormalizationTest.txt";129130/*131* Conformance test datafile in ICU4J 3.2.132*/133static final String DATA_ICU = "ICUNormalizationTest.txt";134135/*136* Decorder137*/138static final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();139140/*141* List to pick up characters which are not listed in Part1142*/143static BitSet charList = new BitSet(Character.MAX_CODE_POINT+1);144145/*146* Shortcuts147*/148private static final java.text.Normalizer.Form NFC =149java.text.Normalizer.Form.NFC;150private static final java.text.Normalizer.Form NFD =151java.text.Normalizer.Form.NFD;152private static final java.text.Normalizer.Form NFKC =153java.text.Normalizer.Form.NFKC;154private static final java.text.Normalizer.Form NFKD =155java.text.Normalizer.Form.NFKD;156static final java.text.Normalizer.Form[] forms = {NFC, NFD, NFKC, NFKD};157158159static TestNormalizer normalizer;160161public static void main(String[] args) throws Exception {162ConformanceTest ct = new ConformanceTest();163ct.test();164}165166void test() throws Exception {167normalizer = new testJavaNormalizer();168test(DATA_LATEST, UNICODE_LATEST);169170normalizer = new testSunNormalizer();171test(DATA_3_2_0_CORRIGENDUM, UNICODE_3_2_0);172test(DATA_LATEST, UNICODE_LATEST);173test(DATA_ICU, UNICODE_LATEST);174175/* Unconformity test */176// test(DATA_3_2_0, UNICODE_LATEST);177// test(DATA_LATEST, UNICODE_3_2_0);178}179180/*181* Main routine of conformance test182*/183private static void test(String filename, int unicodeVer) throws Exception {184185File f = filename.equals(DATA_LATEST) ?186UCDFiles.NORMALIZATION_TEST.toFile() :187new File(System.getProperty("test.src", "."), filename);188FileInputStream fis = new FileInputStream(f);189BufferedReader in =190new BufferedReader(new InputStreamReader(fis, decoder));191192System.out.println("\nStart testing for " + normalizer.name +193" with " + filename + " for options: " +194(((unicodeVer & NormalizerBase.UNICODE_3_2) != 0) ?195"Unicode 3.2.0" : "the latest Unicode"));196197int lineNo = 0;198String text;199boolean part1test = false;200boolean part1testExists = false;201String[] columns = new String[6];202203while ((text = in.readLine()) != null) {204lineNo ++;205206char c = text.charAt(0);207if (c == '#') {208continue;209} else if (c == '@') {210if (text.startsWith("@Part")) {211System.out.println("# Testing data in " + text);212213if (text.startsWith("@Part1 ")) {214part1test = true;215part1testExists = true;216} else {217part1test = false;218}219220continue;221}222}223224prepareColumns(columns, text, filename, lineNo, part1test);225226testNFC(columns, unicodeVer, filename, lineNo);227testNFD(columns, unicodeVer, filename, lineNo);228testNFKC(columns, unicodeVer, filename, lineNo);229testNFKD(columns, unicodeVer, filename, lineNo);230}231232in.close();233fis.close();234235if (part1testExists) {236System.out.println("# Testing characters which are not listed in Part1");237testRemainingChars(filename, unicodeVer);238part1testExists = false;239}240}241242/*243* Test for NFC244*245* c2 == NFC(c1) == NFC(c2) == NFC(c3)246* c4 == NFC(c4) == NFC(c5)247*/248private static void testNFC(String[] c, int unicodeVer,249String file, int line) throws Exception {250test(2, c, 1, 3, NFC, unicodeVer, file, line);251test(4, c, 4, 5, NFC, unicodeVer, file, line);252}253254/*255* Test for NFD256*257* c3 == NFD(c1) == NFD(c2) == NFD(c3)258* c5 == NFD(c4) == NFD(c5)259*/260private static void testNFD(String[] c, int unicodeVer,261String file, int line) throws Exception {262test(3, c, 1, 3, NFD, unicodeVer, file, line);263test(5, c, 4, 5, NFD, unicodeVer, file, line);264}265266/*267* Test for NFKC268*269* c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)270*/271private static void testNFKC(String[] c, int unicodeVer,272String file, int line) throws Exception {273test(4, c, 1, 5, NFKC, unicodeVer, file, line);274}275276/*277* Test for NFKD278*279* c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)280*/281private static void testNFKD(String[] c, int unicodeVer,282String file, int line) throws Exception {283test(5, c, 1, 5, NFKD, unicodeVer, file, line);284}285286/*287* Test for characters which aren't listed in Part1288*289* X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)290*/291private static void testRemainingChars(String file,292int unicodeVer) throws Exception {293for (int i = Character.MIN_CODE_POINT;294i <= Character.MAX_CODE_POINT;295i++) {296if (!charList.get(i)) {297String from = String.valueOf(Character.toChars(i));298String to;299300for (int j = 0; j < forms.length; j++) {301java.text.Normalizer.Form form = forms[j];302303to = normalizer.normalize(from, form, unicodeVer);304if (!from.equals(to)) {305error(form, from, from, to, file, -1);306// } else {307// okay(form, from, from, to, file, -1);308}309310if (!normalizer.isNormalized(from, form, unicodeVer)) {311error(form, from, file, -1);312// } else {313// okay(form, from, file, -1);314}315}316}317}318}319320/*321* Test normalize() and isNormalized()322*/323private static void test(int col, String[] c,324int FROM, int TO,325java.text.Normalizer.Form form, int unicodeVer,326String file, int line) throws Exception {327for (int i = FROM; i <= TO; i++) {328String got = normalizer.normalize(c[i], form, unicodeVer);329if (!c[col].equals(got)) {330error(form, c[i], c[col], got, file, line);331// } else {332// okay(form, c[i], c[col], got, file, line);333}334335/*336* If the original String equals its normalized String, it means337* that the original String is normalizerd. Thus, isNormalized()338* should return true. And, vice versa!339*/340if (c[col].equals(c[i])) {341if (!normalizer.isNormalized(c[i], form, unicodeVer)) {342error(form, c[i], file, line);343// } else {344// okay(form, c[i], file, line);345}346} else {347if (normalizer.isNormalized(c[i], form, unicodeVer)) {348error(form, c[i], file, line);349// } else {350// okay(form, c[i], file, line);351}352}353}354}355356/*357* Generate an array of String from a line of conformance datafile.358*/359private static void prepareColumns(String[] cols, String text,360String file, int line,361boolean part1test) throws Exception {362int index = text.indexOf('#');363if (index != -1) {364text = text.substring(0, index);365}366367StringTokenizer st = new StringTokenizer(text, ";");368int tokenCount = st.countTokens();369if (tokenCount < 5) {370throw new RuntimeException("# of tokens in datafile should be 6, but got: " + tokenCount + " at line " + line + " in " + file);371}372373StringBuffer sb = new StringBuffer();374for (int i = 1; i <= 5; i++) {375StringTokenizer tst = new StringTokenizer(st.nextToken(), " ");376377while (tst.hasMoreTokens()) {378int code = Integer.parseInt(tst.nextToken(), 16);379sb.append(Character.toChars(code));380}381382cols[i] = sb.toString();383sb.setLength(0);384}385386if (part1test) {387charList.set(cols[1].codePointAt(0));388}389}390391/*392* Show an error message when normalize() didn't return the expected value.393* (An exception is sometimes convenient. Therefore, it is commented out394* for the moment.)395*/396private static void error(java.text.Normalizer.Form form,397String from, String to, String got,398String file, int line) throws Exception {399System.err.println("-\t" + form.toString() + ": normalize(" +400toHexString(from) + ") doesn't equal <" + toHexString(to) +401"> at line " + line + " in " + file + ". Got [" +402toHexString(got) + "]");403throw new RuntimeException("Normalization(" + form.toString() + ") failed");404}405406/*407* Show an error message when isNormalize() didn't return the expected408* value.409* (An exception is sometimes convenient. Therefore, it is commented out410* for the moment.)411*/412private static void error(java.text.Normalizer.Form form, String s,413String file, int line) throws Exception {414System.err.println("\t" + form.toString() + ": isNormalized(" +415toHexString(s) + ") returned the wrong value at line " + line +416" in " + file);417throw new RuntimeException("Normalization(" + form.toString() +") failed");418}419420/*421* (For debugging)422* Shows a message when normalize() returned the expected value.423*/424private static void okay(java.text.Normalizer.Form form,425String from, String to, String got,426String file, int line) {427System.out.println("\t" + form.toString() + ": normalize(" +428toHexString(from) + ") equals <" + toHexString(to) +429"> at line " + line + " in " + file + ". Got [" +430toHexString(got) + "]");431}432433/*434* (For debugging)435* Shows a message when isNormalized() returned the expected value.436*/437private static void okay(java.text.Normalizer.Form form, String s,438String file, int line) {439System.out.println("\t" + form.toString() + ": isNormalized(" +440toHexString(s) + ") returned the correct value at line " +441line + " in " + file);442}443444/*445* Returns a spece-delimited hex String446*/447private static String toHexString(String s) {448StringBuffer sb = new StringBuffer(" ");449450for (int i = 0; i < s.length(); i++) {451sb.append(Integer.toHexString(s.charAt(i)));452sb.append(' ');453}454455return sb.toString();456}457458/*459* Abstract class to call each Normalizer in java.text or sun.text.460*/461private abstract class TestNormalizer {462String name;463464TestNormalizer(String str) {465name = str;466}467468String getNormalizerName() {469return name;470}471472abstract String normalize(CharSequence cs,473java.text.Normalizer.Form form,474int option);475476abstract boolean isNormalized(CharSequence cs,477java.text.Normalizer.Form form,478int option);479}480481/*482* For java.text.Normalizer483* - normalize(CharSequence, Normalizer.Form)484* - isNormalized(CharSequence, Normalizer.Form)485*/486private class testJavaNormalizer extends TestNormalizer {487testJavaNormalizer() {488super("java.text.Normalizer");489}490491String normalize(CharSequence cs,492java.text.Normalizer.Form form,493int option) {494return java.text.Normalizer.normalize(cs, form);495}496497boolean isNormalized(CharSequence cs,498java.text.Normalizer.Form form,499int option) {500return java.text.Normalizer.isNormalized(cs, form);501}502}503504/*505* For sun.text.Normalizer506* - normalize(CharSequence, Normalizer.Form, int)507* - isNormalized(CharSequence, Normalizer.Form, int)508*/509private class testSunNormalizer extends TestNormalizer {510testSunNormalizer() {511super("sun.text.Normalizer");512}513514String normalize(CharSequence cs,515java.text.Normalizer.Form form,516int option) {517return sun.text.Normalizer.normalize(cs, form, option);518}519520boolean isNormalized(CharSequence cs,521java.text.Normalizer.Form form,522int option) {523return sun.text.Normalizer.isNormalized(cs, form, option);524}525}526}527528529