Path: blob/master/test/jdk/sun/nio/cs/NIOJISAutoDetectTest.java
41149 views
/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4831163 5053096 5056440 8022224
 * @summary NIO charset basic verification of JISAutodetect decoder
 * @modules jdk.charsets
 * @author Martin Buchholz
 */

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import static java.lang.System.*;

/**
 * Basic verification of the {@code JISAutoDetect} charset decoder.
 *
 * <p>Individual checks do not abort the run: each failed check prints a
 * message, bumps a failure counter, and {@link #main} throws a single
 * {@link RuntimeException} at the end if the counter is non-zero.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far. */
    private static int failures = 0;

    /**
     * Records one failure and prints its message to standard output.
     *
     * @param failureMsg text describing what went wrong
     */
    private static void fail(String failureMsg) {
        out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure, and dumps the current stack to standard error,
     * when {@code cond} is false.
     *
     * @param cond the condition that is expected to hold
     * @param msg  label appended to the {@code "test failed: "} prefix
     */
    private static void check(boolean cond, String msg) {
        if (cond) {
            return;
        }
        fail("test failed: " + msg);
        // The stack trace identifies which call site produced the failure.
        new Exception().printStackTrace();
    }

    /**
     * Returns the name of the charset the auto-detecting decoder settles on
     * for a fixed four-byte sample; {@link #main} uses that name as "SJIS".
     */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
                                            (byte)0xcf, (byte)0xb2});
    }

    /**
     * Returns the name of the charset the auto-detecting decoder settles on
     * for a fixed four-byte sample; {@link #main} uses that name as "EUCJ".
     */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
                                            (byte)0xa4, (byte)0xe9});
    }

    /**
     * Decodes {@code bytes} with a fresh {@code JISAutodetect} decoder and
     * returns the name of the charset it reports as detected.
     *
     * <p>Along the way this exercises the auto-detection methods of
     * {@link CharsetDecoder}: {@code isAutoDetecting}, {@code isCharsetDetected}
     * and {@code detectedCharset}.
     *
     * @param bytes input expected to let the decoder make a decision
     * @return {@code detectedCharset().name()} after decoding {@code bytes}
     * @throws Exception if the charset lookup or a decode call fails
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        //----------------------------------------------------------------
        // Test special public methods of CharsetDecoder while we're here
        //----------------------------------------------------------------
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutodetecting()");
        check(! cd.isCharsetDetected(), "isCharsetDetected");

        // A single 'A' is expected to leave the decoder undecided.
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        check(! cd.isCharsetDetected(), "isCharsetDetected");
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // Intentionally ignored: asking for the detected charset before
            // one has been detected is supposed to throw.
        }

        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every check in order and throws if any of them failed.
     *
     * @param argv ignored
     * @throws RuntimeException if at least one check recorded a failure
     * @throws Exception        if a charset lookup, I/O or decode call fails
     */
    public static void main(String[] argv) throws Exception {
        //----------------------------------------------------------------
        // Used to throw BufferOverflowException
        //----------------------------------------------------------------
        out.println(new String(new byte[] {0x61}, "JISAutoDetect"));

        checkReaderTerminates();
        checkAllChars();
        checkByteSequences();
        checkAmbiguousEndOfInput();
        checkBug8022224();

        if (failures > 0)
            throw new RuntimeException(failures + " tests failed");
    }

    /** Verifies that an InputStreamReader over JISAutoDetect reaches end of input. */
    private static void checkReaderTerminates() throws Exception {
        //----------------------------------------------------------------
        // InputStreamReader(...JISAutoDetect) used to infloop
        //----------------------------------------------------------------
        // Explicit charset so the input bytes do not depend on the platform default.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        try (BufferedReader reader = new BufferedReader(
                 new InputStreamReader(new ByteArrayInputStream(bytes),
                                       "JISAutoDetect"))) {
            // Constant-first equals: a null line is reported as a failed
            // check instead of surfacing as a NullPointerException.
            check("ABCD".equals(reader.readLine()), "first read gets text");
            // used to return "ABCD" on second and subsequent reads
            check(reader.readLine() == null, "second read gets null");
        }
    }

    /**
     * Round-trips every char below U+FFFF (except ESC and U+2014) through the
     * EUC, ISO-2022-JP and SJIS encodings and checks what JISAutoDetect makes
     * of the resulting bytes; prints summary counts at the end.
     */
    private static void checkAllChars() throws Exception {
        //----------------------------------------------------------------
        // Check all Japanese chars for sanity
        //----------------------------------------------------------------
        String SJIS = SJISName();
        String EUCJ = EUCJName();
        out.printf("SJIS charset is %s%n", SJIS);
        out.printf("EUCJ charset is %s%n", EUCJ);

        int cnt2022 = 0;
        int cnteucj = 0;
        int cntsjis = 0;
        int cntBAD = 0;
        // The bound is exclusive, so U+FFFF itself is never visited.
        for (char c = '\u0000'; c < '\uffff'; c++) {
            if (c == '\u001b' ||  // ESC
                c == '\u2014') {  // Em-Dash?
                continue;
            }
            String s = new String(new char[] {c});

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //----------------------------------------------------------------
            byte[] beucj = s.getBytes(EUCJ);
            String seucj = new String(beucj, EUCJ);
            if (! seucj.equals(s)) {
                // Optimization: chars that do not round-trip through EUC
                // are not tried against the other two encodings either.
                continue;
            }
            cnteucj++;
            String sauto = new String(beucj, "JISAutoDetect");
            if (! sauto.equals(seucj)) {
                cntBAD++;
                String ssjis = new String(beucj, SJIS);
                if (! sauto.equals(ssjis)) {
                    fail("Autodetection agrees with neither EUC nor SJIS");
                }
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //----------------------------------------------------------------
            byte[] b2022 = s.getBytes("ISO-2022-JP");
            if (new String(b2022, "ISO-2022-JP").equals(s)) {
                cnt2022++;
                check(new String(b2022, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection");
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //----------------------------------------------------------------
            byte[] bsjis = s.getBytes(SJIS);
            if (new String(bsjis, SJIS).equals(s)) {
                cntsjis++;
                check(new String(bsjis, "JISAutoDetect").equals(s),
                      "SJIS autodetection");
            }
        }
        out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
        out.printf("There are %d SJIS-encodable characters.%n", cntsjis);
        out.printf("There are %d EUC-JP-encodable characters.%n", cnteucj);
        out.printf("There are %d characters that are " +
                   "misdetected as SJIS after being EUC-encoded.%n", cntBAD);
    }

    /** Compares auto-detected decoding with a named charset for fixed byte sequences. */
    private static void checkByteSequences() throws Exception {
        //----------------------------------------------------------------
        // tests for specific byte sequences
        //----------------------------------------------------------------
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP",      new byte[] {'A', 'B', 'C'});
        test("SJIS",        new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9', '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /** Feeds a trailing 0x8f byte without end-of-input and checks it is left unconsumed. */
    private static void checkAmbiguousEndOfInput() throws Exception {
        //----------------------------------------------------------------
        // Check handling of ambiguous end-of-input in middle of first char
        //----------------------------------------------------------------
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);
        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();

        // Only 'A' may be consumed; 0x8f has to wait for more input.
        CoderResult res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Decoding again without new input must not change anything.
        res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Supply the following byte and signal end of input.
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb, cb, true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /** Checks that shrinking the output buffer to one char yields the same CoderResult. */
    private static void checkBug8022224() throws Exception {
        // test #8022224
        Charset cs = Charset.forName("x-JISAutoDetect");
        ByteBuffer bb = ByteBuffer.wrap(new byte[] { 'a', 0x1b, 0x24, 0x40 });
        CharBuffer cb = CharBuffer.wrap(new char[10]);
        CoderResult cr = cs.newDecoder().decode(bb, cb, false);
        bb.rewind();
        cb.clear().limit(1);
        check(cr == cs.newDecoder().decode(bb, cb, false), "#8022224");
    }

    /**
     * Records a failure unless {@code result} is an underflow.
     *
     * @param result the outcome of a decode call
     */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Decodes {@code input} once with {@code x-JISAutoDetect} and once with
     * {@code expectedCharset}, and records a failure if either decode does
     * not end in underflow or if the two decoded strings differ.
     *
     * @param expectedCharset name of the charset whose output is the reference
     * @param input           bytes to decode; must fit in the 128-byte buffer
     * @throws Exception if a charset lookup fails
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        Charset cs = Charset.forName("x-JISAutoDetect");
        CharsetDecoder autoDetect = cs.newDecoder();

        Charset cs2 = Charset.forName(expectedCharset);
        CharsetDecoder decoder = cs2.newDecoder();

        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer charOutput = CharBuffer.allocate(128);
        CharBuffer charExpected = CharBuffer.allocate(128);

        bb.put(input);
        bb.flip();
        // Remember the start so the same bytes can be decoded a second time.
        bb.mark();

        CoderResult result = autoDetect.decode(bb, charOutput, true);
        checkCoderResult(result);
        charOutput.flip();
        String actual = charOutput.toString();

        bb.reset();

        result = decoder.decode(bb, charExpected, true);
        checkCoderResult(result);
        charExpected.flip();
        String expected = charExpected.toString();

        check(actual.equals(expected),
              String.format("actual=%s expected=%s", actual, expected));
    }
}