Path: blob/master/test/jdk/sun/nio/cs/ISCIITest.java
41149 views
/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* @test
 * @bug 4328178
 * @summary Performs baseline and regression test on the ISCII91 charset
 * @modules jdk.charsets
 */

import java.io.UnsupportedEncodingException;

/**
 * Baseline and regression test for the ISCII91 charset.
 *
 * <p>The test decodes single bytes and byte pairs with the "ISCII91" charset
 * and compares the resulting characters against hard-coded expectations.
 * Any mismatch, or the charset being unavailable, makes {@link #main} throw.
 */
public class ISCIITest {

    /** Charset name used for every decode in this test. */
    private static final String CHARSET = "ISCII91";

    /**
     * Verifies that the leading characters of {@code decoded} equal
     * {@code expected}, element by element.
     *
     * @param decoded  string produced by decoding the bytes under test
     * @param expected characters that must appear at the start of
     *                 {@code decoded}, in order
     * @param testName label included in the failure message
     * @throws Exception if {@code decoded} is shorter than {@code expected}
     *                   or any compared character differs
     */
    private static void checkDecoded(String decoded,
                                     char[] expected,
                                     String testName)
        throws Exception
    {
        if (decoded.length() < expected.length) {
            throw new Exception("Failed ISCII91 Regression Test: " + testName
                    + ": decoded " + decoded.length()
                    + " char(s), expected at least " + expected.length);
        }
        for (int i = 0; i < expected.length; i++) {
            if (decoded.charAt(i) != expected[i]) {
                throw new Exception("Failed ISCII91 Regression Test: "
                        + testName + ": char " + i + " is U+"
                        + Integer.toHexString(decoded.charAt(i))
                        + ", expected U+"
                        + Integer.toHexString(expected[i]));
            }
        }
    }

    /**
     * Checks that every byte value in {@code [start, end]} decodes to the
     * character with the same numeric value and encodes back to the same
     * byte.
     *
     * @param start    first byte value to test (inclusive)
     * @param end      last byte value to test (inclusive)
     * @param testName label included in failure messages
     * @throws Exception if a byte does not round-trip, or if the charset is
     *                   not supported (UnsupportedEncodingException)
     */
    private static void mapEquiv(int start,
                                 int end,
                                 String testName)
        throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            // An unsupported charset propagates to the caller instead of
            // being reported and ignored, so the test cannot pass vacuously.
            String unicodeStr = new String(singleByte, CHARSET);

            if (i != (int) unicodeStr.charAt(0)) {
                throw new Exception("Failed ISCII91 Regression test: "
                        + testName + ": input byte is " + i);
            }

            byte[] encoded = unicodeStr.getBytes(CHARSET);
            if (encoded[0] != singleByte[0]) {
                throw new Exception("Failed ISCII91 Regression test: "
                        + "Encoding error " + testName
                        + ": input byte is " + i);
            }
        }
    }

    /**
     * Checks that every byte value in {@code [start, end]} decodes to the
     * replacement character U+FFFD.
     *
     * @param start    first byte value to test (inclusive)
     * @param end      last byte value to test (inclusive)
     * @param testName label included in failure messages
     * @throws Exception if a byte decodes to anything else, or if the
     *                   charset is not supported
     */
    private static void checkUnmapped(int start,
                                      int end,
                                      String testName)
        throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, CHARSET);

            if (unicodeStr.charAt(0) != '\uFFFD') {
                throw new Exception("Failed ISCII91 regression test: "
                        + testName + ": input byte is " + i);
            }
        }
    }

    /**
     * Checks that the byte values {@code start..end} decode, in order, to
     * the characters in {@code expectChars}.
     *
     * @param start       first byte value to test (inclusive)
     * @param end         last byte value to test (inclusive)
     * @param expectChars expected character for each byte, starting at
     *                    {@code start}; must hold at least
     *                    {@code end - start + 1} entries
     * @param testName    label included in failure messages
     * @throws Exception if a decoded character differs from the expectation
     *                   or the charset is not supported
     */
    private static void checkRange(int start, int end,
                                   char[] expectChars,
                                   String testName)
        throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, CHARSET);
            if (unicodeStr.charAt(0) != expectChars[i - start]) {
                throw new Exception("Failed ISCII91 Regression Test: "
                        + testName + ": input byte is 0x"
                        + Integer.toHexString(i));
            }
            // Exercise the encoder through the "ISCII" name as well.
            // NOTE(review): the encoded bytes are not compared with the
            // input byte; confirm the round trip is guaranteed for these
            // ranges before turning this into an assertion.
            unicodeStr.getBytes("ISCII");
        }
    }

    /*
     * Tests the ISCII91 Indic character encoding
     * as per IS 13194:1991 Bureau of Indian Standards.
     */
    private static void test() throws Exception {

        try {

            // ISCII91 is an 8-bit encoding which retains the ASCII
            // mappings in the lower half.

            mapEquiv(0, 0x7f, "7 bit ASCII range");

            // Checks a range of characters which are unmappable according
            // to the standards.

            checkUnmapped(0x81, 0x9f, "UNMAPPED");

            // Vowel Modifier chars can be used to modify the vowel
            // sound of the preceding consonant, vowel or matra character.

            char[] vowelModChars = {
                '\u0901', // Vowel modifier Chandrabindu
                '\u0902', // Vowel modifier Anuswar
                '\u0903'  // Vowel modifier Visarg
            };

            checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");

            char[] expectChars = {
                '\u0905', // a4 -- Vowel A
                '\u0906', // a5 -- Vowel AA
                '\u0907', // a6 -- Vowel I
                '\u0908', // a7 -- Vowel II
                '\u0909', // a8 -- Vowel U
                '\u090a', // a9 -- Vowel UU
                '\u090b', // aa -- Vowel RI
                '\u090e', // ab -- Vowel E ( Southern Scripts )
                '\u090f', // ac -- Vowel EY
                '\u0910', // ad -- Vowel AI
                '\u090d', // ae -- Vowel AYE ( Devanagari Script )
                '\u0912', // af -- Vowel O ( Southern Scripts )
                '\u0913', // b0 -- Vowel OW
                '\u0914', // b1 -- Vowel AU
                '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
            };

            checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");

            char[] expectConsChars = {
                '\u0915', // b3 -- Consonant KA
                '\u0916', // b4 -- Consonant KHA
                '\u0917', // b5 -- Consonant GA
                '\u0918', // b6 -- Consonant GHA
                '\u0919', // b7 -- Consonant NGA
                '\u091a', // b8 -- Consonant CHA
                '\u091b', // b9 -- Consonant CHHA
                '\u091c', // ba -- Consonant JA
                '\u091d', // bb -- Consonant JHA
                '\u091e', // bc -- Consonant JNA
                '\u091f', // bd -- Consonant Hard TA
                '\u0920', // be -- Consonant Hard THA
                '\u0921', // bf -- Consonant Hard DA
                '\u0922', // c0 -- Consonant Hard DHA
                '\u0923', // c1 -- Consonant Hard NA
                '\u0924', // c2 -- Consonant Soft TA
                '\u0925', // c3 -- Consonant Soft THA
                '\u0926', // c4 -- Consonant Soft DA
                '\u0927', // c5 -- Consonant Soft DHA
                '\u0928', // c6 -- Consonant Soft NA
                '\u0929', // c7 -- Consonant NA ( Tamil )
                '\u092a', // c8 -- Consonant PA
                '\u092b', // c9 -- Consonant PHA
                '\u092c', // ca -- Consonant BA
                '\u092d', // cb -- Consonant BHA
                '\u092e', // cc -- Consonant MA
                '\u092f', // cd -- Consonant YA
                '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
                '\u0930', // cf -- Consonant RA
                '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
                '\u0932', // d1 -- Consonant LA
                '\u0933', // d2 -- Consonant Hard LA
                '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
                '\u0935', // d4 -- Consonant VA
                '\u0936', // d5 -- Consonant SHA
                '\u0937', // d6 -- Consonant Hard SHA
                '\u0938', // d7 -- Consonant SA
                '\u0939', // d8 -- Consonant HA
            };

            checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");

            char[] matraChars = {
                '\u093e', // da -- Vowel Sign AA
                '\u093f', // db -- Vowel Sign I
                '\u0940', // dc -- Vowel Sign II
                '\u0941', // dd -- Vowel Sign U
                '\u0942', // de -- Vowel Sign UU
                '\u0943', // df -- Vowel Sign RI
                '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
                '\u0947', // e1 -- Vowel Sign EY
                '\u0948', // e2 -- Vowel Sign AI
                '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
                '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
                '\u094b', // e5 -- Vowel Sign OW
                '\u094c', // e6 -- Vowel Sign AU
                '\u0949'  // e7 -- Vowel Sign AWE ( Devanagari Script )
            };

            // Matras or Vowel signs alter the implicit
            // vowel sound associated with an Indic consonant.

            checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");

            char[] loneContextModifierChars = {
                '\u094d', // e8 -- Vowel Omission Sign ( Halant )
                '\u093c', // e9 -- Diacritic Sign ( Nukta )
                '\u0964'  // ea -- Full Stop ( Viram, Northern Scripts )
            };

            checkRange(0xe8, 0xea,
                       loneContextModifierChars, "LONE INDIC CONTEXT CHARS");

            // Test Indic script numeral chars
            // (as opposed to international numerals)

            char[] expectNumeralChars = {
                '\u0966', // f1 -- Digit 0
                '\u0967', // f2 -- Digit 1
                '\u0968', // f3 -- Digit 2
                '\u0969', // f4 -- Digit 3
                '\u096a', // f5 -- Digit 4
                '\u096b', // f6 -- Digit 5
                '\u096c', // f7 -- Digit 6
                '\u096d', // f8 -- Digit 7
                '\u096e', // f9 -- Digit 8
                '\u096f'  // fa -- Digit 9
            };

            checkRange(0xf1, 0xfa,
                       expectNumeralChars, "NUMERAL/DIGIT CHARACTERS");

            // Expected result for each byte pair in codeExtensionBytes,
            // in the same order.
            char[] expectNuktaSub = {
                '\u0950',
                '\u090c',
                '\u0961',
                '\u0960',
                '\u0962',
                '\u0963',
                '\u0944',
                '\u093d'
            };

            /*
             * ISCII uses a number of code extension techniques
             * to access a number of lesser used characters.
             * The Nukta character which ordinarily signifies
             * a diacritic is used in combination with existing
             * characters to escape them to a different character.
             */

            byte[] codeExtensionBytes = {
                (byte)0xa1, (byte)0xe9, // Chandrabindu + Nukta
                                        // => DEVANAGARI OM SIGN
                (byte)0xa6, (byte)0xe9, // Vowel I + Nukta
                                        // => DEVANAGARI VOCALIC L
                (byte)0xa7, (byte)0xe9, // Vowel II + Nukta
                                        // => DEVANAGARI VOCALIC LL
                (byte)0xaa, (byte)0xe9, // Vowel RI + Nukta
                                        // => DEVANAGARI VOCALIC RR
                (byte)0xdb, (byte)0xe9, // Vowel sign I + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC L
                (byte)0xdc, (byte)0xe9, // Vowel sign II + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC LL

                (byte)0xdf, (byte)0xe9, // Vowel sign Vocalic R + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC RR
                (byte)0xea, (byte)0xe9  // Full stop/Phrase separator + Nukta
                                        // => DEVANAGARI SIGN AVAGRAHA
            };

            byte[] bytePair = new byte[2];

            for (int i = 0; i < codeExtensionBytes.length / 2; i++) {
                bytePair[0] = codeExtensionBytes[2 * i];
                bytePair[1] = codeExtensionBytes[2 * i + 1];

                String unicodeStr = new String(bytePair, CHARSET);
                // Only the first decoded character is compared here.
                checkDecoded(unicodeStr,
                             new char[] { expectNuktaSub[i] },
                             "Nukta Sub");
            }

            byte[] comboBytes = {
                (byte)0xe8, (byte)0xe8, // HALANT + HALANT
                (byte)0xe8, (byte)0xe9  // HALANT + NUKTA aka. Soft Halant
            };
            // Two expected characters per byte pair above.
            char[] expectCombChars = {
                '\u094d',
                '\u200c',
                '\u094d',
                '\u200d'
            };

            for (int i = 0; i < comboBytes.length / 2; i++) {
                bytePair[0] = comboBytes[2 * i];
                bytePair[1] = comboBytes[2 * i + 1];

                String unicodeStr = new String(bytePair, CHARSET);
                // Both decoded characters must match: a mismatch in either
                // position is a failure.
                checkDecoded(unicodeStr,
                             new char[] {
                                 expectCombChars[2 * i],
                                 expectCombChars[2 * i + 1]
                             },
                             "HALANT COMBINATIONS");
            }

        } catch (UnsupportedEncodingException e) {
            System.err.println("ISCII91 encoding not supported");
            throw new Exception("Failed ISCII91 Regression Test", e);
        }
    }

    /**
     * Test entry point; runs the whole ISCII91 regression test.
     *
     * @param args ignored
     * @throws Exception if any check fails
     */
    public static void main(String[] args) throws Exception {
        test();
    }
}