Path: blob/master/src/java.base/share/classes/sun/nio/cs/CharsetMapping.java
41159 views
/*
 * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

// NOTE(review): the original file declares `package sun.nio.cs;` at this point.
// The declaration is left out of this standalone copy; restore it when the
// class is built inside its source tree.

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
import java.security.*;

/**
 * Byte-to-char and char-to-byte lookup tables loaded from a binary
 * {@code .dat} stream.
 *
 * <p>The stream starts with a 4-byte big-endian payload length, followed by
 * a sequence of tagged tables. Every table starts with a 16-bit tag (one of
 * the {@code MAP_*} constants); all values inside a table are 16-bit
 * big-endian. Tables that fill the char-to-byte map (single-byte and
 * double-byte tables) need the char-to-byte index table to have been read
 * before them.
 */
public class CharsetMapping {
    /** Returned by the decode methods for a byte sequence with no mapping. */
    public static final char UNMAPPABLE_DECODING = '\uFFFD';
    /** Returned by the encode methods for a character with no mapping. */
    public static final int UNMAPPABLE_ENCODING = 0xFFFD;

    /** Value in {@link #c2bIndex} marking a 256-char page with no mappings. */
    private static final int UNMAPPED_INDEX = 0xffff;

    char[] b2cSB;                //singlebyte b->c
    char[] b2cDB1;               //doublebyte b->c /db1
    char[] b2cDB2;               //doublebyte b->c /db2

    int b2Min, b2Max;            //min/max(start/end) value of 2nd byte
    int b1MinDB1, b1MaxDB1;      //min/Max(start/end) value of 1st byte/db1
    int b1MinDB2, b1MaxDB2;      //min/Max(start/end) value of 1st byte/db2
    int dbSegSize;               //number of 2nd-byte slots per 1st byte

    char[] c2b;                  //char->bytes, addressed through c2bIndex
    char[] c2bIndex;             //per-page (ch >> 8) offset into c2b

    // Supplementary: first half holds the sorted keys, second half the values
    char[] b2cSupp;
    char[] c2bSupp;

    // Composite
    Entry[] b2cComp;             //in data-file order; searched by byte value
    Entry[] c2bComp;             //sorted by (cp, cp2)

    /**
     * Decodes a single byte.
     *
     * @param b index into the single-byte table
     * @return the table entry for {@code b}
     */
    public char decodeSingle(int b) {
        return b2cSB[b];
    }

    /**
     * Decodes a two-byte sequence using the first double-byte table whose
     * lead-byte range contains {@code b1}.
     *
     * @param b1 lead byte
     * @param b2 trail byte; {@code b2Min} and {@code b2Max} are both inclusive
     * @return the mapped char, or {@link #UNMAPPABLE_DECODING} when either
     *         byte is outside the ranges covered by the tables
     */
    public char decodeDouble(int b1, int b2) {
        // b2Max is inclusive: dbSegSize is b2Max - b2Min + 1 and readDB()
        // produces encodings whose trail byte is b2Max, so it must decode too.
        if (b2 < b2Min || b2 > b2Max) {
            return UNMAPPABLE_DECODING;
        }
        int trail = b2 - b2Min;
        if (b1 >= b1MinDB1 && b1 <= b1MaxDB1) {
            return b2cDB1[(b1 - b1MinDB1) * dbSegSize + trail];
        }
        if (b1 >= b1MinDB2 && b1 <= b1MaxDB2) {
            return b2cDB2[(b1 - b1MinDB2) * dbSegSize + trail];
        }
        return UNMAPPABLE_DECODING;
    }

    // for jis0213 all supplementary characters are in 0x2xxxx range,
    // so only the xxxx part is now stored, should actually store the
    // codepoint value instead.
    /**
     * Decodes a double-byte value that maps to a supplementary character.
     *
     * @param db double-byte value; only its low 16 bits are used as the key
     * @param cc destination for the surrogate pair, filled from index 0
     * @return {@code cc} when a mapping exists, otherwise {@code null}
     *         (also when no supplementary table was loaded)
     */
    public char[] decodeSurrogate(int db, char[] cc) {
        if (b2cSupp == null) {
            return null;
        }
        int end = b2cSupp.length / 2;
        int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);
        if (i < 0) {
            return null;
        }
        Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0);
        return cc;
    }

    /**
     * Decodes a byte sequence that maps to a two-char composite.
     *
     * @param comp key; only {@code comp.bs} is compared
     * @param cc   destination; receives the two chars at index 0 and 1
     * @return {@code cc} when a mapping exists, otherwise {@code null}
     *         (also when no composite table was loaded)
     */
    public char[] decodeComposite(Entry comp, char[] cc) {
        if (b2cComp == null) {
            return null;
        }
        // NOTE(review): the binary search relies on the composite table being
        // stored in ascending byte-value order in the data file - confirm.
        int i = findBytes(b2cComp, comp);
        if (i < 0) {
            return null;
        }
        cc[0] = (char)b2cComp[i].cp;
        cc[1] = (char)b2cComp[i].cp2;
        return cc;
    }

    /**
     * Encodes a single char.
     *
     * @param ch the char to encode
     * @return the byte value stored for {@code ch}, or
     *         {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeChar(char ch) {
        int index = c2bIndex[ch >> 8];
        if (index == UNMAPPED_INDEX) {
            return UNMAPPABLE_ENCODING;
        }
        return c2b[index + (ch & 0xff)];
    }

    /**
     * Encodes a surrogate pair. Only code points in
     * {@code [0x20000, 0x30000)} are looked up, keyed by their low 16 bits.
     *
     * @param hi high surrogate
     * @param lo low surrogate
     * @return the stored byte value, or {@link #UNMAPPABLE_ENCODING} (also
     *         when no supplementary table was loaded)
     */
    public int encodeSurrogate(char hi, char lo) {
        int cp = Character.toCodePoint(hi, lo);
        if (cp < 0x20000 || cp >= 0x30000 || c2bSupp == null) {
            return UNMAPPABLE_ENCODING;
        }
        int end = c2bSupp.length / 2;
        int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp);
        return (i >= 0) ? c2bSupp[end + i] : UNMAPPABLE_ENCODING;
    }

    /**
     * Tells whether {@code comp.cp} is the first char of any composite entry.
     *
     * @param comp key; only {@code comp.cp} is examined
     * @return {@code true} if some composite entry has the same {@code cp}
     */
    public boolean isCompositeBase(Entry comp) {
        // NOTE(review): 0xe6..0x31f7 is presumably the span of base chars
        // present in the composite tables - confirm against the data.
        if (c2bComp == null || comp.cp < 0xe6 || comp.cp > 0x31f7) {
            return false;
        }
        return findCP(c2bComp, comp) >= 0;
    }

    /**
     * Encodes a two-char composite.
     *
     * @param comp key; {@code comp.cp} and {@code comp.cp2} are compared
     * @return the stored byte value, or {@link #UNMAPPABLE_ENCODING} (also
     *         when no composite table was loaded)
     */
    public int encodeComposite(Entry comp) {
        if (c2bComp == null) {
            return UNMAPPABLE_ENCODING;
        }
        int i = findComp(c2bComp, comp);
        return (i >= 0) ? c2bComp[i].bs : UNMAPPABLE_ENCODING;
    }

    // init the CharsetMapping object from the .dat binary file
    /**
     * Loads a mapping from {@code is} inside a privileged action.
     *
     * @param is stream holding the binary mapping data; closed by the load
     * @return the loaded mapping, or {@code null} if reading failed with an
     *         {@link IOException}
     * @throws RuntimeException if the data is truncated or has an unknown tag
     */
    @SuppressWarnings("removal")
    public static CharsetMapping get(final InputStream is) {
        return AccessController.doPrivileged(new PrivilegedAction<>() {
            @Override
            public CharsetMapping run() {
                return new CharsetMapping().load(is);
            }
        });
    }

    /** One row of the composite table; also used as the lookup key. */
    public static class Entry {
        public int bs;   //byte sequence reps
        public int cp;   //Unicode codepoint
        public int cp2;  //CC of composite
    }

    /** Orders entries by byte value. */
    static Comparator<Entry> comparatorBytes =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return Integer.compare(m1.bs, m2.bs);
            }
        };

    /** Orders entries by first code point only. */
    static Comparator<Entry> comparatorCP =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return Integer.compare(m1.cp, m2.cp);
            }
        };

    /** Orders entries by first code point, then by second code point. */
    static Comparator<Entry> comparatorComp =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                int v = Integer.compare(m1.cp, m2.cp);
                if (v == 0) {
                    v = Integer.compare(m1.cp2, m2.cp2);
                }
                return v;
            }
        };

    static int findBytes(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);
    }

    static int findCP(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);
    }

    static int findComp(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);
    }

    /*****************************************************************************/
    // tags of different charset mapping tables
    private static final int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
    private static final int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
    private static final int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
    private static final int MAP_SUPPLEMENT      = 0x5; //           db,c
    private static final int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
    private static final int MAP_COMPOSITE       = 0x7; //           db,base,cc
    private static final int MAP_INDEXC2B        = 0x8; // index table of c->bb

    // Read cursor and buffer; only meaningful while load() is running.
    int off = 0;
    byte[] bb;

    /** Reads the next big-endian unsigned 16-bit value from {@code bb}. */
    private int readUnsignedShort() {
        int hi = bb[off++] & 0xff;
        int lo = bb[off++] & 0xff;
        return (hi << 8) | lo;
    }

    /** Reads a length-prefixed table of 16-bit values. */
    private char[] readCharArray() {
        // first 2 bytes are the number of "chars" stored in this table
        int size = readUnsignedShort();
        char[] cc = new char[size];
        for (int i = 0; i < size; i++) {
            cc[i] = (char)readUnsignedShort();
        }
        return cc;
    }

    /**
     * Records {@code bytes} as the encoding of {@code c}. Chars on a page the
     * index marks as unmapped are skipped: encodeChar() reports that whole
     * page as unmappable, so a value stored for it could never be read back.
     */
    private void putC2B(char c, int bytes) {
        int index = c2bIndex[c >> 8];
        if (index != UNMAPPED_INDEX) {
            c2b[index + (c & 0xff)] = (char)bytes;
        }
    }

    void readSINGLEBYTE() {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                putC2B(c, i);
            }
        }
        b2cSB = map;
    }

    void readINDEXC2B() {
        char[] map = readCharArray();
        if (c2b == null) {
            // Size c2b to hold the 256-char page at the largest offset in
            // use. 0xffff marks "no page" and must not be taken as an
            // offset; a char never equals -1, so compare with the marker.
            int maxIndex = -1;
            for (char index : map) {
                if (index != UNMAPPED_INDEX && index > maxIndex) {
                    maxIndex = index;
                }
            }
            if (maxIndex >= 0) {
                c2b = new char[maxIndex + 256];
                Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            }
        }
        c2bIndex = map;
    }

    /**
     * Reads one double-byte b->c table and records the reverse mappings.
     *
     * @param b1Min   lead byte of the first row
     * @param b2Min   trail byte of the first column
     * @param segSize number of trail-byte slots per lead byte
     * @return the b->c table
     */
    char[] readDB(int b1Min, int b2Min, int segSize) {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                int b1 = i / segSize;
                int b2 = i % segSize;
                putC2B(c, (b1 + b1Min) * 256 + (b2 + b2Min));
            }
        }
        return map;
    }

    void readDOUBLEBYTE1() {
        b1MinDB1 = readUnsignedShort();
        b1MaxDB1 = readUnsignedShort();
        b2Min = readUnsignedShort();
        b2Max = readUnsignedShort();
        dbSegSize = b2Max - b2Min + 1;
        b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize);
    }

    void readDOUBLEBYTE2() {
        b1MinDB2 = readUnsignedShort();
        b1MaxDB2 = readUnsignedShort();
        b2Min = readUnsignedShort();
        b2Max = readUnsignedShort();
        dbSegSize = b2Max - b2Min + 1;
        b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize);
    }

    void readCOMPOSITE() {
        // the table is a flat run of (bs, cp, cp2) triples
        char[] map = readCharArray();
        int mLen = map.length / 3;
        b2cComp = new Entry[mLen];
        c2bComp = new Entry[mLen];
        for (int i = 0, j = 0; i < mLen; i++) {
            Entry m = new Entry();
            m.bs = map[j++];
            m.cp = map[j++];
            m.cp2 = map[j++];
            b2cComp[i] = m;
            c2bComp[i] = m;
        }
        // only the c->b view is re-sorted; b2cComp keeps the file order
        Arrays.sort(c2bComp, 0, c2bComp.length, comparatorComp);
    }

    /**
     * Reads every table from {@code in} into this object and closes the
     * stream.
     *
     * @param in stream holding the binary mapping data
     * @return this object, or {@code null} if an {@link IOException} occurred
     *         (its stack trace is printed)
     * @throws RuntimeException if the data is truncated or has an unknown tag
     */
    CharsetMapping load(InputStream in) {
        try (InputStream stream = in) {
            // The first 4 bytes are the size of the total data followed in
            // this .dat file.
            int len = 0;
            for (int i = 0; i < 4; i++) {
                int b = stream.read();
                if (b < 0) {
                    // EOF inside the header; masking -1 would fake a length
                    throw new RuntimeException("Corrupted data file");
                }
                len = (len << 8) | b;
            }
            if (len < 0) {
                throw new RuntimeException("Corrupted data file");
            }
            bb = new byte[len];
            off = 0;
            // Read in all bytes
            if (stream.readNBytes(bb, 0, len) != len) {
                throw new RuntimeException("Corrupted data file");
            }

            while (off < len) {
                int type = readUnsignedShort();
                switch (type) {
                case MAP_INDEXC2B:
                    readINDEXC2B();
                    break;
                case MAP_SINGLEBYTE:
                    readSINGLEBYTE();
                    break;
                case MAP_DOUBLEBYTE1:
                    readDOUBLEBYTE1();
                    break;
                case MAP_DOUBLEBYTE2:
                    readDOUBLEBYTE2();
                    break;
                case MAP_SUPPLEMENT:
                    b2cSupp = readCharArray();
                    break;
                case MAP_SUPPLEMENT_C2B:
                    c2bSupp = readCharArray();
                    break;
                case MAP_COMPOSITE:
                    readCOMPOSITE();
                    break;
                default:
                    throw new RuntimeException("Corrupted data file");
                }
            }
            return this;
        } catch (IOException x) {
            x.printStackTrace();
            return null;
        } finally {
            // the raw bytes are not needed once the tables are built
            bb = null;
        }
    }
}