Path: blob/master/src/java.base/share/classes/sun/nio/cs/DoubleByte.java
41159 views
/*1* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package sun.nio.cs;2627import java.nio.ByteBuffer;28import java.nio.CharBuffer;29import java.nio.charset.Charset;30import java.nio.charset.CharsetDecoder;31import java.nio.charset.CharsetEncoder;32import java.nio.charset.CoderResult;33import java.util.Arrays;3435import jdk.internal.access.JavaLangAccess;36import jdk.internal.access.SharedSecrets;37import sun.nio.cs.Surrogate;38import sun.nio.cs.ArrayDecoder;39import sun.nio.cs.ArrayEncoder;40import static sun.nio.cs.CharsetMapping.*;4142/*43* Four types of "DoubleByte" charsets are implemented in this class44* (1)DoubleByte45* The "mostly widely used" multibyte charset, a combination of46* a singlebyte character set (usually the ASCII charset) and a47* doublebyte character set. The codepoint values of singlebyte48* and doublebyte don't overlap. Microsoft's multibyte charsets49* and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,50* 948, 949 and 950 are such charsets.51*52* (2)DoubleByte_EBCDIC53* IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)54* in and out between the singlebyte character set and doublebyte55* character set.56*57* (3)DoubleByte_SIMPLE_EUC58* It's a "simple" form of EUC encoding scheme, only have the59* singlebyte character set G0 and one doublebyte character set60* G1 are defined, G2 (with SS2) and G3 (with SS3) are not used.61* So it is actually the same as the "typical" type (1) mentioned62* above, except it return "malformed" for the SS2 and SS3 when63* decoding.64*65* (4)DoubleByte ONLY66* A "pure" doublebyte only character set. From implementation67* point of view, this is the type (1) with "decodeSingle" always68* returns unmappable.69*70* For simplicity, all implementations share the same decoding and71* encoding data structure.72*73* Decoding:74*75* char[][] b2c;76* char[] b2cSB;77* int b2Min, b2Max78*79* public char decodeSingle(int b) {80* return b2cSB.[b];81* }82*83* public char decodeDouble(int b1, int b2) {84* if (b2 < b2Min || b2 > b2Max)85* return UNMAPPABLE_DECODING;86* return b2c[b1][b2 - b2Min];87* }88*89* (1)b2Min, b2Max are the corresponding min and max value of the90* low-half of the double-byte.91* (2)The high 8-bit/b1 of the double-byte are used to indexed into92* b2c array.93*94* Encoding:95*96* char[] c2b;97* char[] c2bIndex;98*99* public int encodeChar(char ch) {100* return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];101* }102*103*/104105public class DoubleByte {106107public static final char[] B2C_UNMAPPABLE;108static {109B2C_UNMAPPABLE = new char[0x100];110Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);111}112113public static class Decoder extends CharsetDecoder114implements DelegatableDecoder, ArrayDecoder115{116private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();117118final char[][] b2c;119final char[] b2cSB;120final int b2Min;121final int b2Max;122final boolean isASCIICompatible;123124// for SimpleEUC override125protected CoderResult crMalformedOrUnderFlow(int b) {126return CoderResult.UNDERFLOW;127}128129protected CoderResult crMalformedOrUnmappable(int b1, int b2) {130if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte(b1)131b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte(b2)132decodeSingle(b2) != UNMAPPABLE_DECODING) { // isSingle(b2)133return CoderResult.malformedForLength(1);134}135return CoderResult.unmappableForLength(2);136}137138public Decoder(Charset cs, float avgcpb, float maxcpb,139char[][] b2c, char[] b2cSB,140int b2Min, int b2Max,141boolean isASCIICompatible) {142super(cs, avgcpb, maxcpb);143this.b2c = b2c;144this.b2cSB = b2cSB;145this.b2Min = b2Min;146this.b2Max = b2Max;147this.isASCIICompatible = isASCIICompatible;148}149150public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,151boolean isASCIICompatible) {152this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);153}154155public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {156this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false);157}158159protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {160byte[] sa = src.array();161int soff = src.arrayOffset();162int sp = soff + src.position();163int sl = soff + src.limit();164165char[] da = dst.array();166int doff = dst.arrayOffset();167int dp = doff + dst.position();168int dl = doff + dst.limit();169170try {171if (isASCIICompatible) {172int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));173dp += n;174sp += n;175}176while (sp < sl && dp < dl) {177// inline the decodeSingle/Double() for better performance178int inSize = 1;179int b1 = sa[sp] & 0xff;180char c = b2cSB[b1];181if (c == UNMAPPABLE_DECODING) {182if (sl - sp < 2)183return crMalformedOrUnderFlow(b1);184int b2 = sa[sp + 1] & 0xff;185if (b2 < b2Min || b2 > b2Max ||186(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {187return crMalformedOrUnmappable(b1, b2);188}189inSize++;190}191da[dp++] = c;192sp += inSize;193}194return (sp >= sl) ? CoderResult.UNDERFLOW195: CoderResult.OVERFLOW;196} finally {197src.position(sp - soff);198dst.position(dp - doff);199}200}201202protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {203int mark = src.position();204try {205206while (src.hasRemaining() && dst.hasRemaining()) {207int b1 = src.get() & 0xff;208char c = b2cSB[b1];209int inSize = 1;210if (c == UNMAPPABLE_DECODING) {211if (src.remaining() < 1)212return crMalformedOrUnderFlow(b1);213int b2 = src.get() & 0xff;214if (b2 < b2Min || b2 > b2Max ||215(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)216return crMalformedOrUnmappable(b1, b2);217inSize++;218}219dst.put(c);220mark += inSize;221}222return src.hasRemaining()? CoderResult.OVERFLOW223: CoderResult.UNDERFLOW;224} finally {225src.position(mark);226}227}228229// Make some protected methods public for use by JISAutoDetect230public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {231if (src.hasArray() && dst.hasArray())232return decodeArrayLoop(src, dst);233else234return decodeBufferLoop(src, dst);235}236237@Override238public int decode(byte[] src, int sp, int len, char[] dst) {239int dp = 0;240int sl = sp + len;241char repl = replacement().charAt(0);242while (sp < sl) {243int b1 = src[sp++] & 0xff;244char c = b2cSB[b1];245if (c == UNMAPPABLE_DECODING) {246if (sp < sl) {247int b2 = src[sp++] & 0xff;248if (b2 < b2Min || b2 > b2Max ||249(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {250if (crMalformedOrUnmappable(b1, b2).length() == 1) {251sp--;252}253}254}255if (c == UNMAPPABLE_DECODING) {256c = repl;257}258}259dst[dp++] = c;260}261return dp;262}263264@Override265public boolean isASCIICompatible() {266return isASCIICompatible;267}268269public void implReset() {270super.implReset();271}272273public CoderResult implFlush(CharBuffer out) {274return super.implFlush(out);275}276277// decode loops are not using decodeSingle/Double() for performance278// reason.279public char decodeSingle(int b) {280return b2cSB[b];281}282283public char decodeDouble(int b1, int b2) {284if (b1 < 0 || b1 > b2c.length ||285b2 < b2Min || b2 > b2Max)286return UNMAPPABLE_DECODING;287return b2c[b1][b2 - b2Min];288}289}290291// IBM_EBCDIC_DBCS292public static class Decoder_EBCDIC extends Decoder {293private static final int SBCS = 0;294private static final int DBCS = 1;295private static final int SO = 0x0e;296private static final int SI = 0x0f;297private int currentState;298299public Decoder_EBCDIC(Charset cs,300char[][] b2c, char[] b2cSB, int b2Min, int b2Max,301boolean isASCIICompatible) {302super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);303}304305public Decoder_EBCDIC(Charset cs,306char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {307super(cs, b2c, b2cSB, b2Min, b2Max, false);308}309310public void implReset() {311currentState = SBCS;312}313314// Check validity of dbcs ebcdic byte pair values315//316// First byte : 0x41 -- 0xFE317// Second byte: 0x41 -- 0xFE318// Doublebyte blank: 0x4040319//320// The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io321// as322// if ((b1 != 0x40 || b2 != 0x40) &&323// (b2 < 0x41 || b2 > 0xfe)) {...}324// is not correct/complete (range check for b1)325//326private static boolean isDoubleByte(int b1, int b2) {327return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)328|| (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE329}330331protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {332byte[] sa = src.array();333int sp = src.arrayOffset() + src.position();334int sl = src.arrayOffset() + src.limit();335char[] da = dst.array();336int dp = dst.arrayOffset() + dst.position();337int dl = dst.arrayOffset() + dst.limit();338339try {340// don't check dp/dl together here, it's possible to341// decdoe a SO/SI without space in output buffer.342while (sp < sl) {343int b1 = sa[sp] & 0xff;344int inSize = 1;345if (b1 == SO) { // Shift out346if (currentState != SBCS)347return CoderResult.malformedForLength(1);348else349currentState = DBCS;350} else if (b1 == SI) {351if (currentState != DBCS)352return CoderResult.malformedForLength(1);353else354currentState = SBCS;355} else {356char c;357if (currentState == SBCS) {358c = b2cSB[b1];359if (c == UNMAPPABLE_DECODING)360return CoderResult.unmappableForLength(1);361} else {362if (sl - sp < 2)363return CoderResult.UNDERFLOW;364int b2 = sa[sp + 1] & 0xff;365if (b2 < b2Min || b2 > b2Max ||366(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {367if (!isDoubleByte(b1, b2))368return CoderResult.malformedForLength(2);369return CoderResult.unmappableForLength(2);370}371inSize++;372}373if (dl - dp < 1)374return CoderResult.OVERFLOW;375376da[dp++] = c;377}378sp += inSize;379}380return CoderResult.UNDERFLOW;381} finally {382src.position(sp - src.arrayOffset());383dst.position(dp - dst.arrayOffset());384}385}386387protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {388int mark = src.position();389try {390while (src.hasRemaining()) {391int b1 = src.get() & 0xff;392int inSize = 1;393if (b1 == SO) { // Shift out394if (currentState != SBCS)395return CoderResult.malformedForLength(1);396else397currentState = DBCS;398} else if (b1 == SI) {399if (currentState != DBCS)400return CoderResult.malformedForLength(1);401else402currentState = SBCS;403} else {404char c = UNMAPPABLE_DECODING;405if (currentState == SBCS) {406c = b2cSB[b1];407if (c == UNMAPPABLE_DECODING)408return CoderResult.unmappableForLength(1);409} else {410if (src.remaining() < 1)411return CoderResult.UNDERFLOW;412int b2 = src.get()&0xff;413if (b2 < b2Min || b2 > b2Max ||414(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {415if (!isDoubleByte(b1, b2))416return CoderResult.malformedForLength(2);417return CoderResult.unmappableForLength(2);418}419inSize++;420}421422if (dst.remaining() < 1)423return CoderResult.OVERFLOW;424425dst.put(c);426}427mark += inSize;428}429return CoderResult.UNDERFLOW;430} finally {431src.position(mark);432}433}434435@Override436public int decode(byte[] src, int sp, int len, char[] dst) {437int dp = 0;438int sl = sp + len;439currentState = SBCS;440char repl = replacement().charAt(0);441while (sp < sl) {442int b1 = src[sp++] & 0xff;443if (b1 == SO) { // Shift out444if (currentState != SBCS)445dst[dp++] = repl;446else447currentState = DBCS;448} else if (b1 == SI) {449if (currentState != DBCS)450dst[dp++] = repl;451else452currentState = SBCS;453} else {454char c = UNMAPPABLE_DECODING;455if (currentState == SBCS) {456c = b2cSB[b1];457if (c == UNMAPPABLE_DECODING)458c = repl;459} else {460if (sl == sp) {461c = repl;462} else {463int b2 = src[sp++] & 0xff;464if (b2 < b2Min || b2 > b2Max ||465(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {466c = repl;467}468}469}470dst[dp++] = c;471}472}473return dp;474}475}476477// DBCS_ONLY478public static class Decoder_DBCSONLY extends Decoder {479static final char[] b2cSB_UNMAPPABLE;480static {481b2cSB_UNMAPPABLE = new char[0x100];482Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);483}484485// always returns unmappableForLenth(2) for doublebyte_only486@Override487protected CoderResult crMalformedOrUnmappable(int b1, int b2) {488return CoderResult.unmappableForLength(2);489}490491public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,492boolean isASCIICompatible) {493super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);494}495496public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {497super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);498}499}500501// EUC_SIMPLE502// The only thing we need to "override" is to check SS2/SS3 and503// return "malformed" if found504public static class Decoder_EUC_SIM extends Decoder {505private final int SS2 = 0x8E;506private final int SS3 = 0x8F;507508public Decoder_EUC_SIM(Charset cs,509char[][] b2c, char[] b2cSB, int b2Min, int b2Max,510boolean isASCIICompatible) {511super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);512}513514// No support provided for G2/G3 for SimpleEUC515protected CoderResult crMalformedOrUnderFlow(int b) {516if (b == SS2 || b == SS3 )517return CoderResult.malformedForLength(1);518return CoderResult.UNDERFLOW;519}520521protected CoderResult crMalformedOrUnmappable(int b1, int b2) {522if (b1 == SS2 || b1 == SS3 )523return CoderResult.malformedForLength(1);524return CoderResult.unmappableForLength(2);525}526527@Override528public int decode(byte[] src, int sp, int len, char[] dst) {529int dp = 0;530int sl = sp + len;531char repl = replacement().charAt(0);532while (sp < sl) {533int b1 = src[sp++] & 0xff;534char c = b2cSB[b1];535if (c == UNMAPPABLE_DECODING) {536if (sp < sl) {537int b2 = src[sp++] & 0xff;538if (b2 < b2Min || b2 > b2Max ||539(c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {540if (b1 == SS2 || b1 == SS3) {541sp--;542}543c = repl;544}545} else {546c = repl;547}548}549dst[dp++] = c;550}551return dp;552}553}554555public static class Encoder extends CharsetEncoder556implements ArrayEncoder557{558protected final int MAX_SINGLEBYTE = 0xff;559private final char[] c2b;560private final char[] c2bIndex;561protected Surrogate.Parser sgp;562final boolean isASCIICompatible;563564public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {565this(cs, c2b, c2bIndex, false);566}567568public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {569super(cs, 2.0f, 2.0f);570this.c2b = c2b;571this.c2bIndex = c2bIndex;572this.isASCIICompatible = isASCIICompatible;573}574575public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,576boolean isASCIICompatible) {577super(cs, avg, max, repl);578this.c2b = c2b;579this.c2bIndex = c2bIndex;580this.isASCIICompatible = isASCIICompatible;581}582583public boolean canEncode(char c) {584return encodeChar(c) != UNMAPPABLE_ENCODING;585}586587protected Surrogate.Parser sgp() {588if (sgp == null)589sgp = new Surrogate.Parser();590return sgp;591}592593protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {594char[] sa = src.array();595int sp = src.arrayOffset() + src.position();596int sl = src.arrayOffset() + src.limit();597598byte[] da = dst.array();599int dp = dst.arrayOffset() + dst.position();600int dl = dst.arrayOffset() + dst.limit();601602try {603while (sp < sl) {604char c = sa[sp];605int bb = encodeChar(c);606if (bb == UNMAPPABLE_ENCODING) {607if (Character.isSurrogate(c)) {608if (sgp().parse(c, sa, sp, sl) < 0)609return sgp.error();610return sgp.unmappableResult();611}612return CoderResult.unmappableForLength(1);613}614615if (bb > MAX_SINGLEBYTE) { // DoubleByte616if (dl - dp < 2)617return CoderResult.OVERFLOW;618da[dp++] = (byte)(bb >> 8);619da[dp++] = (byte)bb;620} else { // SingleByte621if (dl - dp < 1)622return CoderResult.OVERFLOW;623da[dp++] = (byte)bb;624}625626sp++;627}628return CoderResult.UNDERFLOW;629} finally {630src.position(sp - src.arrayOffset());631dst.position(dp - dst.arrayOffset());632}633}634635protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {636int mark = src.position();637try {638while (src.hasRemaining()) {639char c = src.get();640int bb = encodeChar(c);641if (bb == UNMAPPABLE_ENCODING) {642if (Character.isSurrogate(c)) {643if (sgp().parse(c, src) < 0)644return sgp.error();645return sgp.unmappableResult();646}647return CoderResult.unmappableForLength(1);648}649if (bb > MAX_SINGLEBYTE) { // DoubleByte650if (dst.remaining() < 2)651return CoderResult.OVERFLOW;652dst.put((byte)(bb >> 8));653dst.put((byte)(bb));654} else {655if (dst.remaining() < 1)656return CoderResult.OVERFLOW;657dst.put((byte)bb);658}659mark++;660}661return CoderResult.UNDERFLOW;662} finally {663src.position(mark);664}665}666667protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {668if (src.hasArray() && dst.hasArray())669return encodeArrayLoop(src, dst);670else671return encodeBufferLoop(src, dst);672}673674protected byte[] repl = replacement();675protected void implReplaceWith(byte[] newReplacement) {676repl = newReplacement;677}678679@Override680public int encode(char[] src, int sp, int len, byte[] dst) {681int dp = 0;682int sl = sp + len;683int dl = dst.length;684while (sp < sl) {685char c = src[sp++];686int bb = encodeChar(c);687if (bb == UNMAPPABLE_ENCODING) {688if (Character.isHighSurrogate(c) && sp < sl &&689Character.isLowSurrogate(src[sp])) {690sp++;691}692dst[dp++] = repl[0];693if (repl.length > 1)694dst[dp++] = repl[1];695continue;696} //else697if (bb > MAX_SINGLEBYTE) { // DoubleByte698dst[dp++] = (byte)(bb >> 8);699dst[dp++] = (byte)bb;700} else { // SingleByte701dst[dp++] = (byte)bb;702}703}704return dp;705}706707@Override708public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {709int dp = 0;710int sl = sp + len;711while (sp < sl) {712char c = (char)(src[sp++] & 0xff);713int bb = encodeChar(c);714if (bb == UNMAPPABLE_ENCODING) {715// no surrogate pair in latin1 string716dst[dp++] = repl[0];717if (repl.length > 1) {718dst[dp++] = repl[1];719}720continue;721} //else722if (bb > MAX_SINGLEBYTE) { // DoubleByte723dst[dp++] = (byte)(bb >> 8);724dst[dp++] = (byte)bb;725} else { // SingleByte726dst[dp++] = (byte)bb;727}728729}730return dp;731}732733@Override734public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {735int dp = 0;736int sl = sp + len;737while (sp < sl) {738char c = StringUTF16.getChar(src, sp++);739int bb = encodeChar(c);740if (bb == UNMAPPABLE_ENCODING) {741if (Character.isHighSurrogate(c) && sp < sl &&742Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {743sp++;744}745dst[dp++] = repl[0];746if (repl.length > 1) {747dst[dp++] = repl[1];748}749continue;750} //else751if (bb > MAX_SINGLEBYTE) { // DoubleByte752dst[dp++] = (byte)(bb >> 8);753dst[dp++] = (byte)bb;754} else { // SingleByte755dst[dp++] = (byte)bb;756}757}758return dp;759}760761@Override762public boolean isASCIICompatible() {763return isASCIICompatible;764}765766public int encodeChar(char ch) {767return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];768}769770// init the c2b and c2bIndex tables from b2c.771public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR,772int b2Min, int b2Max,773char[] c2b, char[] c2bIndex)774{775Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);776int off = 0x100;777778char[][] b2c_ca = new char[b2c.length][];779char[] b2cSB_ca = null;780if (b2cSB != null)781b2cSB_ca = b2cSB.toCharArray();782783for (int i = 0; i < b2c.length; i++) {784if (b2c[i] == null)785continue;786b2c_ca[i] = b2c[i].toCharArray();787}788789if (b2cNR != null) {790int j = 0;791while (j < b2cNR.length()) {792char b = b2cNR.charAt(j++);793char c = b2cNR.charAt(j++);794if (b < 0x100 && b2cSB_ca != null) {795if (b2cSB_ca[b] == c)796b2cSB_ca[b] = UNMAPPABLE_DECODING;797} else {798if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)799b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;800}801}802}803804if (b2cSB_ca != null) { // SingleByte805for (int b = 0; b < b2cSB_ca.length; b++) {806char c = b2cSB_ca[b];807if (c == UNMAPPABLE_DECODING)808continue;809int index = c2bIndex[c >> 8];810if (index == 0) {811index = off;812off += 0x100;813c2bIndex[c >> 8] = (char)index;814}815c2b[index + (c & 0xff)] = (char)b;816}817}818819for (int b1 = 0; b1 < b2c.length; b1++) { // DoubleByte820char[] db = b2c_ca[b1];821if (db == null)822continue;823for (int b2 = b2Min; b2 <= b2Max; b2++) {824char c = db[b2 - b2Min];825if (c == UNMAPPABLE_DECODING)826continue;827int index = c2bIndex[c >> 8];828if (index == 0) {829index = off;830off += 0x100;831c2bIndex[c >> 8] = (char)index;832}833c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);834}835}836837if (c2bNR != null) {838// add c->b only nr entries839for (int i = 0; i < c2bNR.length(); i += 2) {840char b = c2bNR.charAt(i);841char c = c2bNR.charAt(i + 1);842int index = (c >> 8);843if (c2bIndex[index] == 0) {844c2bIndex[index] = (char)off;845off += 0x100;846}847index = c2bIndex[index] + (c & 0xff);848c2b[index] = b;849}850}851}852}853854public static class Encoder_DBCSONLY extends Encoder {855856public Encoder_DBCSONLY(Charset cs, byte[] repl,857char[] c2b, char[] c2bIndex,858boolean isASCIICompatible) {859super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);860}861862public int encodeChar(char ch) {863int bb = super.encodeChar(ch);864if (bb <= MAX_SINGLEBYTE)865return UNMAPPABLE_ENCODING;866return bb;867}868}869870public static class Encoder_EBCDIC extends Encoder {871static final int SBCS = 0;872static final int DBCS = 1;873static final byte SO = 0x0e;874static final byte SI = 0x0f;875876protected int currentState = SBCS;877878public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,879boolean isASCIICompatible) {880super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);881}882883protected void implReset() {884currentState = SBCS;885}886887protected CoderResult implFlush(ByteBuffer out) {888if (currentState == DBCS) {889if (out.remaining() < 1)890return CoderResult.OVERFLOW;891out.put(SI);892}893implReset();894return CoderResult.UNDERFLOW;895}896897protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {898char[] sa = src.array();899int sp = src.arrayOffset() + src.position();900int sl = src.arrayOffset() + src.limit();901byte[] da = dst.array();902int dp = dst.arrayOffset() + dst.position();903int dl = dst.arrayOffset() + dst.limit();904905try {906while (sp < sl) {907char c = sa[sp];908int bb = encodeChar(c);909if (bb == UNMAPPABLE_ENCODING) {910if (Character.isSurrogate(c)) {911if (sgp().parse(c, sa, sp, sl) < 0)912return sgp.error();913return sgp.unmappableResult();914}915return CoderResult.unmappableForLength(1);916}917if (bb > MAX_SINGLEBYTE) { // DoubleByte918if (currentState == SBCS) {919if (dl - dp < 1)920return CoderResult.OVERFLOW;921currentState = DBCS;922da[dp++] = SO;923}924if (dl - dp < 2)925return CoderResult.OVERFLOW;926da[dp++] = (byte)(bb >> 8);927da[dp++] = (byte)bb;928} else { // SingleByte929if (currentState == DBCS) {930if (dl - dp < 1)931return CoderResult.OVERFLOW;932currentState = SBCS;933da[dp++] = SI;934}935if (dl - dp < 1)936return CoderResult.OVERFLOW;937da[dp++] = (byte)bb;938939}940sp++;941}942return CoderResult.UNDERFLOW;943} finally {944src.position(sp - src.arrayOffset());945dst.position(dp - dst.arrayOffset());946}947}948949protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {950int mark = src.position();951try {952while (src.hasRemaining()) {953char c = src.get();954int bb = encodeChar(c);955if (bb == UNMAPPABLE_ENCODING) {956if (Character.isSurrogate(c)) {957if (sgp().parse(c, src) < 0)958return sgp.error();959return sgp.unmappableResult();960}961return CoderResult.unmappableForLength(1);962}963if (bb > MAX_SINGLEBYTE) { // DoubleByte964if (currentState == SBCS) {965if (dst.remaining() < 1)966return CoderResult.OVERFLOW;967currentState = DBCS;968dst.put(SO);969}970if (dst.remaining() < 2)971return CoderResult.OVERFLOW;972dst.put((byte)(bb >> 8));973dst.put((byte)(bb));974} else { // Single-byte975if (currentState == DBCS) {976if (dst.remaining() < 1)977return CoderResult.OVERFLOW;978currentState = SBCS;979dst.put(SI);980}981if (dst.remaining() < 1)982return CoderResult.OVERFLOW;983dst.put((byte)bb);984}985mark++;986}987return CoderResult.UNDERFLOW;988} finally {989src.position(mark);990}991}992993@Override994public int encode(char[] src, int sp, int len, byte[] dst) {995int dp = 0;996int sl = sp + len;997while (sp < sl) {998char c = src[sp++];999int bb = encodeChar(c);10001001if (bb == UNMAPPABLE_ENCODING) {1002if (Character.isHighSurrogate(c) && sp < sl &&1003Character.isLowSurrogate(src[sp])) {1004sp++;1005}1006dst[dp++] = repl[0];1007if (repl.length > 1)1008dst[dp++] = repl[1];1009continue;1010} //else1011if (bb > MAX_SINGLEBYTE) { // DoubleByte1012if (currentState == SBCS) {1013currentState = DBCS;1014dst[dp++] = SO;1015}1016dst[dp++] = (byte)(bb >> 8);1017dst[dp++] = (byte)bb;1018} else { // SingleByte1019if (currentState == DBCS) {1020currentState = SBCS;1021dst[dp++] = SI;1022}1023dst[dp++] = (byte)bb;1024}1025}10261027if (currentState == DBCS) {1028currentState = SBCS;1029dst[dp++] = SI;1030}1031return dp;1032}10331034@Override1035public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {1036int dp = 0;1037int sl = sp + len;1038while (sp < sl) {1039char c = (char)(src[sp++] & 0xff);1040int bb = encodeChar(c);1041if (bb == UNMAPPABLE_ENCODING) {1042// no surrogate pair in latin1 string1043dst[dp++] = repl[0];1044if (repl.length > 1)1045dst[dp++] = repl[1];1046continue;1047} //else1048if (bb > MAX_SINGLEBYTE) { // DoubleByte1049if (currentState == SBCS) {1050currentState = DBCS;1051dst[dp++] = SO;1052}1053dst[dp++] = (byte)(bb >> 8);1054dst[dp++] = (byte)bb;1055} else { // SingleByte1056if (currentState == DBCS) {1057currentState = SBCS;1058dst[dp++] = SI;1059}1060dst[dp++] = (byte)bb;1061}1062}1063if (currentState == DBCS) {1064currentState = SBCS;1065dst[dp++] = SI;1066}1067return dp;1068}10691070@Override1071public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {1072int dp = 0;1073int sl = sp + len;1074while (sp < sl) {1075char c = StringUTF16.getChar(src, sp++);1076int bb = encodeChar(c);1077if (bb == UNMAPPABLE_ENCODING) {1078if (Character.isHighSurrogate(c) && sp < sl &&1079Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {1080sp++;1081}1082dst[dp++] = repl[0];1083if (repl.length > 1)1084dst[dp++] = repl[1];1085continue;1086} //else1087if (bb > MAX_SINGLEBYTE) { // DoubleByte1088if (currentState == SBCS) {1089currentState = DBCS;1090dst[dp++] = SO;1091}1092dst[dp++] = (byte)(bb >> 8);1093dst[dp++] = (byte)bb;1094} else { // SingleByte1095if (currentState == DBCS) {1096currentState = SBCS;1097dst[dp++] = SI;1098}1099dst[dp++] = (byte)bb;1100}1101}1102if (currentState == DBCS) {1103currentState = SBCS;1104dst[dp++] = SI;1105}1106return dp;1107}1108}11091110// EUC_SIMPLE1111public static class Encoder_EUC_SIM extends Encoder {1112public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,1113boolean isASCIICompatible) {1114super(cs, c2b, c2bIndex, isASCIICompatible);1115}1116}11171118}111911201121