Path: blob/master/src/java.desktop/share/classes/sun/font/CMap.java
41155 views
/*1* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package sun.font;2627import java.nio.ByteBuffer;28import java.nio.CharBuffer;29import java.nio.IntBuffer;30import java.util.Locale;31import java.nio.charset.*;3233/*34* A tt font has a CMAP table which is in turn made up of sub-tables which35* describe the char to glyph mapping in (possibly) multiple ways.36* CMAP subtables are described by 3 values.37* 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)38* 2. Encoding (eg 0=symbol, 1=unicode)39* 3. TrueType subtable format (how the char->glyph mapping for the encoding40* is stored in the subtable). See the TrueType spec. Format 4 is required41* by MS in fonts for windows. Its uses segmented mapping to delta values.42* Most typically we see are (3,1,4) :43* CMAP Platform ID=3 is what we use.44* Encodings that are used in practice by JDK on Solaris are45* symbol (3,0)46* unicode (3,1)47* GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)48* The format for almost all subtables is 4. However the solaris (3,5)49* encodings are typically in format 2.50*/51abstract class CMap {5253// static char WingDings_b2c[] = {54// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,55// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,56// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,57// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,58// 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,59// 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,60// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,61// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,62// 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,63// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,64// 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,65// 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,66// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,67// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,68// 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,69// 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,70// 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,71// 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,72// 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,73// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,74// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,75// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,76// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,77// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,78// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,79// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,80// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,81// 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,82// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,83// 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,84// 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,85// 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,86// };8788// static char Symbols_b2c[] = {89// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,90// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,91// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,92// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,93// 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,94// 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,95// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,96// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,97// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,98// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,99// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,100// 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,101// 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,102// 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,103// 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,104// 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,105// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,106// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,107// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,108// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,109// 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,110// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,111// 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,112// 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,113// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,114// 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,115// 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,116// 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,117// 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,118// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,119// 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,120// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,121// };122123static final short ShiftJISEncoding = 2;124static final short GBKEncoding = 3;125static final short Big5Encoding = 4;126static final short WansungEncoding = 5;127static final short JohabEncoding = 6;128static final short MSUnicodeSurrogateEncoding = 10;129130static final char noSuchChar = (char)0xfffd;131static final int SHORTMASK = 0x0000ffff;132static final int INTMASK = 0x7fffffff;133134static final char[][] converterMaps = new char[7][];135136/*137* Unicode->other encoding translation array. A pre-computed look up138* which can be shared across all fonts using that encoding.139* Using this saves running character coverters repeatedly.140*/141char[] xlat;142UVS uvs = null;143144static CMap initialize(TrueTypeFont font) {145146CMap cmap = null;147148int offset, platformID, encodingID=-1;149150int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,151three6=0, three10=0;152int zero5 = 0; // for Unicode Variation Sequences153boolean threeStar = false;154155ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);156int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);157short numberSubTables = cmapBuffer.getShort(2);158159/* locate the offsets of all 3,* (ie Microsoft platform) encodings */160for (int i=0; i<numberSubTables; i++) {161cmapBuffer.position(i * 8 + 4);162platformID = cmapBuffer.getShort();163if (platformID == 3) {164threeStar = true;165encodingID = cmapBuffer.getShort();166offset = cmapBuffer.getInt();167switch (encodingID) {168case 0: three0 = offset; break; // MS Symbol encoding169case 1: three1 = offset; break; // MS Unicode cmap170case 2: three2 = offset; break; // ShiftJIS cmap.171case 3: three3 = offset; break; // GBK cmap172case 4: three4 = offset; break; // Big 5 cmap173case 5: three5 = offset; break; // Wansung174case 6: three6 = offset; break; // Johab175case 10: three10 = offset; break; // MS Unicode surrogates176}177} else if (platformID == 0) {178encodingID = cmapBuffer.getShort();179offset = cmapBuffer.getInt();180if (encodingID == 5) {181zero5 = offset;182}183}184}185186/* This defines the preference order for cmap subtables */187if (threeStar) {188if (three10 != 0) {189cmap = createCMap(cmapBuffer, three10, null);190}191else if (three0 != 0) {192/* The special case treatment of these fonts leads to193* anomalies where a user can view "wingdings" and "wingdings2"194* and the latter shows all its code points in the unicode195* private use area at 0xF000->0XF0FF and the former shows196* a scattered subset of its glyphs that are known mappings to197* unicode code points.198* The primary purpose of these mappings was to facilitate199* display of symbol chars etc in composite fonts, however200* this is not needed as all these code points are covered201* by some other platform symbol font.202* Commenting this out reduces the role of these two files203* (assuming that they continue to be used in font.properties)204* to just one of contributing to the overall composite205* font metrics, and also AWT can still access the fonts.206* Clients which explicitly accessed these fonts as names207* "Symbol" and "Wingdings" (ie as physical fonts) and208* expected to see a scattering of these characters will209* see them now as missing. How much of a problem is this?210* Perhaps we could still support this mapping just for211* "Symbol.ttf" but I suspect some users would prefer it212* to be mapped in to the Latin range as that is how213* the "symbol" font is used in native apps.214*/215// String name = font.platName.toLowerCase(Locale.ENGLISH);216// if (name.endsWith("symbol.ttf")) {217// cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);218// } else if (name.endsWith("wingding.ttf")) {219// cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);220// } else {221cmap = createCMap(cmapBuffer, three0, null);222// }223}224else if (three1 != 0) {225cmap = createCMap(cmapBuffer, three1, null);226}227else if (three2 != 0) {228cmap = createCMap(cmapBuffer, three2,229getConverterMap(ShiftJISEncoding));230}231else if (three3 != 0) {232cmap = createCMap(cmapBuffer, three3,233getConverterMap(GBKEncoding));234}235else if (three4 != 0) {236cmap = createCMap(cmapBuffer, three4,237getConverterMap(Big5Encoding));238}239else if (three5 != 0) {240cmap = createCMap(cmapBuffer, three5,241getConverterMap(WansungEncoding));242}243else if (three6 != 0) {244cmap = createCMap(cmapBuffer, three6,245getConverterMap(JohabEncoding));246}247} else {248/* No 3,* subtable was found. Just use whatever is the first249* table listed. Not very useful but maybe better than250* rejecting the font entirely?251*/252cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);253}254// For Unicode Variation Sequences255if (cmap != null && zero5 != 0) {256cmap.createUVS(cmapBuffer, zero5);257}258return cmap;259}260261/* speed up the converting by setting the range for double262* byte characters;263*/264static char[] getConverter(short encodingID) {265int dBegin = 0x8000;266int dEnd = 0xffff;267String encoding;268269switch (encodingID) {270case ShiftJISEncoding:271dBegin = 0x8140;272dEnd = 0xfcfc;273encoding = "SJIS";274break;275case GBKEncoding:276dBegin = 0x8140;277dEnd = 0xfea0;278encoding = "GBK";279break;280case Big5Encoding:281dBegin = 0xa140;282dEnd = 0xfefe;283encoding = "Big5";284break;285case WansungEncoding:286dBegin = 0xa1a1;287dEnd = 0xfede;288encoding = "EUC_KR";289break;290case JohabEncoding:291dBegin = 0x8141;292dEnd = 0xfdfe;293encoding = "Johab";294break;295default:296return null;297}298299try {300char[] convertedChars = new char[65536];301for (int i=0; i<65536; i++) {302convertedChars[i] = noSuchChar;303}304305byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];306char[] outputChars = new char[(dEnd-dBegin+1)];307308int j = 0;309int firstByte;310if (encodingID == ShiftJISEncoding) {311for (int i = dBegin; i <= dEnd; i++) {312firstByte = (i >> 8 & 0xff);313if (firstByte >= 0xa1 && firstByte <= 0xdf) {314//sjis halfwidth katakana315inputBytes[j++] = (byte)0xff;316inputBytes[j++] = (byte)0xff;317} else {318inputBytes[j++] = (byte)firstByte;319inputBytes[j++] = (byte)(i & 0xff);320}321}322} else {323for (int i = dBegin; i <= dEnd; i++) {324inputBytes[j++] = (byte)(i>>8 & 0xff);325inputBytes[j++] = (byte)(i & 0xff);326}327}328329Charset.forName(encoding).newDecoder()330.onMalformedInput(CodingErrorAction.REPLACE)331.onUnmappableCharacter(CodingErrorAction.REPLACE)332.replaceWith("\u0000")333.decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),334CharBuffer.wrap(outputChars, 0, outputChars.length),335true);336337// ensure single byte ascii338for (int i = 0x20; i <= 0x7e; i++) {339convertedChars[i] = (char)i;340}341342//sjis halfwidth katakana343if (encodingID == ShiftJISEncoding) {344for (int i = 0xa1; i <= 0xdf; i++) {345convertedChars[i] = (char)(i - 0xa1 + 0xff61);346}347}348349/* It would save heap space (approx 60Kbytes for each of these350* converters) if stored only valid ranges (ie returned351* outputChars directly. But this is tricky since want to352* include the ASCII range too.353*/354// System.err.println("oc.len="+outputChars.length);355// System.err.println("cc.len="+convertedChars.length);356// System.err.println("dbegin="+dBegin);357System.arraycopy(outputChars, 0, convertedChars, dBegin,358outputChars.length);359360//return convertedChars;361/* invert this map as now want it to map from Unicode362* to other encoding.363*/364char [] invertedChars = new char[65536];365for (int i=0;i<65536;i++) {366if (convertedChars[i] != noSuchChar) {367invertedChars[convertedChars[i]] = (char)i;368}369}370return invertedChars;371372} catch (Exception e) {373e.printStackTrace();374}375return null;376}377378/*379* The returned array maps to unicode from some other 2 byte encoding380* eg for a 2byte index which represents a SJIS char, the indexed381* value is the corresponding unicode char.382*/383static char[] getConverterMap(short encodingID) {384if (converterMaps[encodingID] == null) {385converterMaps[encodingID] = getConverter(encodingID);386}387return converterMaps[encodingID];388}389390391static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {392/* First do a sanity check that this cmap subtable is contained393* within the cmap table.394*/395int subtableFormat = buffer.getChar(offset);396long subtableLength;397if (subtableFormat < 8) {398subtableLength = buffer.getChar(offset+2);399} else {400subtableLength = buffer.getInt(offset+4) & INTMASK;401}402if (FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) {403FontUtilities.logWarning("Cmap subtable overflows buffer.");404}405switch (subtableFormat) {406case 0: return new CMapFormat0(buffer, offset);407case 2: return new CMapFormat2(buffer, offset, xlat);408case 4: return new CMapFormat4(buffer, offset, xlat);409case 6: return new CMapFormat6(buffer, offset, xlat);410case 8: return new CMapFormat8(buffer, offset, xlat);411case 10: return new CMapFormat10(buffer, offset, xlat);412case 12: return new CMapFormat12(buffer, offset, xlat);413default: throw new RuntimeException("Cmap format unimplemented: " +414(int)buffer.getChar(offset));415}416}417418private void createUVS(ByteBuffer buffer, int offset) {419int subtableFormat = buffer.getChar(offset);420if (subtableFormat == 14) {421long subtableLength = buffer.getInt(offset + 2) & INTMASK;422if (FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) {423FontUtilities.logWarning("Cmap UVS subtable overflows buffer.");424}425try {426this.uvs = new UVS(buffer, offset);427} catch (Throwable t) {428t.printStackTrace();429}430}431return;432}433434/*435final char charVal(byte[] cmap, int index) {436return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));437}438439final short shortVal(byte[] cmap, int index) {440return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));441}442*/443abstract char getGlyph(int charCode);444445/* Format 4 Header is446* ushort format (off=0)447* ushort length (off=2)448* ushort language (off=4)449* ushort segCountX2 (off=6)450* ushort searchRange (off=8)451* ushort entrySelector (off=10)452* ushort rangeShift (off=12)453* ushort endCount[segCount] (off=14)454* ushort reservedPad455* ushort startCount[segCount]456* short idDelta[segCount]457* idRangeOFfset[segCount]458* ushort glyphIdArray[]459*/460static class CMapFormat4 extends CMap {461int segCount;462int entrySelector;463int rangeShift;464char[] endCount;465char[] startCount;466short[] idDelta;467char[] idRangeOffset;468char[] glyphIds;469470CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {471472this.xlat = xlat;473474bbuffer.position(offset);475CharBuffer buffer = bbuffer.asCharBuffer();476buffer.get(); // skip, we already know format=4477int subtableLength = buffer.get();478/* Try to recover from some bad fonts which specify a subtable479* length that would overflow the byte buffer holding the whole480* cmap table. If this isn't a recoverable situation an exception481* may be thrown which is caught higher up the call stack.482* Whilst this may seem lenient, in practice, unless the "bad"483* subtable we are using is the last one in the cmap table we484* would have no way of knowing about this problem anyway.485*/486if (offset+subtableLength > bbuffer.capacity()) {487subtableLength = bbuffer.capacity() - offset;488}489buffer.get(); // skip language490segCount = buffer.get()/2;491int searchRange = buffer.get();492entrySelector = buffer.get();493rangeShift = buffer.get()/2;494startCount = new char[segCount];495endCount = new char[segCount];496idDelta = new short[segCount];497idRangeOffset = new char[segCount];498499for (int i=0; i<segCount; i++) {500endCount[i] = buffer.get();501}502buffer.get(); // 2 bytes for reserved pad503for (int i=0; i<segCount; i++) {504startCount[i] = buffer.get();505}506507for (int i=0; i<segCount; i++) {508idDelta[i] = (short)buffer.get();509}510511for (int i=0; i<segCount; i++) {512char ctmp = buffer.get();513idRangeOffset[i] = (char)((ctmp>>1)&0xffff);514}515/* Can calculate the number of glyph IDs by subtracting516* "pos" from the length of the cmap517*/518int pos = (segCount*8+16)/2;519buffer.position(pos);520int numGlyphIds = (subtableLength/2 - pos);521glyphIds = new char[numGlyphIds];522for (int i=0;i<numGlyphIds;i++) {523glyphIds[i] = buffer.get();524}525/*526System.err.println("segcount="+segCount);527System.err.println("entrySelector="+entrySelector);528System.err.println("rangeShift="+rangeShift);529for (int j=0;j<segCount;j++) {530System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+531" ec="+(int)(endCount[j]&0xffff)+532" delta="+idDelta[j] +533" ro="+(int)idRangeOffset[j]);534}535536//System.err.println("numglyphs="+glyphIds.length);537for (int i=0;i<numGlyphIds;i++) {538System.err.println("gid["+i+"]="+(int)glyphIds[i]);539}540*/541}542543char getGlyph(int charCode) {544545final int origCharCode = charCode;546int index = 0;547char glyphCode = 0;548549int controlGlyph = getControlCodeGlyph(charCode, true);550if (controlGlyph >= 0) {551return (char)controlGlyph;552}553554/* presence of translation array indicates that this555* cmap is in some other (non-unicode encoding).556* In order to look-up a char->glyph mapping we need to557* translate the unicode code point to the encoding of558* the cmap.559* REMIND: VALID CHARCODES??560*/561if (xlat != null) {562charCode = xlat[charCode];563}564565/*566* Citation from the TrueType (and OpenType) spec:567* The segments are sorted in order of increasing endCode568* values, and the segment values are specified in four parallel569* arrays. You search for the first endCode that is greater than570* or equal to the character code you want to map. If the571* corresponding startCode is less than or equal to the572* character code, then you use the corresponding idDelta and573* idRangeOffset to map the character code to a glyph index574* (otherwise, the missingGlyph is returned).575*/576577/*578* CMAP format4 defines several fields for optimized search of579* the segment list (entrySelector, searchRange, rangeShift).580* However, benefits are neglible and some fonts have incorrect581* data - so we use straightforward binary search (see bug 6247425)582*/583int left = 0, right = startCount.length;584index = startCount.length >> 1;585while (left < right) {586if (endCount[index] < charCode) {587left = index + 1;588} else {589right = index;590}591index = (left + right) >> 1;592}593594if (charCode >= startCount[index] && charCode <= endCount[index]) {595int rangeOffset = idRangeOffset[index];596597if (rangeOffset == 0) {598glyphCode = (char)(charCode + idDelta[index]);599} else {600/* Calculate an index into the glyphIds array */601602/*603System.err.println("rangeoffset="+rangeOffset+604" charCode=" + charCode +605" scnt["+index+"]="+(int)startCount[index] +606" segCnt="+segCount);607*/608609int glyphIDIndex = rangeOffset - segCount + index610+ (charCode - startCount[index]);611glyphCode = glyphIds[glyphIDIndex];612if (glyphCode != 0) {613glyphCode = (char)(glyphCode + idDelta[index]);614}615}616}617if (glyphCode == 0) {618glyphCode = getFormatCharGlyph(origCharCode);619}620return glyphCode;621}622}623624// Format 0: Byte Encoding table625static class CMapFormat0 extends CMap {626byte [] cmap;627628CMapFormat0(ByteBuffer buffer, int offset) {629630/* skip 6 bytes of format, length, and version */631int len = buffer.getChar(offset+2);632cmap = new byte[len-6];633buffer.position(offset+6);634buffer.get(cmap);635}636637char getGlyph(int charCode) {638if (charCode < 256) {639if (charCode < 0x0010) {640switch (charCode) {641case 0x0009:642case 0x000a:643case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;644}645}646return (char)(0xff & cmap[charCode]);647} else {648return 0;649}650}651}652653// static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {654655// CMap cmap = createCMap(buffer, offset, null);656// if (cmap == null) {657// return null;658// } else {659// return new CMapFormatSymbol(cmap, syms);660// }661// }662663// static class CMapFormatSymbol extends CMap {664665// CMap cmap;666// static final int NUM_BUCKETS = 128;667// Bucket[] buckets = new Bucket[NUM_BUCKETS];668669// class Bucket {670// char unicode;671// char glyph;672// Bucket next;673674// Bucket(char u, char g) {675// unicode = u;676// glyph = g;677// }678// }679680// CMapFormatSymbol(CMap cmap, char[] syms) {681682// this.cmap = cmap;683684// for (int i=0;i<syms.length;i++) {685// char unicode = syms[i];686// if (unicode != noSuchChar) {687// char glyph = cmap.getGlyph(i + 0xf000);688// int hash = unicode % NUM_BUCKETS;689// Bucket bucket = new Bucket(unicode, glyph);690// if (buckets[hash] == null) {691// buckets[hash] = bucket;692// } else {693// Bucket b = buckets[hash];694// while (b.next != null) {695// b = b.next;696// }697// b.next = bucket;698// }699// }700// }701// }702703// char getGlyph(int unicode) {704// if (unicode >= 0x1000) {705// return 0;706// }707// else if (unicode >=0xf000 && unicode < 0xf100) {708// return cmap.getGlyph(unicode);709// } else {710// Bucket b = buckets[unicode % NUM_BUCKETS];711// while (b != null) {712// if (b.unicode == unicode) {713// return b.glyph;714// } else {715// b = b.next;716// }717// }718// return 0;719// }720// }721// }722723// Format 2: High-byte mapping through table724static class CMapFormat2 extends CMap {725726char[] subHeaderKey = new char[256];727/* Store subheaders in individual arrays728* A SubHeader entry theortically looks like {729* char firstCode;730* char entryCount;731* short idDelta;732* char idRangeOffset;733* }734*/735char[] firstCodeArray;736char[] entryCountArray;737short[] idDeltaArray;738char[] idRangeOffSetArray;739740char[] glyphIndexArray;741742CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {743744this.xlat = xlat;745746int tableLen = buffer.getChar(offset+2);747buffer.position(offset+6);748CharBuffer cBuffer = buffer.asCharBuffer();749char maxSubHeader = 0;750for (int i=0;i<256;i++) {751subHeaderKey[i] = cBuffer.get();752if (subHeaderKey[i] > maxSubHeader) {753maxSubHeader = subHeaderKey[i];754}755}756/* The value of the subHeaderKey is 8 * the subHeader index,757* so the number of subHeaders can be obtained by dividing758* this value bv 8 and adding 1.759*/760int numSubHeaders = (maxSubHeader >> 3) +1;761firstCodeArray = new char[numSubHeaders];762entryCountArray = new char[numSubHeaders];763idDeltaArray = new short[numSubHeaders];764idRangeOffSetArray = new char[numSubHeaders];765for (int i=0; i<numSubHeaders; i++) {766firstCodeArray[i] = cBuffer.get();767entryCountArray[i] = cBuffer.get();768idDeltaArray[i] = (short)cBuffer.get();769idRangeOffSetArray[i] = cBuffer.get();770// System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+771// " ec="+(int)entryCountArray[i]+772// " delta="+(int)idDeltaArray[i]+773// " offset="+(int)idRangeOffSetArray[i]);774}775776int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;777glyphIndexArray = new char[glyphIndexArrSize];778for (int i=0; i<glyphIndexArrSize;i++) {779glyphIndexArray[i] = cBuffer.get();780}781}782783char getGlyph(int charCode) {784final int origCharCode = charCode;785int controlGlyph = getControlCodeGlyph(charCode, true);786if (controlGlyph >= 0) {787return (char)controlGlyph;788}789790if (xlat != null) {791charCode = xlat[charCode];792}793794char highByte = (char)(charCode >> 8);795char lowByte = (char)(charCode & 0xff);796int key = subHeaderKey[highByte]>>3; // index into subHeaders797char mapMe;798799if (key != 0) {800mapMe = lowByte;801} else {802mapMe = highByte;803if (mapMe == 0) {804mapMe = lowByte;805}806}807808// System.err.println("charCode="+Integer.toHexString(charCode)+809// " key="+key+ " mapMe="+Integer.toHexString(mapMe));810char firstCode = firstCodeArray[key];811if (mapMe < firstCode) {812return 0;813} else {814mapMe -= firstCode;815}816817if (mapMe < entryCountArray[key]) {818/* "address" arithmetic is needed to calculate the offset819* into glyphIndexArray. "idRangeOffSetArray[key]" specifies820* the number of bytes from that location in the table where821* the subarray of glyphIndexes starting at "firstCode" begins.822* Each entry in the subHeader table is 8 bytes, and the823* idRangeOffSetArray field is at offset 6 in the entry.824* The glyphIndexArray immediately follows the subHeaders.825* So if there are "N" entries then the number of bytes to the826* start of glyphIndexArray is (N-key)*8-6.827* Subtract this from the idRangeOffSetArray value to get828* the number of bytes into glyphIndexArray and divide by 2 to829* get the (char) array index.830*/831int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;832int glyphSubArrayStart =833(idRangeOffSetArray[key] - glyphArrayOffset)/2;834char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];835if (glyphCode != 0) {836glyphCode += idDeltaArray[key]; //idDelta837return glyphCode;838}839}840return getFormatCharGlyph(origCharCode);841}842}843844// Format 6: Trimmed table mapping845static class CMapFormat6 extends CMap {846847char firstCode;848char entryCount;849char[] glyphIdArray;850851CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {852853bbuffer.position(offset+6);854CharBuffer buffer = bbuffer.asCharBuffer();855firstCode = buffer.get();856entryCount = buffer.get();857glyphIdArray = new char[entryCount];858for (int i=0; i< entryCount; i++) {859glyphIdArray[i] = buffer.get();860}861}862863char getGlyph(int charCode) {864final int origCharCode = charCode;865int controlGlyph = getControlCodeGlyph(charCode, true);866if (controlGlyph >= 0) {867return (char)controlGlyph;868}869870if (xlat != null) {871charCode = xlat[charCode];872}873874charCode -= firstCode;875if (charCode < 0 || charCode >= entryCount) {876return getFormatCharGlyph(origCharCode);877} else {878return glyphIdArray[charCode];879}880}881}882883// Format 8: mixed 16-bit and 32-bit coverage884// Seems unlikely this code will ever get tested as we look for885// MS platform Cmaps and MS states (in the Opentype spec on their website)886// that MS doesn't support this format887static class CMapFormat8 extends CMap {888byte[] is32 = new byte[8192];889int nGroups;890int[] startCharCode;891int[] endCharCode;892int[] startGlyphID;893894CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {895896bbuffer.position(12);897bbuffer.get(is32);898nGroups = bbuffer.getInt() & INTMASK;899// A map group record is three uint32's making for 12 bytes total900if (bbuffer.remaining() < (12 * (long)nGroups)) {901throw new RuntimeException("Format 8 table exceeded");902}903startCharCode = new int[nGroups];904endCharCode = new int[nGroups];905startGlyphID = new int[nGroups];906}907908char getGlyph(int charCode) {909if (xlat != null) {910throw new RuntimeException("xlat array for cmap fmt=8");911}912return 0;913}914915}916917918// Format 4-byte 10: Trimmed table mapping919// Seems unlikely this code will ever get tested as we look for920// MS platform Cmaps and MS states (in the Opentype spec on their website)921// that MS doesn't support this format922static class CMapFormat10 extends CMap {923924long firstCode;925int entryCount;926char[] glyphIdArray;927928CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {929930bbuffer.position(offset+12);931firstCode = bbuffer.getInt() & INTMASK;932entryCount = bbuffer.getInt() & INTMASK;933// each glyph is a uint16, so 2 bytes per value.934if (bbuffer.remaining() < (2 * (long)entryCount)) {935throw new RuntimeException("Format 10 table exceeded");936}937CharBuffer buffer = bbuffer.asCharBuffer();938glyphIdArray = new char[entryCount];939for (int i=0; i< entryCount; i++) {940glyphIdArray[i] = buffer.get();941}942}943944char getGlyph(int charCode) {945946if (xlat != null) {947throw new RuntimeException("xlat array for cmap fmt=10");948}949950int code = (int)(charCode - firstCode);951if (code < 0 || code >= entryCount) {952return 0;953} else {954return glyphIdArray[code];955}956}957}958959// Format 12: Segmented coverage for UCS-4 (fonts supporting960// surrogate pairs)961static class CMapFormat12 extends CMap {962963int numGroups;964int highBit =0;965int power;966int extra;967long[] startCharCode;968long[] endCharCode;969int[] startGlyphID;970971CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {972if (xlat != null) {973throw new RuntimeException("xlat array for cmap fmt=12");974}975976buffer.position(offset+12);977numGroups = buffer.getInt() & INTMASK;978// A map group record is three uint32's making for 12 bytes total979if (buffer.remaining() < (12 * (long)numGroups)) {980throw new RuntimeException("Format 12 table exceeded");981}982startCharCode = new long[numGroups];983endCharCode = new long[numGroups];984startGlyphID = new int[numGroups];985buffer = buffer.slice();986IntBuffer ibuffer = buffer.asIntBuffer();987for (int i=0; i<numGroups; i++) {988startCharCode[i] = ibuffer.get() & INTMASK;989endCharCode[i] = ibuffer.get() & INTMASK;990startGlyphID[i] = ibuffer.get() & INTMASK;991}992993/* Finds the high bit by binary searching through the bits */994int value = numGroups;995996if (value >= 1 << 16) {997value >>= 16;998highBit += 16;999}10001001if (value >= 1 << 8) {1002value >>= 8;1003highBit += 8;1004}10051006if (value >= 1 << 4) {1007value >>= 4;1008highBit += 4;1009}10101011if (value >= 1 << 2) {1012value >>= 2;1013highBit += 2;1014}10151016if (value >= 1 << 1) {1017value >>= 1;1018highBit += 1;1019}10201021power = 1 << highBit;1022extra = numGroups - power;1023}10241025char getGlyph(int charCode) {1026final int origCharCode = charCode;1027int controlGlyph = getControlCodeGlyph(charCode, false);1028if (controlGlyph >= 0) {1029return (char)controlGlyph;1030}1031int probe = power;1032int range = 0;10331034if (startCharCode[extra] <= charCode) {1035range = extra;1036}10371038while (probe > 1) {1039probe >>= 1;10401041if (startCharCode[range+probe] <= charCode) {1042range += probe;1043}1044}10451046if (startCharCode[range] <= charCode &&1047endCharCode[range] >= charCode) {1048return (char)1049(startGlyphID[range] + (charCode - startCharCode[range]));1050}10511052return getFormatCharGlyph(origCharCode);1053}10541055}10561057/* Used to substitute for bad Cmaps. */1058static class NullCMapClass extends CMap {10591060char getGlyph(int charCode) {1061return 0;1062}1063}10641065public static final NullCMapClass theNullCmap = new NullCMapClass();10661067final int getControlCodeGlyph(int charCode, boolean noSurrogates) {1068if (charCode < 0x0010) {1069switch (charCode) {1070case 0x0009:1071case 0x000a:1072case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;1073}1074} else if (noSurrogates && charCode >= 0xFFFF) {1075return 0;1076}1077return -1;1078}10791080final char getFormatCharGlyph(int charCode) {1081if (charCode >= 0x200c) {1082if ((charCode <= 0x200f) ||1083(charCode >= 0x2028 && charCode <= 0x202e) ||1084(charCode >= 0x206a && charCode <= 0x206f)) {1085return (char)CharToGlyphMapper.INVISIBLE_GLYPH_ID;1086}1087}1088return 0;1089}10901091static class UVS {1092int numSelectors;1093int[] selector;10941095//for Non-Default UVS Table1096int[] numUVSMapping;1097int[][] unicodeValue;1098char[][] glyphID;10991100UVS(ByteBuffer buffer, int offset) {1101buffer.position(offset+6);1102numSelectors = buffer.getInt() & INTMASK;1103// A variation selector record is one 3 byte int + two int32's1104// making for 11 bytes per record.1105if (buffer.remaining() < (11 * (long)numSelectors)) {1106throw new RuntimeException("Variations exceed buffer");1107}1108selector = new int[numSelectors];1109numUVSMapping = new int[numSelectors];1110unicodeValue = new int[numSelectors][];1111glyphID = new char[numSelectors][];11121113for (int i = 0; i < numSelectors; i++) {1114buffer.position(offset + 10 + i * 11);1115selector[i] = (buffer.get() & 0xff) << 16; //UINT241116selector[i] += (buffer.get() & 0xff) << 8;1117selector[i] += buffer.get() & 0xff;11181119//skip Default UVS Table11201121//for Non-Default UVS Table1122int tableOffset = buffer.getInt(offset + 10 + i * 11 + 7);1123if (tableOffset == 0) {1124numUVSMapping[i] = 0;1125} else if (tableOffset > 0) {1126buffer.position(offset+tableOffset);1127numUVSMapping[i] = buffer.getInt() & INTMASK;1128// a UVS mapping record is one 3 byte int + uint161129// making for 5 bytes per record.1130if (buffer.remaining() < (5 * (long)numUVSMapping[i])) {1131throw new RuntimeException("Variations exceed buffer");1132}1133unicodeValue[i] = new int[numUVSMapping[i]];1134glyphID[i] = new char[numUVSMapping[i]];11351136for (int j = 0; j < numUVSMapping[i]; j++) {1137int temp = (buffer.get() & 0xff) << 16; //UINT241138temp += (buffer.get() & 0xff) << 8;1139temp += buffer.get() & 0xff;1140unicodeValue[i][j] = temp;1141glyphID[i][j] = buffer.getChar();1142}1143}1144}1145}11461147static final int VS_NOGLYPH = 0;1148private int getGlyph(int charCode, int variationSelector) {1149int targetSelector = -1;1150for (int i = 0; i < numSelectors; i++) {1151if (selector[i] == variationSelector) {1152targetSelector = i;1153break;1154}1155}1156if (targetSelector == -1) {1157return VS_NOGLYPH;1158}1159if (numUVSMapping[targetSelector] > 0) {1160int index = java.util.Arrays.binarySearch(1161unicodeValue[targetSelector], charCode);1162if (index >= 0) {1163return glyphID[targetSelector][index];1164}1165}1166return VS_NOGLYPH;1167}1168}11691170char getVariationGlyph(int charCode, int variationSelector) {1171char glyph = 0;1172if (uvs == null) {1173glyph = getGlyph(charCode);1174} else {1175int result = uvs.getGlyph(charCode, variationSelector);1176if (result > 0) {1177glyph = (char)(result & 0xFFFF);1178} else {1179glyph = getGlyph(charCode);1180}1181}1182return glyph;1183}1184}118511861187