Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.desktop/share/classes/sun/font/CMap.java
41155 views
1
/*
2
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package sun.font;
27
28
import java.nio.ByteBuffer;
29
import java.nio.CharBuffer;
30
import java.nio.IntBuffer;
31
import java.util.Locale;
32
import java.nio.charset.*;
33
34
/*
35
* A tt font has a CMAP table which is in turn made up of sub-tables which
36
* describe the char to glyph mapping in (possibly) multiple ways.
37
* CMAP subtables are described by 3 values.
38
* 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
39
* 2. Encoding (eg 0=symbol, 1=unicode)
40
* 3. TrueType subtable format (how the char->glyph mapping for the encoding
41
* is stored in the subtable). See the TrueType spec. Format 4 is required
42
* by MS in fonts for windows. It uses segmented mapping to delta values.
43
* The subtables we most typically see are (3,1,4) :
44
* CMAP Platform ID=3 is what we use.
45
* Encodings that are used in practice by JDK on Solaris are
46
* symbol (3,0)
47
* unicode (3,1)
48
* GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
49
* The format for almost all subtables is 4. However the solaris (3,5)
50
* encodings are typically in format 2.
51
*/
52
abstract class CMap {
53
54
// static char WingDings_b2c[] = {
55
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
56
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
57
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
58
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
59
// 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
60
// 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
61
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
62
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
63
// 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
64
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
65
// 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
66
// 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
67
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
68
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
69
// 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
70
// 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
71
// 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
72
// 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
73
// 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
74
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
75
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
76
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
77
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
78
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
79
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
80
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
81
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
82
// 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
83
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
84
// 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
85
// 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
86
// 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
87
// };
88
89
// static char Symbols_b2c[] = {
90
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
91
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
92
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
93
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
94
// 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
95
// 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
96
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
97
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
98
// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
99
// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
100
// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
101
// 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
102
// 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
103
// 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
104
// 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
105
// 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
106
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
107
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
108
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
109
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
110
// 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
111
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
112
// 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
113
// 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
114
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
115
// 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
116
// 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
117
// 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
118
// 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
119
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
120
// 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
121
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
122
// };
123
124
static final short ShiftJISEncoding = 2;
125
static final short GBKEncoding = 3;
126
static final short Big5Encoding = 4;
127
static final short WansungEncoding = 5;
128
static final short JohabEncoding = 6;
129
static final short MSUnicodeSurrogateEncoding = 10;
130
131
static final char noSuchChar = (char)0xfffd;
132
static final int SHORTMASK = 0x0000ffff;
133
static final int INTMASK = 0x7fffffff;
134
135
static final char[][] converterMaps = new char[7][];
136
137
/*
138
* Unicode->other encoding translation array. A pre-computed look up
139
* which can be shared across all fonts using that encoding.
140
* Using this saves running character coverters repeatedly.
141
*/
142
char[] xlat;
143
UVS uvs = null;
144
145
static CMap initialize(TrueTypeFont font) {
146
147
CMap cmap = null;
148
149
int offset, platformID, encodingID=-1;
150
151
int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
152
three6=0, three10=0;
153
int zero5 = 0; // for Unicode Variation Sequences
154
boolean threeStar = false;
155
156
ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
157
int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
158
short numberSubTables = cmapBuffer.getShort(2);
159
160
/* locate the offsets of all 3,* (ie Microsoft platform) encodings */
161
for (int i=0; i<numberSubTables; i++) {
162
cmapBuffer.position(i * 8 + 4);
163
platformID = cmapBuffer.getShort();
164
if (platformID == 3) {
165
threeStar = true;
166
encodingID = cmapBuffer.getShort();
167
offset = cmapBuffer.getInt();
168
switch (encodingID) {
169
case 0: three0 = offset; break; // MS Symbol encoding
170
case 1: three1 = offset; break; // MS Unicode cmap
171
case 2: three2 = offset; break; // ShiftJIS cmap.
172
case 3: three3 = offset; break; // GBK cmap
173
case 4: three4 = offset; break; // Big 5 cmap
174
case 5: three5 = offset; break; // Wansung
175
case 6: three6 = offset; break; // Johab
176
case 10: three10 = offset; break; // MS Unicode surrogates
177
}
178
} else if (platformID == 0) {
179
encodingID = cmapBuffer.getShort();
180
offset = cmapBuffer.getInt();
181
if (encodingID == 5) {
182
zero5 = offset;
183
}
184
}
185
}
186
187
/* This defines the preference order for cmap subtables */
188
if (threeStar) {
189
if (three10 != 0) {
190
cmap = createCMap(cmapBuffer, three10, null);
191
}
192
else if (three0 != 0) {
193
/* The special case treatment of these fonts leads to
194
* anomalies where a user can view "wingdings" and "wingdings2"
195
* and the latter shows all its code points in the unicode
196
* private use area at 0xF000->0XF0FF and the former shows
197
* a scattered subset of its glyphs that are known mappings to
198
* unicode code points.
199
* The primary purpose of these mappings was to facilitate
200
* display of symbol chars etc in composite fonts, however
201
* this is not needed as all these code points are covered
202
* by some other platform symbol font.
203
* Commenting this out reduces the role of these two files
204
* (assuming that they continue to be used in font.properties)
205
* to just one of contributing to the overall composite
206
* font metrics, and also AWT can still access the fonts.
207
* Clients which explicitly accessed these fonts as names
208
* "Symbol" and "Wingdings" (ie as physical fonts) and
209
* expected to see a scattering of these characters will
210
* see them now as missing. How much of a problem is this?
211
* Perhaps we could still support this mapping just for
212
* "Symbol.ttf" but I suspect some users would prefer it
213
* to be mapped in to the Latin range as that is how
214
* the "symbol" font is used in native apps.
215
*/
216
// String name = font.platName.toLowerCase(Locale.ENGLISH);
217
// if (name.endsWith("symbol.ttf")) {
218
// cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
219
// } else if (name.endsWith("wingding.ttf")) {
220
// cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
221
// } else {
222
cmap = createCMap(cmapBuffer, three0, null);
223
// }
224
}
225
else if (three1 != 0) {
226
cmap = createCMap(cmapBuffer, three1, null);
227
}
228
else if (three2 != 0) {
229
cmap = createCMap(cmapBuffer, three2,
230
getConverterMap(ShiftJISEncoding));
231
}
232
else if (three3 != 0) {
233
cmap = createCMap(cmapBuffer, three3,
234
getConverterMap(GBKEncoding));
235
}
236
else if (three4 != 0) {
237
cmap = createCMap(cmapBuffer, three4,
238
getConverterMap(Big5Encoding));
239
}
240
else if (three5 != 0) {
241
cmap = createCMap(cmapBuffer, three5,
242
getConverterMap(WansungEncoding));
243
}
244
else if (three6 != 0) {
245
cmap = createCMap(cmapBuffer, three6,
246
getConverterMap(JohabEncoding));
247
}
248
} else {
249
/* No 3,* subtable was found. Just use whatever is the first
250
* table listed. Not very useful but maybe better than
251
* rejecting the font entirely?
252
*/
253
cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
254
}
255
// For Unicode Variation Sequences
256
if (cmap != null && zero5 != 0) {
257
cmap.createUVS(cmapBuffer, zero5);
258
}
259
return cmap;
260
}
261
262
/* speed up the converting by setting the range for double
263
* byte characters;
264
*/
265
static char[] getConverter(short encodingID) {
266
int dBegin = 0x8000;
267
int dEnd = 0xffff;
268
String encoding;
269
270
switch (encodingID) {
271
case ShiftJISEncoding:
272
dBegin = 0x8140;
273
dEnd = 0xfcfc;
274
encoding = "SJIS";
275
break;
276
case GBKEncoding:
277
dBegin = 0x8140;
278
dEnd = 0xfea0;
279
encoding = "GBK";
280
break;
281
case Big5Encoding:
282
dBegin = 0xa140;
283
dEnd = 0xfefe;
284
encoding = "Big5";
285
break;
286
case WansungEncoding:
287
dBegin = 0xa1a1;
288
dEnd = 0xfede;
289
encoding = "EUC_KR";
290
break;
291
case JohabEncoding:
292
dBegin = 0x8141;
293
dEnd = 0xfdfe;
294
encoding = "Johab";
295
break;
296
default:
297
return null;
298
}
299
300
try {
301
char[] convertedChars = new char[65536];
302
for (int i=0; i<65536; i++) {
303
convertedChars[i] = noSuchChar;
304
}
305
306
byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
307
char[] outputChars = new char[(dEnd-dBegin+1)];
308
309
int j = 0;
310
int firstByte;
311
if (encodingID == ShiftJISEncoding) {
312
for (int i = dBegin; i <= dEnd; i++) {
313
firstByte = (i >> 8 & 0xff);
314
if (firstByte >= 0xa1 && firstByte <= 0xdf) {
315
//sjis halfwidth katakana
316
inputBytes[j++] = (byte)0xff;
317
inputBytes[j++] = (byte)0xff;
318
} else {
319
inputBytes[j++] = (byte)firstByte;
320
inputBytes[j++] = (byte)(i & 0xff);
321
}
322
}
323
} else {
324
for (int i = dBegin; i <= dEnd; i++) {
325
inputBytes[j++] = (byte)(i>>8 & 0xff);
326
inputBytes[j++] = (byte)(i & 0xff);
327
}
328
}
329
330
Charset.forName(encoding).newDecoder()
331
.onMalformedInput(CodingErrorAction.REPLACE)
332
.onUnmappableCharacter(CodingErrorAction.REPLACE)
333
.replaceWith("\u0000")
334
.decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
335
CharBuffer.wrap(outputChars, 0, outputChars.length),
336
true);
337
338
// ensure single byte ascii
339
for (int i = 0x20; i <= 0x7e; i++) {
340
convertedChars[i] = (char)i;
341
}
342
343
//sjis halfwidth katakana
344
if (encodingID == ShiftJISEncoding) {
345
for (int i = 0xa1; i <= 0xdf; i++) {
346
convertedChars[i] = (char)(i - 0xa1 + 0xff61);
347
}
348
}
349
350
/* It would save heap space (approx 60Kbytes for each of these
351
* converters) if stored only valid ranges (ie returned
352
* outputChars directly. But this is tricky since want to
353
* include the ASCII range too.
354
*/
355
// System.err.println("oc.len="+outputChars.length);
356
// System.err.println("cc.len="+convertedChars.length);
357
// System.err.println("dbegin="+dBegin);
358
System.arraycopy(outputChars, 0, convertedChars, dBegin,
359
outputChars.length);
360
361
//return convertedChars;
362
/* invert this map as now want it to map from Unicode
363
* to other encoding.
364
*/
365
char [] invertedChars = new char[65536];
366
for (int i=0;i<65536;i++) {
367
if (convertedChars[i] != noSuchChar) {
368
invertedChars[convertedChars[i]] = (char)i;
369
}
370
}
371
return invertedChars;
372
373
} catch (Exception e) {
374
e.printStackTrace();
375
}
376
return null;
377
}
378
379
/*
380
* The returned array maps to unicode from some other 2 byte encoding
381
* eg for a 2byte index which represents a SJIS char, the indexed
382
* value is the corresponding unicode char.
383
*/
384
static char[] getConverterMap(short encodingID) {
385
if (converterMaps[encodingID] == null) {
386
converterMaps[encodingID] = getConverter(encodingID);
387
}
388
return converterMaps[encodingID];
389
}
390
391
392
static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
393
/* First do a sanity check that this cmap subtable is contained
394
* within the cmap table.
395
*/
396
int subtableFormat = buffer.getChar(offset);
397
long subtableLength;
398
if (subtableFormat < 8) {
399
subtableLength = buffer.getChar(offset+2);
400
} else {
401
subtableLength = buffer.getInt(offset+4) & INTMASK;
402
}
403
if (FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) {
404
FontUtilities.logWarning("Cmap subtable overflows buffer.");
405
}
406
switch (subtableFormat) {
407
case 0: return new CMapFormat0(buffer, offset);
408
case 2: return new CMapFormat2(buffer, offset, xlat);
409
case 4: return new CMapFormat4(buffer, offset, xlat);
410
case 6: return new CMapFormat6(buffer, offset, xlat);
411
case 8: return new CMapFormat8(buffer, offset, xlat);
412
case 10: return new CMapFormat10(buffer, offset, xlat);
413
case 12: return new CMapFormat12(buffer, offset, xlat);
414
default: throw new RuntimeException("Cmap format unimplemented: " +
415
(int)buffer.getChar(offset));
416
}
417
}
418
419
private void createUVS(ByteBuffer buffer, int offset) {
420
int subtableFormat = buffer.getChar(offset);
421
if (subtableFormat == 14) {
422
long subtableLength = buffer.getInt(offset + 2) & INTMASK;
423
if (FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) {
424
FontUtilities.logWarning("Cmap UVS subtable overflows buffer.");
425
}
426
try {
427
this.uvs = new UVS(buffer, offset);
428
} catch (Throwable t) {
429
t.printStackTrace();
430
}
431
}
432
return;
433
}
434
435
/*
436
final char charVal(byte[] cmap, int index) {
437
return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
438
}
439
440
final short shortVal(byte[] cmap, int index) {
441
return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
442
}
443
*/
444
/*
 * Maps a character code to a glyph code. Each cmap subtable format
 * class supplies its own implementation. For a code with no mapping,
 * the implementations visible in this file return either 0 or the
 * result of getFormatCharGlyph().
 */
abstract char getGlyph(int charCode);
445
446
/* Format 4 Header is
447
* ushort format (off=0)
448
* ushort length (off=2)
449
* ushort language (off=4)
450
* ushort segCountX2 (off=6)
451
* ushort searchRange (off=8)
452
* ushort entrySelector (off=10)
453
* ushort rangeShift (off=12)
454
* ushort endCount[segCount] (off=14)
455
* ushort reservedPad
456
* ushort startCount[segCount]
457
* short idDelta[segCount]
458
* ushort idRangeOffset[segCount]
459
* ushort glyphIdArray[]
460
*/
461
static class CMapFormat4 extends CMap {
462
int segCount;
463
int entrySelector;
464
int rangeShift;
465
char[] endCount;
466
char[] startCount;
467
short[] idDelta;
468
char[] idRangeOffset;
469
char[] glyphIds;
470
471
CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
472
473
this.xlat = xlat;
474
475
bbuffer.position(offset);
476
CharBuffer buffer = bbuffer.asCharBuffer();
477
buffer.get(); // skip, we already know format=4
478
int subtableLength = buffer.get();
479
/* Try to recover from some bad fonts which specify a subtable
480
* length that would overflow the byte buffer holding the whole
481
* cmap table. If this isn't a recoverable situation an exception
482
* may be thrown which is caught higher up the call stack.
483
* Whilst this may seem lenient, in practice, unless the "bad"
484
* subtable we are using is the last one in the cmap table we
485
* would have no way of knowing about this problem anyway.
486
*/
487
if (offset+subtableLength > bbuffer.capacity()) {
488
subtableLength = bbuffer.capacity() - offset;
489
}
490
buffer.get(); // skip language
491
segCount = buffer.get()/2;
492
int searchRange = buffer.get();
493
entrySelector = buffer.get();
494
rangeShift = buffer.get()/2;
495
startCount = new char[segCount];
496
endCount = new char[segCount];
497
idDelta = new short[segCount];
498
idRangeOffset = new char[segCount];
499
500
for (int i=0; i<segCount; i++) {
501
endCount[i] = buffer.get();
502
}
503
buffer.get(); // 2 bytes for reserved pad
504
for (int i=0; i<segCount; i++) {
505
startCount[i] = buffer.get();
506
}
507
508
for (int i=0; i<segCount; i++) {
509
idDelta[i] = (short)buffer.get();
510
}
511
512
for (int i=0; i<segCount; i++) {
513
char ctmp = buffer.get();
514
idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
515
}
516
/* Can calculate the number of glyph IDs by subtracting
517
* "pos" from the length of the cmap
518
*/
519
int pos = (segCount*8+16)/2;
520
buffer.position(pos);
521
int numGlyphIds = (subtableLength/2 - pos);
522
glyphIds = new char[numGlyphIds];
523
for (int i=0;i<numGlyphIds;i++) {
524
glyphIds[i] = buffer.get();
525
}
526
/*
527
System.err.println("segcount="+segCount);
528
System.err.println("entrySelector="+entrySelector);
529
System.err.println("rangeShift="+rangeShift);
530
for (int j=0;j<segCount;j++) {
531
System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
532
" ec="+(int)(endCount[j]&0xffff)+
533
" delta="+idDelta[j] +
534
" ro="+(int)idRangeOffset[j]);
535
}
536
537
//System.err.println("numglyphs="+glyphIds.length);
538
for (int i=0;i<numGlyphIds;i++) {
539
System.err.println("gid["+i+"]="+(int)glyphIds[i]);
540
}
541
*/
542
}
543
544
char getGlyph(int charCode) {
545
546
final int origCharCode = charCode;
547
int index = 0;
548
char glyphCode = 0;
549
550
int controlGlyph = getControlCodeGlyph(charCode, true);
551
if (controlGlyph >= 0) {
552
return (char)controlGlyph;
553
}
554
555
/* presence of translation array indicates that this
556
* cmap is in some other (non-unicode encoding).
557
* In order to look-up a char->glyph mapping we need to
558
* translate the unicode code point to the encoding of
559
* the cmap.
560
* REMIND: VALID CHARCODES??
561
*/
562
if (xlat != null) {
563
charCode = xlat[charCode];
564
}
565
566
/*
567
* Citation from the TrueType (and OpenType) spec:
568
* The segments are sorted in order of increasing endCode
569
* values, and the segment values are specified in four parallel
570
* arrays. You search for the first endCode that is greater than
571
* or equal to the character code you want to map. If the
572
* corresponding startCode is less than or equal to the
573
* character code, then you use the corresponding idDelta and
574
* idRangeOffset to map the character code to a glyph index
575
* (otherwise, the missingGlyph is returned).
576
*/
577
578
/*
579
* CMAP format4 defines several fields for optimized search of
580
* the segment list (entrySelector, searchRange, rangeShift).
581
* However, benefits are neglible and some fonts have incorrect
582
* data - so we use straightforward binary search (see bug 6247425)
583
*/
584
int left = 0, right = startCount.length;
585
index = startCount.length >> 1;
586
while (left < right) {
587
if (endCount[index] < charCode) {
588
left = index + 1;
589
} else {
590
right = index;
591
}
592
index = (left + right) >> 1;
593
}
594
595
if (charCode >= startCount[index] && charCode <= endCount[index]) {
596
int rangeOffset = idRangeOffset[index];
597
598
if (rangeOffset == 0) {
599
glyphCode = (char)(charCode + idDelta[index]);
600
} else {
601
/* Calculate an index into the glyphIds array */
602
603
/*
604
System.err.println("rangeoffset="+rangeOffset+
605
" charCode=" + charCode +
606
" scnt["+index+"]="+(int)startCount[index] +
607
" segCnt="+segCount);
608
*/
609
610
int glyphIDIndex = rangeOffset - segCount + index
611
+ (charCode - startCount[index]);
612
glyphCode = glyphIds[glyphIDIndex];
613
if (glyphCode != 0) {
614
glyphCode = (char)(glyphCode + idDelta[index]);
615
}
616
}
617
}
618
if (glyphCode == 0) {
619
glyphCode = getFormatCharGlyph(origCharCode);
620
}
621
return glyphCode;
622
}
623
}
624
625
// Format 0: Byte Encoding table
626
static class CMapFormat0 extends CMap {
627
byte [] cmap;
628
629
CMapFormat0(ByteBuffer buffer, int offset) {
630
631
/* skip 6 bytes of format, length, and version */
632
int len = buffer.getChar(offset+2);
633
cmap = new byte[len-6];
634
buffer.position(offset+6);
635
buffer.get(cmap);
636
}
637
638
char getGlyph(int charCode) {
639
if (charCode < 256) {
640
if (charCode < 0x0010) {
641
switch (charCode) {
642
case 0x0009:
643
case 0x000a:
644
case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
645
}
646
}
647
return (char)(0xff & cmap[charCode]);
648
} else {
649
return 0;
650
}
651
}
652
}
653
654
// static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
655
656
// CMap cmap = createCMap(buffer, offset, null);
657
// if (cmap == null) {
658
// return null;
659
// } else {
660
// return new CMapFormatSymbol(cmap, syms);
661
// }
662
// }
663
664
// static class CMapFormatSymbol extends CMap {
665
666
// CMap cmap;
667
// static final int NUM_BUCKETS = 128;
668
// Bucket[] buckets = new Bucket[NUM_BUCKETS];
669
670
// class Bucket {
671
// char unicode;
672
// char glyph;
673
// Bucket next;
674
675
// Bucket(char u, char g) {
676
// unicode = u;
677
// glyph = g;
678
// }
679
// }
680
681
// CMapFormatSymbol(CMap cmap, char[] syms) {
682
683
// this.cmap = cmap;
684
685
// for (int i=0;i<syms.length;i++) {
686
// char unicode = syms[i];
687
// if (unicode != noSuchChar) {
688
// char glyph = cmap.getGlyph(i + 0xf000);
689
// int hash = unicode % NUM_BUCKETS;
690
// Bucket bucket = new Bucket(unicode, glyph);
691
// if (buckets[hash] == null) {
692
// buckets[hash] = bucket;
693
// } else {
694
// Bucket b = buckets[hash];
695
// while (b.next != null) {
696
// b = b.next;
697
// }
698
// b.next = bucket;
699
// }
700
// }
701
// }
702
// }
703
704
// char getGlyph(int unicode) {
705
// if (unicode >= 0x1000) {
706
// return 0;
707
// }
708
// else if (unicode >=0xf000 && unicode < 0xf100) {
709
// return cmap.getGlyph(unicode);
710
// } else {
711
// Bucket b = buckets[unicode % NUM_BUCKETS];
712
// while (b != null) {
713
// if (b.unicode == unicode) {
714
// return b.glyph;
715
// } else {
716
// b = b.next;
717
// }
718
// }
719
// return 0;
720
// }
721
// }
722
// }
723
724
// Format 2: High-byte mapping through table
725
static class CMapFormat2 extends CMap {
726
727
char[] subHeaderKey = new char[256];
728
/* Store subheaders in individual arrays
729
* A SubHeader entry theortically looks like {
730
* char firstCode;
731
* char entryCount;
732
* short idDelta;
733
* char idRangeOffset;
734
* }
735
*/
736
char[] firstCodeArray;
737
char[] entryCountArray;
738
short[] idDeltaArray;
739
char[] idRangeOffSetArray;
740
741
char[] glyphIndexArray;
742
743
CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
744
745
this.xlat = xlat;
746
747
int tableLen = buffer.getChar(offset+2);
748
buffer.position(offset+6);
749
CharBuffer cBuffer = buffer.asCharBuffer();
750
char maxSubHeader = 0;
751
for (int i=0;i<256;i++) {
752
subHeaderKey[i] = cBuffer.get();
753
if (subHeaderKey[i] > maxSubHeader) {
754
maxSubHeader = subHeaderKey[i];
755
}
756
}
757
/* The value of the subHeaderKey is 8 * the subHeader index,
758
* so the number of subHeaders can be obtained by dividing
759
* this value bv 8 and adding 1.
760
*/
761
int numSubHeaders = (maxSubHeader >> 3) +1;
762
firstCodeArray = new char[numSubHeaders];
763
entryCountArray = new char[numSubHeaders];
764
idDeltaArray = new short[numSubHeaders];
765
idRangeOffSetArray = new char[numSubHeaders];
766
for (int i=0; i<numSubHeaders; i++) {
767
firstCodeArray[i] = cBuffer.get();
768
entryCountArray[i] = cBuffer.get();
769
idDeltaArray[i] = (short)cBuffer.get();
770
idRangeOffSetArray[i] = cBuffer.get();
771
// System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
772
// " ec="+(int)entryCountArray[i]+
773
// " delta="+(int)idDeltaArray[i]+
774
// " offset="+(int)idRangeOffSetArray[i]);
775
}
776
777
int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
778
glyphIndexArray = new char[glyphIndexArrSize];
779
for (int i=0; i<glyphIndexArrSize;i++) {
780
glyphIndexArray[i] = cBuffer.get();
781
}
782
}
783
784
char getGlyph(int charCode) {
785
final int origCharCode = charCode;
786
int controlGlyph = getControlCodeGlyph(charCode, true);
787
if (controlGlyph >= 0) {
788
return (char)controlGlyph;
789
}
790
791
if (xlat != null) {
792
charCode = xlat[charCode];
793
}
794
795
char highByte = (char)(charCode >> 8);
796
char lowByte = (char)(charCode & 0xff);
797
int key = subHeaderKey[highByte]>>3; // index into subHeaders
798
char mapMe;
799
800
if (key != 0) {
801
mapMe = lowByte;
802
} else {
803
mapMe = highByte;
804
if (mapMe == 0) {
805
mapMe = lowByte;
806
}
807
}
808
809
// System.err.println("charCode="+Integer.toHexString(charCode)+
810
// " key="+key+ " mapMe="+Integer.toHexString(mapMe));
811
char firstCode = firstCodeArray[key];
812
if (mapMe < firstCode) {
813
return 0;
814
} else {
815
mapMe -= firstCode;
816
}
817
818
if (mapMe < entryCountArray[key]) {
819
/* "address" arithmetic is needed to calculate the offset
820
* into glyphIndexArray. "idRangeOffSetArray[key]" specifies
821
* the number of bytes from that location in the table where
822
* the subarray of glyphIndexes starting at "firstCode" begins.
823
* Each entry in the subHeader table is 8 bytes, and the
824
* idRangeOffSetArray field is at offset 6 in the entry.
825
* The glyphIndexArray immediately follows the subHeaders.
826
* So if there are "N" entries then the number of bytes to the
827
* start of glyphIndexArray is (N-key)*8-6.
828
* Subtract this from the idRangeOffSetArray value to get
829
* the number of bytes into glyphIndexArray and divide by 2 to
830
* get the (char) array index.
831
*/
832
int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
833
int glyphSubArrayStart =
834
(idRangeOffSetArray[key] - glyphArrayOffset)/2;
835
char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
836
if (glyphCode != 0) {
837
glyphCode += idDeltaArray[key]; //idDelta
838
return glyphCode;
839
}
840
}
841
return getFormatCharGlyph(origCharCode);
842
}
843
}
844
845
// Format 6: Trimmed table mapping
846
static class CMapFormat6 extends CMap {
847
848
char firstCode;
849
char entryCount;
850
char[] glyphIdArray;
851
852
CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
853
854
bbuffer.position(offset+6);
855
CharBuffer buffer = bbuffer.asCharBuffer();
856
firstCode = buffer.get();
857
entryCount = buffer.get();
858
glyphIdArray = new char[entryCount];
859
for (int i=0; i< entryCount; i++) {
860
glyphIdArray[i] = buffer.get();
861
}
862
}
863
864
char getGlyph(int charCode) {
865
final int origCharCode = charCode;
866
int controlGlyph = getControlCodeGlyph(charCode, true);
867
if (controlGlyph >= 0) {
868
return (char)controlGlyph;
869
}
870
871
if (xlat != null) {
872
charCode = xlat[charCode];
873
}
874
875
charCode -= firstCode;
876
if (charCode < 0 || charCode >= entryCount) {
877
return getFormatCharGlyph(origCharCode);
878
} else {
879
return glyphIdArray[charCode];
880
}
881
}
882
}
883
884
// Format 8: mixed 16-bit and 32-bit coverage
885
// Seems unlikely this code will ever get tested as we look for
886
// MS platform Cmaps and MS states (in the Opentype spec on their website)
887
// that MS doesn't support this format
888
static class CMapFormat8 extends CMap {
889
byte[] is32 = new byte[8192];
890
int nGroups;
891
int[] startCharCode;
892
int[] endCharCode;
893
int[] startGlyphID;
894
895
CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
896
897
bbuffer.position(12);
898
bbuffer.get(is32);
899
nGroups = bbuffer.getInt() & INTMASK;
900
// A map group record is three uint32's making for 12 bytes total
901
if (bbuffer.remaining() < (12 * (long)nGroups)) {
902
throw new RuntimeException("Format 8 table exceeded");
903
}
904
startCharCode = new int[nGroups];
905
endCharCode = new int[nGroups];
906
startGlyphID = new int[nGroups];
907
}
908
909
char getGlyph(int charCode) {
910
if (xlat != null) {
911
throw new RuntimeException("xlat array for cmap fmt=8");
912
}
913
return 0;
914
}
915
916
}
917
918
919
// Format 4-byte 10: Trimmed table mapping
920
// Seems unlikely this code will ever get tested as we look for
921
// MS platform Cmaps and MS states (in the Opentype spec on their website)
922
// that MS doesn't support this format
923
static class CMapFormat10 extends CMap {
924
925
long firstCode;
926
int entryCount;
927
char[] glyphIdArray;
928
929
CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
930
931
bbuffer.position(offset+12);
932
firstCode = bbuffer.getInt() & INTMASK;
933
entryCount = bbuffer.getInt() & INTMASK;
934
// each glyph is a uint16, so 2 bytes per value.
935
if (bbuffer.remaining() < (2 * (long)entryCount)) {
936
throw new RuntimeException("Format 10 table exceeded");
937
}
938
CharBuffer buffer = bbuffer.asCharBuffer();
939
glyphIdArray = new char[entryCount];
940
for (int i=0; i< entryCount; i++) {
941
glyphIdArray[i] = buffer.get();
942
}
943
}
944
945
char getGlyph(int charCode) {
946
947
if (xlat != null) {
948
throw new RuntimeException("xlat array for cmap fmt=10");
949
}
950
951
int code = (int)(charCode - firstCode);
952
if (code < 0 || code >= entryCount) {
953
return 0;
954
} else {
955
return glyphIdArray[code];
956
}
957
}
958
}
959
960
// Format 12: Segmented coverage for UCS-4 (fonts supporting
961
// surrogate pairs)
962
static class CMapFormat12 extends CMap {
963
964
int numGroups;
965
int highBit =0;
966
int power;
967
int extra;
968
long[] startCharCode;
969
long[] endCharCode;
970
int[] startGlyphID;
971
972
CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
973
if (xlat != null) {
974
throw new RuntimeException("xlat array for cmap fmt=12");
975
}
976
977
buffer.position(offset+12);
978
numGroups = buffer.getInt() & INTMASK;
979
// A map group record is three uint32's making for 12 bytes total
980
if (buffer.remaining() < (12 * (long)numGroups)) {
981
throw new RuntimeException("Format 12 table exceeded");
982
}
983
startCharCode = new long[numGroups];
984
endCharCode = new long[numGroups];
985
startGlyphID = new int[numGroups];
986
buffer = buffer.slice();
987
IntBuffer ibuffer = buffer.asIntBuffer();
988
for (int i=0; i<numGroups; i++) {
989
startCharCode[i] = ibuffer.get() & INTMASK;
990
endCharCode[i] = ibuffer.get() & INTMASK;
991
startGlyphID[i] = ibuffer.get() & INTMASK;
992
}
993
994
/* Finds the high bit by binary searching through the bits */
995
int value = numGroups;
996
997
if (value >= 1 << 16) {
998
value >>= 16;
999
highBit += 16;
1000
}
1001
1002
if (value >= 1 << 8) {
1003
value >>= 8;
1004
highBit += 8;
1005
}
1006
1007
if (value >= 1 << 4) {
1008
value >>= 4;
1009
highBit += 4;
1010
}
1011
1012
if (value >= 1 << 2) {
1013
value >>= 2;
1014
highBit += 2;
1015
}
1016
1017
if (value >= 1 << 1) {
1018
value >>= 1;
1019
highBit += 1;
1020
}
1021
1022
power = 1 << highBit;
1023
extra = numGroups - power;
1024
}
1025
1026
char getGlyph(int charCode) {
1027
final int origCharCode = charCode;
1028
int controlGlyph = getControlCodeGlyph(charCode, false);
1029
if (controlGlyph >= 0) {
1030
return (char)controlGlyph;
1031
}
1032
int probe = power;
1033
int range = 0;
1034
1035
if (startCharCode[extra] <= charCode) {
1036
range = extra;
1037
}
1038
1039
while (probe > 1) {
1040
probe >>= 1;
1041
1042
if (startCharCode[range+probe] <= charCode) {
1043
range += probe;
1044
}
1045
}
1046
1047
if (startCharCode[range] <= charCode &&
1048
endCharCode[range] >= charCode) {
1049
return (char)
1050
(startGlyphID[range] + (charCode - startCharCode[range]));
1051
}
1052
1053
return getFormatCharGlyph(origCharCode);
1054
}
1055
1056
}
1057
1058
/* Used to substitute for bad Cmaps. */
1059
static class NullCMapClass extends CMap {
1060
1061
char getGlyph(int charCode) {
1062
return 0;
1063
}
1064
}
1065
1066
/* Shared NullCMapClass instance, the substitute used for bad Cmaps. */
public static final NullCMapClass theNullCmap = new NullCMapClass();
1067
1068
final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
1069
if (charCode < 0x0010) {
1070
switch (charCode) {
1071
case 0x0009:
1072
case 0x000a:
1073
case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1074
}
1075
} else if (noSurrogates && charCode >= 0xFFFF) {
1076
return 0;
1077
}
1078
return -1;
1079
}
1080
1081
final char getFormatCharGlyph(int charCode) {
1082
if (charCode >= 0x200c) {
1083
if ((charCode <= 0x200f) ||
1084
(charCode >= 0x2028 && charCode <= 0x202e) ||
1085
(charCode >= 0x206a && charCode <= 0x206f)) {
1086
return (char)CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1087
}
1088
}
1089
return 0;
1090
}
1091
1092
static class UVS {
1093
int numSelectors;
1094
int[] selector;
1095
1096
//for Non-Default UVS Table
1097
int[] numUVSMapping;
1098
int[][] unicodeValue;
1099
char[][] glyphID;
1100
1101
UVS(ByteBuffer buffer, int offset) {
1102
buffer.position(offset+6);
1103
numSelectors = buffer.getInt() & INTMASK;
1104
// A variation selector record is one 3 byte int + two int32's
1105
// making for 11 bytes per record.
1106
if (buffer.remaining() < (11 * (long)numSelectors)) {
1107
throw new RuntimeException("Variations exceed buffer");
1108
}
1109
selector = new int[numSelectors];
1110
numUVSMapping = new int[numSelectors];
1111
unicodeValue = new int[numSelectors][];
1112
glyphID = new char[numSelectors][];
1113
1114
for (int i = 0; i < numSelectors; i++) {
1115
buffer.position(offset + 10 + i * 11);
1116
selector[i] = (buffer.get() & 0xff) << 16; //UINT24
1117
selector[i] += (buffer.get() & 0xff) << 8;
1118
selector[i] += buffer.get() & 0xff;
1119
1120
//skip Default UVS Table
1121
1122
//for Non-Default UVS Table
1123
int tableOffset = buffer.getInt(offset + 10 + i * 11 + 7);
1124
if (tableOffset == 0) {
1125
numUVSMapping[i] = 0;
1126
} else if (tableOffset > 0) {
1127
buffer.position(offset+tableOffset);
1128
numUVSMapping[i] = buffer.getInt() & INTMASK;
1129
// a UVS mapping record is one 3 byte int + uint16
1130
// making for 5 bytes per record.
1131
if (buffer.remaining() < (5 * (long)numUVSMapping[i])) {
1132
throw new RuntimeException("Variations exceed buffer");
1133
}
1134
unicodeValue[i] = new int[numUVSMapping[i]];
1135
glyphID[i] = new char[numUVSMapping[i]];
1136
1137
for (int j = 0; j < numUVSMapping[i]; j++) {
1138
int temp = (buffer.get() & 0xff) << 16; //UINT24
1139
temp += (buffer.get() & 0xff) << 8;
1140
temp += buffer.get() & 0xff;
1141
unicodeValue[i][j] = temp;
1142
glyphID[i][j] = buffer.getChar();
1143
}
1144
}
1145
}
1146
}
1147
1148
static final int VS_NOGLYPH = 0;
1149
private int getGlyph(int charCode, int variationSelector) {
1150
int targetSelector = -1;
1151
for (int i = 0; i < numSelectors; i++) {
1152
if (selector[i] == variationSelector) {
1153
targetSelector = i;
1154
break;
1155
}
1156
}
1157
if (targetSelector == -1) {
1158
return VS_NOGLYPH;
1159
}
1160
if (numUVSMapping[targetSelector] > 0) {
1161
int index = java.util.Arrays.binarySearch(
1162
unicodeValue[targetSelector], charCode);
1163
if (index >= 0) {
1164
return glyphID[targetSelector][index];
1165
}
1166
}
1167
return VS_NOGLYPH;
1168
}
1169
}
1170
1171
char getVariationGlyph(int charCode, int variationSelector) {
1172
char glyph = 0;
1173
if (uvs == null) {
1174
glyph = getGlyph(charCode);
1175
} else {
1176
int result = uvs.getGlyph(charCode, variationSelector);
1177
if (result > 0) {
1178
glyph = (char)(result & 0xFFFF);
1179
} else {
1180
glyph = getGlyph(charCode);
1181
}
1182
}
1183
return glyph;
1184
}
1185
}
1186
1187