// Source: GitHub repository PojavLauncherTeam/mobile, path blob/master/src/java.base/share/classes/jdk/internal/icu/util/CodePointTrie.java
/*
2
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
// (c) 2018 and later: Unicode, Inc. and others.
26
// License & terms of use: http://www.unicode.org/copyright.html#License
27
28
// created: 2018may04 Markus W. Scherer
29
30
package jdk.internal.icu.util;
31
32
import jdk.internal.icu.impl.ICUBinary;
33
34
import java.io.DataOutputStream;
35
import java.io.IOException;
36
import java.io.UncheckedIOException;
37
import java.io.OutputStream;
38
import java.nio.ByteBuffer;
39
import java.nio.ByteOrder;
40
41
import static jdk.internal.icu.impl.NormalizerImpl.UTF16Plus;
42
43
/**
44
* Immutable Unicode code point trie.
45
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
46
* For details see http://site.icu-project.org/design/struct/utrie
47
*
48
* <p>This class is not intended for public subclassing.
49
*
50
* @see MutableCodePointTrie
51
* @stable ICU 63
52
*/
53
@SuppressWarnings("deprecation")
54
public abstract class CodePointTrie extends CodePointMap {
55
/**
 * Selects the kind of CodePointTrie, trading data size against lookup speed.
 *
 * <p>Pass null to {@link #fromBinary} to accept any type;
 * {@link #getType} then reports the type that was actually read.
 *
 * <p>The declaration order matters: the serialized header stores the
 * constant's ordinal (0 for FAST, 1 for SMALL) in its options field.
 *
 * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
 * @see #fromBinary
 * @see #getType
 * @stable ICU 63
 */
public enum Type {
    /**
     * Fast/simple/larger BMP data structure.
     * The {@link Fast} subclasses offer additional lookup functions
     * for BMP and supplementary code points.
     *
     * @see Fast
     * @stable ICU 63
     */
    FAST,

    /**
     * Small/slower BMP data structure.
     *
     * @see Small
     * @stable ICU 63
     */
    SMALL
}
84
85
/**
 * Selects how many bits a CodePointTrie stores per data value.
 *
 * <p>Pass null to {@link #fromBinary} to accept any data value width;
 * {@link #getValueWidth} then reports the width that was actually read.
 *
 * <p>The declaration order matters: the serialized header stores the
 * constant's ordinal (0, 1, 2) in the low bits of its options field.
 *
 * @stable ICU 63
 */
public enum ValueWidth {
    /**
     * 16 bits per data value, returned as unsigned values 0..0xffff=65535.
     *
     * @stable ICU 63
     */
    BITS_16,

    /**
     * 32 bits per data value.
     *
     * @stable ICU 63
     */
    BITS_32,

    /**
     * 8 bits per data value, returned as unsigned values 0..0xff=255.
     *
     * @stable ICU 63
     */
    BITS_8
}
115
116
private CodePointTrie(char[] index, Data data, int highStart,
117
int index3NullOffset, int dataNullOffset) {
118
this.ascii = new int[ASCII_LIMIT];
119
this.index = index;
120
this.data = data;
121
this.dataLength = data.getDataLength();
122
this.highStart = highStart;
123
this.index3NullOffset = index3NullOffset;
124
this.dataNullOffset = dataNullOffset;
125
126
for (int c = 0; c < ASCII_LIMIT; ++c) {
127
ascii[c] = data.getFromIndex(c);
128
}
129
130
int nullValueOffset = dataNullOffset;
131
if (nullValueOffset >= dataLength) {
132
nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
133
}
134
nullValue = data.getFromIndex(nullValueOffset);
135
}
136
137
/**
138
* Creates a trie from its binary form,
139
* stored in the ByteBuffer starting at the current position.
140
* Advances the buffer position to just after the trie data.
141
* Inverse of {@link #toBinary(OutputStream)}.
142
*
143
* <p>The data is copied from the buffer;
144
* later modification of the buffer will not affect the trie.
145
*
146
* @param type selects the trie type; this method throws an exception
147
* if the type does not match the binary data;
148
* use null to accept any type
149
* @param valueWidth selects the number of bits in a data value; this method throws an exception
150
* if the valueWidth does not match the binary data;
151
* use null to accept any data value width
152
* @param bytes a buffer containing the binary data of a CodePointTrie
153
* @return the trie
154
* @see MutableCodePointTrie#MutableCodePointTrie(int, int)
155
* @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
156
* @see #toBinary(OutputStream)
157
* @stable ICU 63
158
*/
159
public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) {
160
ByteOrder outerByteOrder = bytes.order();
161
try {
162
// Enough data for a trie header?
163
if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) {
164
throw new InternalError("Buffer too short for a CodePointTrie header");
165
}
166
167
// struct UCPTrieHeader
168
/** "Tri3" in big-endian US-ASCII (0x54726933) */
169
int signature = bytes.getInt();
170
171
// Check the signature.
172
switch (signature) {
173
case 0x54726933:
174
// The buffer is already set to the trie data byte order.
175
break;
176
case 0x33697254:
177
// Temporarily reverse the byte order.
178
boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
179
bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
180
signature = 0x54726933;
181
break;
182
default:
183
throw new InternalError("Buffer does not contain a serialized CodePointTrie");
184
}
185
186
// struct UCPTrieHeader continued
187
/**
188
* Options bit field:
189
* Bits 15..12: Data length bits 19..16.
190
* Bits 11..8: Data null block offset bits 19..16.
191
* Bits 7..6: UCPTrieType
192
* Bits 5..3: Reserved (0).
193
* Bits 2..0: UCPTrieValueWidth
194
*/
195
int options = bytes.getChar();
196
197
/** Total length of the index tables. */
198
int indexLength = bytes.getChar();
199
200
/** Data length bits 15..0. */
201
int dataLength = bytes.getChar();
202
203
/** Index-3 null block offset, 0x7fff or 0xffff if none. */
204
int index3NullOffset = bytes.getChar();
205
206
/** Data null block offset bits 15..0, 0xfffff if none. */
207
int dataNullOffset = bytes.getChar();
208
209
/**
210
* First code point of the single-value range ending with U+10ffff,
211
* rounded up and then shifted right by SHIFT_2.
212
*/
213
int shiftedHighStart = bytes.getChar();
214
// struct UCPTrieHeader end
215
216
int typeInt = (options >> 6) & 3;
217
Type actualType;
218
switch (typeInt) {
219
case 0: actualType = Type.FAST; break;
220
case 1: actualType = Type.SMALL; break;
221
default:
222
throw new InternalError("CodePointTrie data header has an unsupported type");
223
}
224
225
int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK;
226
ValueWidth actualValueWidth;
227
switch (valueWidthInt) {
228
case 0: actualValueWidth = ValueWidth.BITS_16; break;
229
case 1: actualValueWidth = ValueWidth.BITS_32; break;
230
case 2: actualValueWidth = ValueWidth.BITS_8; break;
231
default:
232
throw new InternalError("CodePointTrie data header has an unsupported value width");
233
}
234
235
if ((options & OPTIONS_RESERVED_MASK) != 0) {
236
throw new InternalError("CodePointTrie data header has unsupported options");
237
}
238
239
if (type == null) {
240
type = actualType;
241
}
242
if (valueWidth == null) {
243
valueWidth = actualValueWidth;
244
}
245
if (type != actualType || valueWidth != actualValueWidth) {
246
throw new InternalError("CodePointTrie data header has a different type or value width than required");
247
}
248
249
// Get the length values and offsets.
250
dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4);
251
dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8);
252
253
int highStart = shiftedHighStart << SHIFT_2;
254
255
// Calculate the actual length, minus the header.
256
int actualLength = indexLength * 2;
257
if (valueWidth == ValueWidth.BITS_16) {
258
actualLength += dataLength * 2;
259
} else if (valueWidth == ValueWidth.BITS_32) {
260
actualLength += dataLength * 4;
261
} else {
262
actualLength += dataLength;
263
}
264
if (bytes.remaining() < actualLength) {
265
throw new InternalError("Buffer too short for the CodePointTrie data");
266
}
267
268
char[] index = ICUBinary.getChars(bytes, indexLength, 0);
269
switch (valueWidth) {
270
case BITS_16: {
271
char[] data16 = ICUBinary.getChars(bytes, dataLength, 0);
272
return type == Type.FAST ?
273
new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) :
274
new Small16(index, data16, highStart, index3NullOffset, dataNullOffset);
275
}
276
case BITS_32: {
277
int[] data32 = ICUBinary.getInts(bytes, dataLength, 0);
278
return type == Type.FAST ?
279
new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) :
280
new Small32(index, data32, highStart, index3NullOffset, dataNullOffset);
281
}
282
case BITS_8: {
283
byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0);
284
return type == Type.FAST ?
285
new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) :
286
new Small8(index, data8, highStart, index3NullOffset, dataNullOffset);
287
}
288
default:
289
throw new AssertionError("should be unreachable");
290
}
291
} finally {
292
bytes.order(outerByteOrder);
293
}
294
}
295
296
/**
297
* Returns the trie type.
298
*
299
* @return the trie type
300
* @stable ICU 63
301
*/
302
public abstract Type getType();
303
/**
304
* Returns the number of bits in a trie data value.
305
*
306
* @return the number of bits in a trie data value
307
* @stable ICU 63
308
*/
309
public final ValueWidth getValueWidth() { return data.getValueWidth(); }
310
311
/**
312
* {@inheritDoc}
313
* @stable ICU 63
314
*/
315
@Override
316
public int get(int c) {
317
return data.getFromIndex(cpIndex(c));
318
}
319
320
/**
321
* Returns a trie value for an ASCII code point, without range checking.
322
*
323
* @param c the input code point; must be U+0000..U+007F
324
* @return The ASCII code point's trie value.
325
* @stable ICU 63
326
*/
327
public final int asciiGet(int c) {
328
return ascii[c];
329
}
330
331
private static final int MAX_UNICODE = 0x10ffff;
332
333
private static final int ASCII_LIMIT = 0x80;
334
335
private static final int maybeFilterValue(int value, int trieNullValue, int nullValue,
336
ValueFilter filter) {
337
if (value == trieNullValue) {
338
value = nullValue;
339
} else if (filter != null) {
340
value = filter.apply(value);
341
}
342
return value;
343
}
344
345
/**
346
* {@inheritDoc}
347
* @stable ICU 63
348
*/
349
@Override
350
public final boolean getRange(int start, ValueFilter filter, Range range) {
351
if (start < 0 || MAX_UNICODE < start) {
352
return false;
353
}
354
if (start >= highStart) {
355
int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
356
int value = data.getFromIndex(di);
357
if (filter != null) { value = filter.apply(value); }
358
range.set(start, MAX_UNICODE, value);
359
return true;
360
}
361
362
int nullValue = this.nullValue;
363
if (filter != null) { nullValue = filter.apply(nullValue); }
364
Type type = getType();
365
366
int prevI3Block = -1;
367
int prevBlock = -1;
368
int c = start;
369
// Initialize to make compiler happy. Real value when haveValue is true.
370
int trieValue = 0, value = 0;
371
boolean haveValue = false;
372
do {
373
int i3Block;
374
int i3;
375
int i3BlockLength;
376
int dataBlockLength;
377
if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) {
378
i3Block = 0;
379
i3 = c >> FAST_SHIFT;
380
i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH;
381
dataBlockLength = FAST_DATA_BLOCK_LENGTH;
382
} else {
383
// Use the multi-stage index.
384
int i1 = c >> SHIFT_1;
385
if (type == Type.FAST) {
386
assert(0xffff < c && c < highStart);
387
i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
388
} else {
389
assert(c < highStart && highStart > SMALL_LIMIT);
390
i1 += SMALL_INDEX_LENGTH;
391
}
392
i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
393
if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) {
394
// The index-3 block is the same as the previous one, and filled with value.
395
assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);
396
c += CP_PER_INDEX_2_ENTRY;
397
continue;
398
}
399
prevI3Block = i3Block;
400
if (i3Block == index3NullOffset) {
401
// This is the index-3 null block.
402
if (haveValue) {
403
if (nullValue != value) {
404
range.set(start, c - 1, value);
405
return true;
406
}
407
} else {
408
trieValue = this.nullValue;
409
value = nullValue;
410
haveValue = true;
411
}
412
prevBlock = dataNullOffset;
413
c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1);
414
continue;
415
}
416
i3 = (c >> SHIFT_3) & INDEX_3_MASK;
417
i3BlockLength = INDEX_3_BLOCK_LENGTH;
418
dataBlockLength = SMALL_DATA_BLOCK_LENGTH;
419
}
420
// Enumerate data blocks for one index-3 block.
421
do {
422
int block;
423
if ((i3Block & 0x8000) == 0) {
424
block = index[i3Block + i3];
425
} else {
426
// 18-bit indexes stored in groups of 9 entries per 8 indexes.
427
int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
428
int gi = i3 & 7;
429
block = (index[group++] << (2 + (2 * gi))) & 0x30000;
430
block |= index[group + gi];
431
}
432
if (block == prevBlock && (c - start) >= dataBlockLength) {
433
// The block is the same as the previous one, and filled with value.
434
assert((c & (dataBlockLength - 1)) == 0);
435
c += dataBlockLength;
436
} else {
437
int dataMask = dataBlockLength - 1;
438
prevBlock = block;
439
if (block == dataNullOffset) {
440
// This is the data null block.
441
if (haveValue) {
442
if (nullValue != value) {
443
range.set(start, c - 1, value);
444
return true;
445
}
446
} else {
447
trieValue = this.nullValue;
448
value = nullValue;
449
haveValue = true;
450
}
451
c = (c + dataBlockLength) & ~dataMask;
452
} else {
453
int di = block + (c & dataMask);
454
int trieValue2 = data.getFromIndex(di);
455
if (haveValue) {
456
if (trieValue2 != trieValue) {
457
if (filter == null ||
458
maybeFilterValue(trieValue2, this.nullValue, nullValue,
459
filter) != value) {
460
range.set(start, c - 1, value);
461
return true;
462
}
463
trieValue = trieValue2; // may or may not help
464
}
465
} else {
466
trieValue = trieValue2;
467
value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter);
468
haveValue = true;
469
}
470
while ((++c & dataMask) != 0) {
471
trieValue2 = data.getFromIndex(++di);
472
if (trieValue2 != trieValue) {
473
if (filter == null ||
474
maybeFilterValue(trieValue2, this.nullValue, nullValue,
475
filter) != value) {
476
range.set(start, c - 1, value);
477
return true;
478
}
479
trieValue = trieValue2; // may or may not help
480
}
481
}
482
}
483
}
484
} while (++i3 < i3BlockLength);
485
} while (c < highStart);
486
assert(haveValue);
487
int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
488
int highValue = data.getFromIndex(di);
489
if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) {
490
--c;
491
} else {
492
c = MAX_UNICODE;
493
}
494
range.set(start, c, value);
495
return true;
496
}
497
498
/**
499
* Writes a representation of the trie to the output stream.
500
* Inverse of {@link #fromBinary}.
501
*
502
* @param os the output stream
503
* @return the number of bytes written
504
* @stable ICU 63
505
*/
506
public final int toBinary(OutputStream os) {
507
try {
508
DataOutputStream dos = new DataOutputStream(os);
509
510
// Write the UCPTrieHeader
511
dos.writeInt(0x54726933); // signature="Tri3"
512
dos.writeChar( // options
513
((dataLength & 0xf0000) >> 4) |
514
((dataNullOffset & 0xf0000) >> 8) |
515
(getType().ordinal() << 6) |
516
getValueWidth().ordinal());
517
dos.writeChar(index.length);
518
dos.writeChar(dataLength);
519
dos.writeChar(index3NullOffset);
520
dos.writeChar(dataNullOffset);
521
dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart
522
int length = 16; // sizeof(UCPTrieHeader)
523
524
for (char i : index) { dos.writeChar(i); }
525
length += index.length * 2;
526
length += data.write(dos);
527
return length;
528
} catch (IOException e) {
529
throw new UncheckedIOException(e);
530
}
531
}
532
533
/** @internal */
534
static final int FAST_SHIFT = 6;
535
536
/** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
537
static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT;
538
539
/** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
540
private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1;
541
542
/** @internal */
543
private static final int SMALL_MAX = 0xfff;
544
545
/**
546
* Offset from dataLength (to be subtracted) for fetching the
547
* value returned for out-of-range code points and ill-formed UTF-8/16.
548
* @internal
549
*/
550
private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1;
551
/**
552
* Offset from dataLength (to be subtracted) for fetching the
553
* value returned for code points highStart..U+10FFFF.
554
* @internal
555
*/
556
private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2;
557
558
// ucptrie_impl.h
559
560
/** The length of the BMP index table. 1024=0x400 */
561
private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT;
562
563
static final int SMALL_LIMIT = 0x1000;
564
private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT;
565
566
/** Shift size for getting the index-3 table offset. */
567
static final int SHIFT_3 = 4;
568
569
/** Shift size for getting the index-2 table offset. */
570
private static final int SHIFT_2 = 5 + SHIFT_3;
571
572
/** Shift size for getting the index-1 table offset. */
573
private static final int SHIFT_1 = 5 + SHIFT_2;
574
575
/**
576
* Difference between two shift sizes,
577
* for getting an index-2 offset from an index-3 offset. 5=9-4
578
*/
579
static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3;
580
581
/**
582
* Difference between two shift sizes,
583
* for getting an index-1 offset from an index-2 offset. 5=14-9
584
*/
585
static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2;
586
587
/**
588
* Number of index-1 entries for the BMP. (4)
589
* This part of the index-1 table is omitted from the serialized form.
590
*/
591
private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;
592
593
/** Number of entries in an index-2 block. 32=0x20 */
594
static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;
595
596
/** Mask for getting the lower bits for the in-index-2-block offset. */
597
static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;
598
599
/** Number of code points per index-2 table entry. 512=0x200 */
600
static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2;
601
602
/** Number of entries in an index-3 block. 32=0x20 */
603
static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3;
604
605
/** Mask for getting the lower bits for the in-index-3-block offset. */
606
private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1;
607
608
/** Number of entries in a small data block. 16=0x10 */
609
static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3;
610
611
/** Mask for getting the lower bits for the in-small-data-block offset. */
612
static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1;
613
614
// ucptrie_impl.h: Constants for use with UCPTrieHeader.options.
615
private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000;
616
private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00;
617
private static final int OPTIONS_RESERVED_MASK = 0x38;
618
private static final int OPTIONS_VALUE_BITS_MASK = 7;
619
/**
620
* Value for index3NullOffset which indicates that there is no index-3 null block.
621
* Bit 15 is unused for this value because this bit is used if the index-3 contains
622
* 18-bit indexes.
623
*/
624
static final int NO_INDEX3_NULL_OFFSET = 0x7fff;
625
static final int NO_DATA_NULL_OFFSET = 0xfffff;
626
627
private static abstract class Data {
628
abstract ValueWidth getValueWidth();
629
abstract int getDataLength();
630
abstract int getFromIndex(int index);
631
abstract int write(DataOutputStream dos) throws IOException;
632
}
633
634
private static final class Data16 extends Data {
635
char[] array;
636
Data16(char[] a) { array = a; }
637
@Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; }
638
@Override int getDataLength() { return array.length; }
639
@Override int getFromIndex(int index) { return array[index]; }
640
@Override int write(DataOutputStream dos) throws IOException {
641
for (char v : array) { dos.writeChar(v); }
642
return array.length * 2;
643
}
644
}
645
646
private static final class Data32 extends Data {
647
int[] array;
648
Data32(int[] a) { array = a; }
649
@Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; }
650
@Override int getDataLength() { return array.length; }
651
@Override int getFromIndex(int index) { return array[index]; }
652
@Override int write(DataOutputStream dos) throws IOException {
653
for (int v : array) { dos.writeInt(v); }
654
return array.length * 4;
655
}
656
}
657
658
private static final class Data8 extends Data {
659
byte[] array;
660
Data8(byte[] a) { array = a; }
661
@Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; }
662
@Override int getDataLength() { return array.length; }
663
@Override int getFromIndex(int index) { return array[index] & 0xff; }
664
@Override int write(DataOutputStream dos) throws IOException {
665
for (byte v : array) { dos.writeByte(v); }
666
return array.length;
667
}
668
}
669
670
/** @internal */
671
private final int[] ascii;
672
673
/** @internal */
674
private final char[] index;
675
676
/**
677
* @internal
678
* @deprecated This API is ICU internal only.
679
*/
680
@Deprecated
681
protected final Data data;
682
/**
683
* @internal
684
* @deprecated This API is ICU internal only.
685
*/
686
@Deprecated
687
protected final int dataLength;
688
/**
689
* Start of the last range which ends at U+10FFFF.
690
* @internal
691
* @deprecated This API is ICU internal only.
692
*/
693
@Deprecated
694
protected final int highStart;
695
696
/**
697
* Internal index-3 null block offset.
698
* Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
699
* @internal
700
*/
701
private final int index3NullOffset;
702
/**
703
* Internal data null block offset, not shifted.
704
* Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
705
* @internal
706
*/
707
private final int dataNullOffset;
708
/** @internal */
709
private final int nullValue;
710
711
/**
712
* @internal
713
* @deprecated This API is ICU internal only.
714
*/
715
@Deprecated
716
protected final int fastIndex(int c) {
717
return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK);
718
}
719
720
/**
721
* @internal
722
* @deprecated This API is ICU internal only.
723
*/
724
@Deprecated
725
protected final int smallIndex(Type type, int c) {
726
// Split into two methods to make this part inline-friendly.
727
// In C, this part is a macro.
728
if (c >= highStart) {
729
return dataLength - HIGH_VALUE_NEG_DATA_OFFSET;
730
}
731
return internalSmallIndex(type, c);
732
}
733
734
private final int internalSmallIndex(Type type, int c) {
735
int i1 = c >> SHIFT_1;
736
if (type == Type.FAST) {
737
assert(0xffff < c && c < highStart);
738
i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;
739
} else {
740
assert(0 <= c && c < highStart && highStart > SMALL_LIMIT);
741
i1 += SMALL_INDEX_LENGTH;
742
}
743
int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];
744
int i3 = (c >> SHIFT_3) & INDEX_3_MASK;
745
int dataBlock;
746
if ((i3Block & 0x8000) == 0) {
747
// 16-bit indexes
748
dataBlock = index[i3Block + i3];
749
} else {
750
// 18-bit indexes stored in groups of 9 entries per 8 indexes.
751
i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
752
i3 &= 7;
753
dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000;
754
dataBlock |= index[i3Block + i3];
755
}
756
return dataBlock + (c & SMALL_DATA_MASK);
757
}
758
759
/**
760
* @internal
761
* @deprecated This API is ICU internal only.
762
*/
763
@Deprecated
764
protected abstract int cpIndex(int c);
765
766
/**
767
* A CodePointTrie with {@link Type#FAST}.
768
*
769
* @stable ICU 63
770
*/
771
public static abstract class Fast extends CodePointTrie {
772
private Fast(char[] index, Data data, int highStart,
773
int index3NullOffset, int dataNullOffset) {
774
super(index, data, highStart, index3NullOffset, dataNullOffset);
775
}
776
777
/**
778
* Creates a trie from its binary form.
779
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
780
* with {@link Type#FAST}.
781
*
782
* @param valueWidth selects the number of bits in a data value; this method throws an exception
783
* if the valueWidth does not match the binary data;
784
* use null to accept any data value width
785
* @param bytes a buffer containing the binary data of a CodePointTrie
786
* @return the trie
787
* @stable ICU 63
788
*/
789
public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
790
return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes);
791
}
792
793
/**
794
* @return {@link Type#FAST}
795
* @stable ICU 63
796
*/
797
@Override
798
public final Type getType() { return Type.FAST; }
799
800
/**
801
* Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
802
* Can be used to look up a value for a UTF-16 code unit if other parts of
803
* the string processing check for surrogates.
804
*
805
* @param c the input code point, must be U+0000..U+FFFF
806
* @return The BMP code point's trie value.
807
* @stable ICU 63
808
*/
809
public abstract int bmpGet(int c);
810
811
/**
812
* Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
813
* without range checking.
814
*
815
* @param c the input code point, must be U+10000..U+10FFFF
816
* @return The supplementary code point's trie value.
817
* @stable ICU 63
818
*/
819
public abstract int suppGet(int c);
820
821
/**
822
* @internal
823
* @deprecated This API is ICU internal only.
824
*/
825
@Deprecated
826
@Override
827
protected final int cpIndex(int c) {
828
if (c >= 0) {
829
if (c <= 0xffff) {
830
return fastIndex(c);
831
} else if (c <= 0x10ffff) {
832
return smallIndex(Type.FAST, c);
833
}
834
}
835
return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
836
}
837
838
/**
839
* {@inheritDoc}
840
* @stable ICU 63
841
*/
842
@Override
843
public final StringIterator stringIterator(CharSequence s, int sIndex) {
844
return new FastStringIterator(s, sIndex);
845
}
846
847
private final class FastStringIterator extends StringIterator {
848
private FastStringIterator(CharSequence s, int sIndex) {
849
super(s, sIndex);
850
}
851
852
@Override
853
public boolean next() {
854
if (sIndex >= s.length()) {
855
return false;
856
}
857
char lead = s.charAt(sIndex++);
858
c = lead;
859
int dataIndex;
860
if (!Character.isSurrogate(lead)) {
861
dataIndex = fastIndex(c);
862
} else {
863
char trail;
864
if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
865
Character.isLowSurrogate(trail = s.charAt(sIndex))) {
866
++sIndex;
867
c = Character.toCodePoint(lead, trail);
868
dataIndex = smallIndex(Type.FAST, c);
869
} else {
870
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
871
}
872
}
873
value = data.getFromIndex(dataIndex);
874
return true;
875
}
876
877
@Override
878
public boolean previous() {
879
if (sIndex <= 0) {
880
return false;
881
}
882
char trail = s.charAt(--sIndex);
883
c = trail;
884
int dataIndex;
885
if (!Character.isSurrogate(trail)) {
886
dataIndex = fastIndex(c);
887
} else {
888
char lead;
889
if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
890
Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
891
--sIndex;
892
c = Character.toCodePoint(lead, trail);
893
dataIndex = smallIndex(Type.FAST, c);
894
} else {
895
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
896
}
897
}
898
value = data.getFromIndex(dataIndex);
899
return true;
900
}
901
}
902
}
903
904
/**
905
* A CodePointTrie with {@link Type#SMALL}.
906
*
907
* @stable ICU 63
908
*/
909
public static abstract class Small extends CodePointTrie {
910
private Small(char[] index, Data data, int highStart,
911
int index3NullOffset, int dataNullOffset) {
912
super(index, data, highStart, index3NullOffset, dataNullOffset);
913
}
914
915
/**
916
* Creates a trie from its binary form.
917
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
918
* with {@link Type#SMALL}.
919
*
920
* @param valueWidth selects the number of bits in a data value; this method throws an exception
921
* if the valueWidth does not match the binary data;
922
* use null to accept any data value width
923
* @param bytes a buffer containing the binary data of a CodePointTrie
924
* @return the trie
925
* @stable ICU 63
926
*/
927
public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {
928
return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes);
929
}
930
931
/**
932
* @return {@link Type#SMALL}
933
* @stable ICU 63
934
*/
935
@Override
936
public final Type getType() { return Type.SMALL; }
937
938
/**
939
* @internal
940
* @deprecated This API is ICU internal only.
941
*/
942
@Deprecated
943
@Override
944
protected final int cpIndex(int c) {
945
if (c >= 0) {
946
if (c <= SMALL_MAX) {
947
return fastIndex(c);
948
} else if (c <= 0x10ffff) {
949
return smallIndex(Type.SMALL, c);
950
}
951
}
952
return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
953
}
954
955
/**
956
* {@inheritDoc}
957
* @stable ICU 63
958
*/
959
@Override
960
public final StringIterator stringIterator(CharSequence s, int sIndex) {
961
return new SmallStringIterator(s, sIndex);
962
}
963
964
private final class SmallStringIterator extends StringIterator {
965
private SmallStringIterator(CharSequence s, int sIndex) {
966
super(s, sIndex);
967
}
968
969
@Override
970
public boolean next() {
971
if (sIndex >= s.length()) {
972
return false;
973
}
974
char lead = s.charAt(sIndex++);
975
c = lead;
976
int dataIndex;
977
if (!Character.isSurrogate(lead)) {
978
dataIndex = cpIndex(c);
979
} else {
980
char trail;
981
if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&
982
Character.isLowSurrogate(trail = s.charAt(sIndex))) {
983
++sIndex;
984
c = Character.toCodePoint(lead, trail);
985
dataIndex = smallIndex(Type.SMALL, c);
986
} else {
987
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
988
}
989
}
990
value = data.getFromIndex(dataIndex);
991
return true;
992
}
993
994
@Override
995
public boolean previous() {
996
if (sIndex <= 0) {
997
return false;
998
}
999
char trail = s.charAt(--sIndex);
1000
c = trail;
1001
int dataIndex;
1002
if (!Character.isSurrogate(trail)) {
1003
dataIndex = cpIndex(c);
1004
} else {
1005
char lead;
1006
if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&
1007
Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {
1008
--sIndex;
1009
c = Character.toCodePoint(lead, trail);
1010
dataIndex = smallIndex(Type.SMALL, c);
1011
} else {
1012
dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;
1013
}
1014
}
1015
value = data.getFromIndex(dataIndex);
1016
return true;
1017
}
1018
}
1019
}
1020
1021
/**
1022
* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
1023
*
1024
* @stable ICU 63
1025
*/
1026
public static final class Fast16 extends Fast {
1027
private final char[] dataArray;
1028
1029
Fast16(char[] index, char[] data16, int highStart,
1030
int index3NullOffset, int dataNullOffset) {
1031
super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
1032
this.dataArray = data16;
1033
}
1034
1035
/**
1036
* Creates a trie from its binary form.
1037
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1038
* with {@link Type#FAST} and {@link ValueWidth#BITS_16}.
1039
*
1040
* @param bytes a buffer containing the binary data of a CodePointTrie
1041
* @return the trie
1042
* @stable ICU 63
1043
*/
1044
public static Fast16 fromBinary(ByteBuffer bytes) {
1045
return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes);
1046
}
1047
1048
/**
1049
* {@inheritDoc}
1050
* @stable ICU 63
1051
*/
1052
@Override
1053
public final int get(int c) {
1054
return dataArray[cpIndex(c)];
1055
}
1056
1057
/**
1058
* {@inheritDoc}
1059
* @stable ICU 63
1060
*/
1061
@Override
1062
public final int bmpGet(int c) {
1063
assert 0 <= c && c <= 0xffff;
1064
return dataArray[fastIndex(c)];
1065
}
1066
1067
/**
1068
* {@inheritDoc}
1069
* @stable ICU 63
1070
*/
1071
@Override
1072
public final int suppGet(int c) {
1073
assert 0x10000 <= c && c <= 0x10ffff;
1074
return dataArray[smallIndex(Type.FAST, c)];
1075
}
1076
}
1077
1078
/**
1079
* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
1080
*
1081
* @stable ICU 63
1082
*/
1083
public static final class Fast32 extends Fast {
1084
private final int[] dataArray;
1085
1086
Fast32(char[] index, int[] data32, int highStart,
1087
int index3NullOffset, int dataNullOffset) {
1088
super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
1089
this.dataArray = data32;
1090
}
1091
1092
/**
1093
* Creates a trie from its binary form.
1094
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1095
* with {@link Type#FAST} and {@link ValueWidth#BITS_32}.
1096
*
1097
* @param bytes a buffer containing the binary data of a CodePointTrie
1098
* @return the trie
1099
* @stable ICU 63
1100
*/
1101
public static Fast32 fromBinary(ByteBuffer bytes) {
1102
return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes);
1103
}
1104
1105
/**
1106
* {@inheritDoc}
1107
* @stable ICU 63
1108
*/
1109
@Override
1110
public final int get(int c) {
1111
return dataArray[cpIndex(c)];
1112
}
1113
1114
/**
1115
* {@inheritDoc}
1116
* @stable ICU 63
1117
*/
1118
@Override
1119
public final int bmpGet(int c) {
1120
assert 0 <= c && c <= 0xffff;
1121
return dataArray[fastIndex(c)];
1122
}
1123
1124
/**
1125
* {@inheritDoc}
1126
* @stable ICU 63
1127
*/
1128
@Override
1129
public final int suppGet(int c) {
1130
assert 0x10000 <= c && c <= 0x10ffff;
1131
return dataArray[smallIndex(Type.FAST, c)];
1132
}
1133
}
1134
1135
/**
1136
* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
1137
*
1138
* @stable ICU 63
1139
*/
1140
public static final class Fast8 extends Fast {
1141
private final byte[] dataArray;
1142
1143
Fast8(char[] index, byte[] data8, int highStart,
1144
int index3NullOffset, int dataNullOffset) {
1145
super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
1146
this.dataArray = data8;
1147
}
1148
1149
/**
1150
* Creates a trie from its binary form.
1151
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1152
* with {@link Type#FAST} and {@link ValueWidth#BITS_8}.
1153
*
1154
* @param bytes a buffer containing the binary data of a CodePointTrie
1155
* @return the trie
1156
* @stable ICU 63
1157
*/
1158
public static Fast8 fromBinary(ByteBuffer bytes) {
1159
return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes);
1160
}
1161
1162
/**
1163
* {@inheritDoc}
1164
* @stable ICU 63
1165
*/
1166
@Override
1167
public final int get(int c) {
1168
return dataArray[cpIndex(c)] & 0xff;
1169
}
1170
1171
/**
1172
* {@inheritDoc}
1173
* @stable ICU 63
1174
*/
1175
@Override
1176
public final int bmpGet(int c) {
1177
assert 0 <= c && c <= 0xffff;
1178
return dataArray[fastIndex(c)] & 0xff;
1179
}
1180
1181
/**
1182
* {@inheritDoc}
1183
* @stable ICU 63
1184
*/
1185
@Override
1186
public final int suppGet(int c) {
1187
assert 0x10000 <= c && c <= 0x10ffff;
1188
return dataArray[smallIndex(Type.FAST, c)] & 0xff;
1189
}
1190
}
1191
1192
/**
1193
* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
1194
*
1195
* @stable ICU 63
1196
*/
1197
public static final class Small16 extends Small {
1198
Small16(char[] index, char[] data16, int highStart,
1199
int index3NullOffset, int dataNullOffset) {
1200
super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);
1201
}
1202
1203
/**
1204
* Creates a trie from its binary form.
1205
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1206
* with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.
1207
*
1208
* @param bytes a buffer containing the binary data of a CodePointTrie
1209
* @return the trie
1210
* @stable ICU 63
1211
*/
1212
public static Small16 fromBinary(ByteBuffer bytes) {
1213
return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes);
1214
}
1215
}
1216
1217
/**
1218
* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
1219
*
1220
* @stable ICU 63
1221
*/
1222
public static final class Small32 extends Small {
1223
Small32(char[] index, int[] data32, int highStart,
1224
int index3NullOffset, int dataNullOffset) {
1225
super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);
1226
}
1227
1228
/**
1229
* Creates a trie from its binary form.
1230
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1231
* with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.
1232
*
1233
* @param bytes a buffer containing the binary data of a CodePointTrie
1234
* @return the trie
1235
* @stable ICU 63
1236
*/
1237
public static Small32 fromBinary(ByteBuffer bytes) {
1238
return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes);
1239
}
1240
}
1241
1242
/**
1243
* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
1244
*
1245
* @stable ICU 63
1246
*/
1247
public static final class Small8 extends Small {
1248
Small8(char[] index, byte[] data8, int highStart,
1249
int index3NullOffset, int dataNullOffset) {
1250
super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);
1251
}
1252
1253
/**
1254
* Creates a trie from its binary form.
1255
* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}
1256
* with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.
1257
*
1258
* @param bytes a buffer containing the binary data of a CodePointTrie
1259
* @return the trie
1260
* @stable ICU 63
1261
*/
1262
public static Small8 fromBinary(ByteBuffer bytes) {
1263
return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes);
1264
}
1265
}
1266
}
1267
1268