Path: blob/master/src/java.base/share/classes/jdk/internal/icu/util/CodePointTrie.java
41161 views
/*1* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/24// (c) 2018 and later: Unicode, Inc. and others.25// License & terms of use: http://www.unicode.org/copyright.html#License2627// created: 2018may04 Markus W. Scherer2829package jdk.internal.icu.util;3031import jdk.internal.icu.impl.ICUBinary;3233import java.io.DataOutputStream;34import java.io.IOException;35import java.io.UncheckedIOException;36import java.io.OutputStream;37import java.nio.ByteBuffer;38import java.nio.ByteOrder;3940import static jdk.internal.icu.impl.NormalizerImpl.UTF16Plus;4142/**43* Immutable Unicode code point trie.44* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.45* For details see http://site.icu-project.org/design/struct/utrie46*47* <p>This class is not intended for public subclassing.48*49* @see MutableCodePointTrie50* @stable ICU 6351*/52@SuppressWarnings("deprecation")53public abstract class CodePointTrie extends CodePointMap {54/**55* Selectors for the type of a CodePointTrie.56* Different trade-offs for size vs. speed.57*58* <p>Use null for {@link #fromBinary} to accept any type;59* {@link #getType} will return the actual type.60*61* @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)62* @see #fromBinary63* @see #getType64* @stable ICU 6365*/66public enum Type {67/**68* Fast/simple/larger BMP data structure.69* The {@link Fast} subclasses have additional functions for lookup for BMP and supplementary code points.70*71* @see Fast72* @stable ICU 6373*/74FAST,75/**76* Small/slower BMP data structure.77*78* @see Small79* @stable ICU 6380*/81SMALL82}8384/**85* Selectors for the number of bits in a CodePointTrie data value.86*87* <p>Use null for {@link #fromBinary} to accept any data value width;88* {@link #getValueWidth} will return the actual data value width.89*90* @stable ICU 6391*/92public enum ValueWidth {93/**94* The trie stores 16 bits per data value.95* It returns them as unsigned values 0..0xffff=65535.96*97* @stable ICU 6398*/99BITS_16,100/**101* The trie stores 32 bits per data value.102*103* @stable ICU 63104*/105BITS_32,106/**107* The trie stores 8 bits per data value.108* It returns them as unsigned values 0..0xff=255.109*110* @stable ICU 63111*/112BITS_8113}114115private CodePointTrie(char[] index, Data data, int highStart,116int index3NullOffset, int dataNullOffset) {117this.ascii = new int[ASCII_LIMIT];118this.index = index;119this.data = data;120this.dataLength = data.getDataLength();121this.highStart = highStart;122this.index3NullOffset = index3NullOffset;123this.dataNullOffset = dataNullOffset;124125for (int c = 0; c < ASCII_LIMIT; ++c) {126ascii[c] = data.getFromIndex(c);127}128129int nullValueOffset = dataNullOffset;130if (nullValueOffset >= dataLength) {131nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;132}133nullValue = data.getFromIndex(nullValueOffset);134}135136/**137* Creates a trie from its binary form,138* stored in the ByteBuffer starting at the current position.139* Advances the buffer position to just after the trie data.140* Inverse of {@link #toBinary(OutputStream)}.141*142* <p>The data is copied from the buffer;143* later modification of the buffer will not affect the trie.144*145* @param type selects the trie type; this method throws an exception146* if the type does not match the binary data;147* use null to accept any type148* @param valueWidth selects the number of bits in a data value; this method throws an exception149* if the valueWidth does not match the binary data;150* use null to accept any data value width151* @param bytes a buffer containing the binary data of a CodePointTrie152* @return the trie153* @see MutableCodePointTrie#MutableCodePointTrie(int, int)154* @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)155* @see #toBinary(OutputStream)156* @stable ICU 63157*/158public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) {159ByteOrder outerByteOrder = bytes.order();160try {161// Enough data for a trie header?162if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) {163throw new InternalError("Buffer too short for a CodePointTrie header");164}165166// struct UCPTrieHeader167/** "Tri3" in big-endian US-ASCII (0x54726933) */168int signature = bytes.getInt();169170// Check the signature.171switch (signature) {172case 0x54726933:173// The buffer is already set to the trie data byte order.174break;175case 0x33697254:176// Temporarily reverse the byte order.177boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;178bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);179signature = 0x54726933;180break;181default:182throw new InternalError("Buffer does not contain a serialized CodePointTrie");183}184185// struct UCPTrieHeader continued186/**187* Options bit field:188* Bits 15..12: Data length bits 19..16.189* Bits 11..8: Data null block offset bits 19..16.190* Bits 7..6: UCPTrieType191* Bits 5..3: Reserved (0).192* Bits 2..0: UCPTrieValueWidth193*/194int options = bytes.getChar();195196/** Total length of the index tables. */197int indexLength = bytes.getChar();198199/** Data length bits 15..0. */200int dataLength = bytes.getChar();201202/** Index-3 null block offset, 0x7fff or 0xffff if none. */203int index3NullOffset = bytes.getChar();204205/** Data null block offset bits 15..0, 0xfffff if none. */206int dataNullOffset = bytes.getChar();207208/**209* First code point of the single-value range ending with U+10ffff,210* rounded up and then shifted right by SHIFT_2.211*/212int shiftedHighStart = bytes.getChar();213// struct UCPTrieHeader end214215int typeInt = (options >> 6) & 3;216Type actualType;217switch (typeInt) {218case 0: actualType = Type.FAST; break;219case 1: actualType = Type.SMALL; break;220default:221throw new InternalError("CodePointTrie data header has an unsupported type");222}223224int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK;225ValueWidth actualValueWidth;226switch (valueWidthInt) {227case 0: actualValueWidth = ValueWidth.BITS_16; break;228case 1: actualValueWidth = ValueWidth.BITS_32; break;229case 2: actualValueWidth = ValueWidth.BITS_8; break;230default:231throw new InternalError("CodePointTrie data header has an unsupported value width");232}233234if ((options & OPTIONS_RESERVED_MASK) != 0) {235throw new InternalError("CodePointTrie data header has unsupported options");236}237238if (type == null) {239type = actualType;240}241if (valueWidth == null) {242valueWidth = actualValueWidth;243}244if (type != actualType || valueWidth != actualValueWidth) {245throw new InternalError("CodePointTrie data header has a different type or value width than required");246}247248// Get the length values and offsets.249dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4);250dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8);251252int highStart = shiftedHighStart << SHIFT_2;253254// Calculate the actual length, minus the header.255int actualLength = indexLength * 2;256if (valueWidth == ValueWidth.BITS_16) {257actualLength += dataLength * 2;258} else if (valueWidth == ValueWidth.BITS_32) {259actualLength += dataLength * 4;260} else {261actualLength += dataLength;262}263if (bytes.remaining() < actualLength) {264throw new InternalError("Buffer too short for the CodePointTrie data");265}266267char[] index = ICUBinary.getChars(bytes, indexLength, 0);268switch (valueWidth) {269case BITS_16: {270char[] data16 = ICUBinary.getChars(bytes, dataLength, 0);271return type == Type.FAST ?272new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) :273new Small16(index, data16, highStart, index3NullOffset, dataNullOffset);274}275case BITS_32: {276int[] data32 = ICUBinary.getInts(bytes, dataLength, 0);277return type == Type.FAST ?278new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) :279new Small32(index, data32, highStart, index3NullOffset, dataNullOffset);280}281case BITS_8: {282byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0);283return type == Type.FAST ?284new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) :285new Small8(index, data8, highStart, index3NullOffset, dataNullOffset);286}287default:288throw new AssertionError("should be unreachable");289}290} finally {291bytes.order(outerByteOrder);292}293}294295/**296* Returns the trie type.297*298* @return the trie type299* @stable ICU 63300*/301public abstract Type getType();302/**303* Returns the number of bits in a trie data value.304*305* @return the number of bits in a trie data value306* @stable ICU 63307*/308public final ValueWidth getValueWidth() { return data.getValueWidth(); }309310/**311* {@inheritDoc}312* @stable ICU 63313*/314@Override315public int get(int c) {316return data.getFromIndex(cpIndex(c));317}318319/**320* Returns a trie value for an ASCII code point, without range checking.321*322* @param c the input code point; must be U+0000..U+007F323* @return The ASCII code point's trie value.324* @stable ICU 63325*/326public final int asciiGet(int c) {327return ascii[c];328}329330private static final int MAX_UNICODE = 0x10ffff;331332private static final int ASCII_LIMIT = 0x80;333334private static final int maybeFilterValue(int value, int trieNullValue, int nullValue,335ValueFilter filter) {336if (value == trieNullValue) {337value = nullValue;338} else if (filter != null) {339value = filter.apply(value);340}341return value;342}343344/**345* {@inheritDoc}346* @stable ICU 63347*/348@Override349public final boolean getRange(int start, ValueFilter filter, Range range) {350if (start < 0 || MAX_UNICODE < start) {351return false;352}353if (start >= highStart) {354int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;355int value = data.getFromIndex(di);356if (filter != null) { value = filter.apply(value); }357range.set(start, MAX_UNICODE, value);358return true;359}360361int nullValue = this.nullValue;362if (filter != null) { nullValue = filter.apply(nullValue); }363Type type = getType();364365int prevI3Block = -1;366int prevBlock = -1;367int c = start;368// Initialize to make compiler happy. Real value when haveValue is true.369int trieValue = 0, value = 0;370boolean haveValue = false;371do {372int i3Block;373int i3;374int i3BlockLength;375int dataBlockLength;376if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) {377i3Block = 0;378i3 = c >> FAST_SHIFT;379i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH;380dataBlockLength = FAST_DATA_BLOCK_LENGTH;381} else {382// Use the multi-stage index.383int i1 = c >> SHIFT_1;384if (type == Type.FAST) {385assert(0xffff < c && c < highStart);386i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;387} else {388assert(c < highStart && highStart > SMALL_LIMIT);389i1 += SMALL_INDEX_LENGTH;390}391i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];392if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) {393// The index-3 block is the same as the previous one, and filled with value.394assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0);395c += CP_PER_INDEX_2_ENTRY;396continue;397}398prevI3Block = i3Block;399if (i3Block == index3NullOffset) {400// This is the index-3 null block.401if (haveValue) {402if (nullValue != value) {403range.set(start, c - 1, value);404return true;405}406} else {407trieValue = this.nullValue;408value = nullValue;409haveValue = true;410}411prevBlock = dataNullOffset;412c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1);413continue;414}415i3 = (c >> SHIFT_3) & INDEX_3_MASK;416i3BlockLength = INDEX_3_BLOCK_LENGTH;417dataBlockLength = SMALL_DATA_BLOCK_LENGTH;418}419// Enumerate data blocks for one index-3 block.420do {421int block;422if ((i3Block & 0x8000) == 0) {423block = index[i3Block + i3];424} else {425// 18-bit indexes stored in groups of 9 entries per 8 indexes.426int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);427int gi = i3 & 7;428block = (index[group++] << (2 + (2 * gi))) & 0x30000;429block |= index[group + gi];430}431if (block == prevBlock && (c - start) >= dataBlockLength) {432// The block is the same as the previous one, and filled with value.433assert((c & (dataBlockLength - 1)) == 0);434c += dataBlockLength;435} else {436int dataMask = dataBlockLength - 1;437prevBlock = block;438if (block == dataNullOffset) {439// This is the data null block.440if (haveValue) {441if (nullValue != value) {442range.set(start, c - 1, value);443return true;444}445} else {446trieValue = this.nullValue;447value = nullValue;448haveValue = true;449}450c = (c + dataBlockLength) & ~dataMask;451} else {452int di = block + (c & dataMask);453int trieValue2 = data.getFromIndex(di);454if (haveValue) {455if (trieValue2 != trieValue) {456if (filter == null ||457maybeFilterValue(trieValue2, this.nullValue, nullValue,458filter) != value) {459range.set(start, c - 1, value);460return true;461}462trieValue = trieValue2; // may or may not help463}464} else {465trieValue = trieValue2;466value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter);467haveValue = true;468}469while ((++c & dataMask) != 0) {470trieValue2 = data.getFromIndex(++di);471if (trieValue2 != trieValue) {472if (filter == null ||473maybeFilterValue(trieValue2, this.nullValue, nullValue,474filter) != value) {475range.set(start, c - 1, value);476return true;477}478trieValue = trieValue2; // may or may not help479}480}481}482}483} while (++i3 < i3BlockLength);484} while (c < highStart);485assert(haveValue);486int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET;487int highValue = data.getFromIndex(di);488if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) {489--c;490} else {491c = MAX_UNICODE;492}493range.set(start, c, value);494return true;495}496497/**498* Writes a representation of the trie to the output stream.499* Inverse of {@link #fromBinary}.500*501* @param os the output stream502* @return the number of bytes written503* @stable ICU 63504*/505public final int toBinary(OutputStream os) {506try {507DataOutputStream dos = new DataOutputStream(os);508509// Write the UCPTrieHeader510dos.writeInt(0x54726933); // signature="Tri3"511dos.writeChar( // options512((dataLength & 0xf0000) >> 4) |513((dataNullOffset & 0xf0000) >> 8) |514(getType().ordinal() << 6) |515getValueWidth().ordinal());516dos.writeChar(index.length);517dos.writeChar(dataLength);518dos.writeChar(index3NullOffset);519dos.writeChar(dataNullOffset);520dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart521int length = 16; // sizeof(UCPTrieHeader)522523for (char i : index) { dos.writeChar(i); }524length += index.length * 2;525length += data.write(dos);526return length;527} catch (IOException e) {528throw new UncheckedIOException(e);529}530}531532/** @internal */533static final int FAST_SHIFT = 6;534535/** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */536static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT;537538/** Mask for getting the lower bits for the in-fast-data-block offset. @internal */539private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1;540541/** @internal */542private static final int SMALL_MAX = 0xfff;543544/**545* Offset from dataLength (to be subtracted) for fetching the546* value returned for out-of-range code points and ill-formed UTF-8/16.547* @internal548*/549private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1;550/**551* Offset from dataLength (to be subtracted) for fetching the552* value returned for code points highStart..U+10FFFF.553* @internal554*/555private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2;556557// ucptrie_impl.h558559/** The length of the BMP index table. 1024=0x400 */560private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT;561562static final int SMALL_LIMIT = 0x1000;563private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT;564565/** Shift size for getting the index-3 table offset. */566static final int SHIFT_3 = 4;567568/** Shift size for getting the index-2 table offset. */569private static final int SHIFT_2 = 5 + SHIFT_3;570571/** Shift size for getting the index-1 table offset. */572private static final int SHIFT_1 = 5 + SHIFT_2;573574/**575* Difference between two shift sizes,576* for getting an index-2 offset from an index-3 offset. 5=9-4577*/578static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3;579580/**581* Difference between two shift sizes,582* for getting an index-1 offset from an index-2 offset. 5=14-9583*/584static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2;585586/**587* Number of index-1 entries for the BMP. (4)588* This part of the index-1 table is omitted from the serialized form.589*/590private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1;591592/** Number of entries in an index-2 block. 32=0x20 */593static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2;594595/** Mask for getting the lower bits for the in-index-2-block offset. */596static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1;597598/** Number of code points per index-2 table entry. 512=0x200 */599static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2;600601/** Number of entries in an index-3 block. 32=0x20 */602static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3;603604/** Mask for getting the lower bits for the in-index-3-block offset. */605private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1;606607/** Number of entries in a small data block. 16=0x10 */608static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3;609610/** Mask for getting the lower bits for the in-small-data-block offset. */611static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1;612613// ucptrie_impl.h: Constants for use with UCPTrieHeader.options.614private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000;615private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00;616private static final int OPTIONS_RESERVED_MASK = 0x38;617private static final int OPTIONS_VALUE_BITS_MASK = 7;618/**619* Value for index3NullOffset which indicates that there is no index-3 null block.620* Bit 15 is unused for this value because this bit is used if the index-3 contains621* 18-bit indexes.622*/623static final int NO_INDEX3_NULL_OFFSET = 0x7fff;624static final int NO_DATA_NULL_OFFSET = 0xfffff;625626private static abstract class Data {627abstract ValueWidth getValueWidth();628abstract int getDataLength();629abstract int getFromIndex(int index);630abstract int write(DataOutputStream dos) throws IOException;631}632633private static final class Data16 extends Data {634char[] array;635Data16(char[] a) { array = a; }636@Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; }637@Override int getDataLength() { return array.length; }638@Override int getFromIndex(int index) { return array[index]; }639@Override int write(DataOutputStream dos) throws IOException {640for (char v : array) { dos.writeChar(v); }641return array.length * 2;642}643}644645private static final class Data32 extends Data {646int[] array;647Data32(int[] a) { array = a; }648@Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; }649@Override int getDataLength() { return array.length; }650@Override int getFromIndex(int index) { return array[index]; }651@Override int write(DataOutputStream dos) throws IOException {652for (int v : array) { dos.writeInt(v); }653return array.length * 4;654}655}656657private static final class Data8 extends Data {658byte[] array;659Data8(byte[] a) { array = a; }660@Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; }661@Override int getDataLength() { return array.length; }662@Override int getFromIndex(int index) { return array[index] & 0xff; }663@Override int write(DataOutputStream dos) throws IOException {664for (byte v : array) { dos.writeByte(v); }665return array.length;666}667}668669/** @internal */670private final int[] ascii;671672/** @internal */673private final char[] index;674675/**676* @internal677* @deprecated This API is ICU internal only.678*/679@Deprecated680protected final Data data;681/**682* @internal683* @deprecated This API is ICU internal only.684*/685@Deprecated686protected final int dataLength;687/**688* Start of the last range which ends at U+10FFFF.689* @internal690* @deprecated This API is ICU internal only.691*/692@Deprecated693protected final int highStart;694695/**696* Internal index-3 null block offset.697* Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.698* @internal699*/700private final int index3NullOffset;701/**702* Internal data null block offset, not shifted.703* Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.704* @internal705*/706private final int dataNullOffset;707/** @internal */708private final int nullValue;709710/**711* @internal712* @deprecated This API is ICU internal only.713*/714@Deprecated715protected final int fastIndex(int c) {716return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK);717}718719/**720* @internal721* @deprecated This API is ICU internal only.722*/723@Deprecated724protected final int smallIndex(Type type, int c) {725// Split into two methods to make this part inline-friendly.726// In C, this part is a macro.727if (c >= highStart) {728return dataLength - HIGH_VALUE_NEG_DATA_OFFSET;729}730return internalSmallIndex(type, c);731}732733private final int internalSmallIndex(Type type, int c) {734int i1 = c >> SHIFT_1;735if (type == Type.FAST) {736assert(0xffff < c && c < highStart);737i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH;738} else {739assert(0 <= c && c < highStart && highStart > SMALL_LIMIT);740i1 += SMALL_INDEX_LENGTH;741}742int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)];743int i3 = (c >> SHIFT_3) & INDEX_3_MASK;744int dataBlock;745if ((i3Block & 0x8000) == 0) {746// 16-bit indexes747dataBlock = index[i3Block + i3];748} else {749// 18-bit indexes stored in groups of 9 entries per 8 indexes.750i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);751i3 &= 7;752dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000;753dataBlock |= index[i3Block + i3];754}755return dataBlock + (c & SMALL_DATA_MASK);756}757758/**759* @internal760* @deprecated This API is ICU internal only.761*/762@Deprecated763protected abstract int cpIndex(int c);764765/**766* A CodePointTrie with {@link Type#FAST}.767*768* @stable ICU 63769*/770public static abstract class Fast extends CodePointTrie {771private Fast(char[] index, Data data, int highStart,772int index3NullOffset, int dataNullOffset) {773super(index, data, highStart, index3NullOffset, dataNullOffset);774}775776/**777* Creates a trie from its binary form.778* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}779* with {@link Type#FAST}.780*781* @param valueWidth selects the number of bits in a data value; this method throws an exception782* if the valueWidth does not match the binary data;783* use null to accept any data value width784* @param bytes a buffer containing the binary data of a CodePointTrie785* @return the trie786* @stable ICU 63787*/788public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {789return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes);790}791792/**793* @return {@link Type#FAST}794* @stable ICU 63795*/796@Override797public final Type getType() { return Type.FAST; }798799/**800* Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.801* Can be used to look up a value for a UTF-16 code unit if other parts of802* the string processing check for surrogates.803*804* @param c the input code point, must be U+0000..U+FFFF805* @return The BMP code point's trie value.806* @stable ICU 63807*/808public abstract int bmpGet(int c);809810/**811* Returns a trie value for a supplementary code point (U+10000..U+10FFFF),812* without range checking.813*814* @param c the input code point, must be U+10000..U+10FFFF815* @return The supplementary code point's trie value.816* @stable ICU 63817*/818public abstract int suppGet(int c);819820/**821* @internal822* @deprecated This API is ICU internal only.823*/824@Deprecated825@Override826protected final int cpIndex(int c) {827if (c >= 0) {828if (c <= 0xffff) {829return fastIndex(c);830} else if (c <= 0x10ffff) {831return smallIndex(Type.FAST, c);832}833}834return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;835}836837/**838* {@inheritDoc}839* @stable ICU 63840*/841@Override842public final StringIterator stringIterator(CharSequence s, int sIndex) {843return new FastStringIterator(s, sIndex);844}845846private final class FastStringIterator extends StringIterator {847private FastStringIterator(CharSequence s, int sIndex) {848super(s, sIndex);849}850851@Override852public boolean next() {853if (sIndex >= s.length()) {854return false;855}856char lead = s.charAt(sIndex++);857c = lead;858int dataIndex;859if (!Character.isSurrogate(lead)) {860dataIndex = fastIndex(c);861} else {862char trail;863if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&864Character.isLowSurrogate(trail = s.charAt(sIndex))) {865++sIndex;866c = Character.toCodePoint(lead, trail);867dataIndex = smallIndex(Type.FAST, c);868} else {869dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;870}871}872value = data.getFromIndex(dataIndex);873return true;874}875876@Override877public boolean previous() {878if (sIndex <= 0) {879return false;880}881char trail = s.charAt(--sIndex);882c = trail;883int dataIndex;884if (!Character.isSurrogate(trail)) {885dataIndex = fastIndex(c);886} else {887char lead;888if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&889Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {890--sIndex;891c = Character.toCodePoint(lead, trail);892dataIndex = smallIndex(Type.FAST, c);893} else {894dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;895}896}897value = data.getFromIndex(dataIndex);898return true;899}900}901}902903/**904* A CodePointTrie with {@link Type#SMALL}.905*906* @stable ICU 63907*/908public static abstract class Small extends CodePointTrie {909private Small(char[] index, Data data, int highStart,910int index3NullOffset, int dataNullOffset) {911super(index, data, highStart, index3NullOffset, dataNullOffset);912}913914/**915* Creates a trie from its binary form.916* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}917* with {@link Type#SMALL}.918*919* @param valueWidth selects the number of bits in a data value; this method throws an exception920* if the valueWidth does not match the binary data;921* use null to accept any data value width922* @param bytes a buffer containing the binary data of a CodePointTrie923* @return the trie924* @stable ICU 63925*/926public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) {927return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes);928}929930/**931* @return {@link Type#SMALL}932* @stable ICU 63933*/934@Override935public final Type getType() { return Type.SMALL; }936937/**938* @internal939* @deprecated This API is ICU internal only.940*/941@Deprecated942@Override943protected final int cpIndex(int c) {944if (c >= 0) {945if (c <= SMALL_MAX) {946return fastIndex(c);947} else if (c <= 0x10ffff) {948return smallIndex(Type.SMALL, c);949}950}951return dataLength - ERROR_VALUE_NEG_DATA_OFFSET;952}953954/**955* {@inheritDoc}956* @stable ICU 63957*/958@Override959public final StringIterator stringIterator(CharSequence s, int sIndex) {960return new SmallStringIterator(s, sIndex);961}962963private final class SmallStringIterator extends StringIterator {964private SmallStringIterator(CharSequence s, int sIndex) {965super(s, sIndex);966}967968@Override969public boolean next() {970if (sIndex >= s.length()) {971return false;972}973char lead = s.charAt(sIndex++);974c = lead;975int dataIndex;976if (!Character.isSurrogate(lead)) {977dataIndex = cpIndex(c);978} else {979char trail;980if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() &&981Character.isLowSurrogate(trail = s.charAt(sIndex))) {982++sIndex;983c = Character.toCodePoint(lead, trail);984dataIndex = smallIndex(Type.SMALL, c);985} else {986dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;987}988}989value = data.getFromIndex(dataIndex);990return true;991}992993@Override994public boolean previous() {995if (sIndex <= 0) {996return false;997}998char trail = s.charAt(--sIndex);999c = trail;1000int dataIndex;1001if (!Character.isSurrogate(trail)) {1002dataIndex = cpIndex(c);1003} else {1004char lead;1005if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 &&1006Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) {1007--sIndex;1008c = Character.toCodePoint(lead, trail);1009dataIndex = smallIndex(Type.SMALL, c);1010} else {1011dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET;1012}1013}1014value = data.getFromIndex(dataIndex);1015return true;1016}1017}1018}10191020/**1021* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}.1022*1023* @stable ICU 631024*/1025public static final class Fast16 extends Fast {1026private final char[] dataArray;10271028Fast16(char[] index, char[] data16, int highStart,1029int index3NullOffset, int dataNullOffset) {1030super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);1031this.dataArray = data16;1032}10331034/**1035* Creates a trie from its binary form.1036* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}1037* with {@link Type#FAST} and {@link ValueWidth#BITS_16}.1038*1039* @param bytes a buffer containing the binary data of a CodePointTrie1040* @return the trie1041* @stable ICU 631042*/1043public static Fast16 fromBinary(ByteBuffer bytes) {1044return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes);1045}10461047/**1048* {@inheritDoc}1049* @stable ICU 631050*/1051@Override1052public final int get(int c) {1053return dataArray[cpIndex(c)];1054}10551056/**1057* {@inheritDoc}1058* @stable ICU 631059*/1060@Override1061public final int bmpGet(int c) {1062assert 0 <= c && c <= 0xffff;1063return dataArray[fastIndex(c)];1064}10651066/**1067* {@inheritDoc}1068* @stable ICU 631069*/1070@Override1071public final int suppGet(int c) {1072assert 0x10000 <= c && c <= 0x10ffff;1073return dataArray[smallIndex(Type.FAST, c)];1074}1075}10761077/**1078* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}.1079*1080* @stable ICU 631081*/1082public static final class Fast32 extends Fast {1083private final int[] dataArray;10841085Fast32(char[] index, int[] data32, int highStart,1086int index3NullOffset, int dataNullOffset) {1087super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);1088this.dataArray = data32;1089}10901091/**1092* Creates a trie from its binary form.1093* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}1094* with {@link Type#FAST} and {@link ValueWidth#BITS_32}.1095*1096* @param bytes a buffer containing the binary data of a CodePointTrie1097* @return the trie1098* @stable ICU 631099*/1100public static Fast32 fromBinary(ByteBuffer bytes) {1101return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes);1102}11031104/**1105* {@inheritDoc}1106* @stable ICU 631107*/1108@Override1109public final int get(int c) {1110return dataArray[cpIndex(c)];1111}11121113/**1114* {@inheritDoc}1115* @stable ICU 631116*/1117@Override1118public final int bmpGet(int c) {1119assert 0 <= c && c <= 0xffff;1120return dataArray[fastIndex(c)];1121}11221123/**1124* {@inheritDoc}1125* @stable ICU 631126*/1127@Override1128public final int suppGet(int c) {1129assert 0x10000 <= c && c <= 0x10ffff;1130return dataArray[smallIndex(Type.FAST, c)];1131}1132}11331134/**1135* A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}.1136*1137* @stable ICU 631138*/1139public static final class Fast8 extends Fast {1140private final byte[] dataArray;11411142Fast8(char[] index, byte[] data8, int highStart,1143int index3NullOffset, int dataNullOffset) {1144super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);1145this.dataArray = data8;1146}11471148/**1149* Creates a trie from its binary form.1150* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}1151* with {@link Type#FAST} and {@link ValueWidth#BITS_8}.1152*1153* @param bytes a buffer containing the binary data of a CodePointTrie1154* @return the trie1155* @stable ICU 631156*/1157public static Fast8 fromBinary(ByteBuffer bytes) {1158return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes);1159}11601161/**1162* {@inheritDoc}1163* @stable ICU 631164*/1165@Override1166public final int get(int c) {1167return dataArray[cpIndex(c)] & 0xff;1168}11691170/**1171* {@inheritDoc}1172* @stable ICU 631173*/1174@Override1175public final int bmpGet(int c) {1176assert 0 <= c && c <= 0xffff;1177return dataArray[fastIndex(c)] & 0xff;1178}11791180/**1181* {@inheritDoc}1182* @stable ICU 631183*/1184@Override1185public final int suppGet(int c) {1186assert 0x10000 <= c && c <= 0x10ffff;1187return dataArray[smallIndex(Type.FAST, c)] & 0xff;1188}1189}11901191/**1192* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.1193*1194* @stable ICU 631195*/1196public static final class Small16 extends Small {1197Small16(char[] index, char[] data16, int highStart,1198int index3NullOffset, int dataNullOffset) {1199super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset);1200}12011202/**1203* Creates a trie from its binary form.1204* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}1205* with {@link Type#SMALL} and {@link ValueWidth#BITS_16}.1206*1207* @param bytes a buffer containing the binary data of a CodePointTrie1208* @return the trie1209* @stable ICU 631210*/1211public static Small16 fromBinary(ByteBuffer bytes) {1212return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes);1213}1214}12151216/**1217* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.1218*1219* @stable ICU 631220*/1221public static final class Small32 extends Small {1222Small32(char[] index, int[] data32, int highStart,1223int index3NullOffset, int dataNullOffset) {1224super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset);1225}12261227/**1228* Creates a trie from its binary form.1229* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}1230* with {@link Type#SMALL} and {@link ValueWidth#BITS_32}.1231*1232* @param bytes a buffer containing the binary data of a CodePointTrie1233* @return the trie1234* @stable ICU 631235*/1236public static Small32 fromBinary(ByteBuffer bytes) {1237return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes);1238}1239}12401241/**1242* A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.1243*1244* @stable ICU 631245*/1246public static final class Small8 extends Small {1247Small8(char[] index, byte[] data8, int highStart,1248int index3NullOffset, int dataNullOffset) {1249super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset);1250}12511252/**1253* Creates a trie from its binary form.1254* Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)}1255* with {@link Type#SMALL} and {@link ValueWidth#BITS_8}.1256*1257* @param bytes a buffer containing the binary data of a CodePointTrie1258* @return the trie1259* @stable ICU 631260*/1261public static Small8 fromBinary(ByteBuffer bytes) {1262return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes);1263}1264}1265}126612671268