// Path: blob/master/src/java.base/share/classes/jdk/internal/icu/impl/CharTrie.java
// 41161 views
/*
 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 ******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 ******************************************************************************
 */

package jdk.internal.icu.impl;

import jdk.internal.icu.text.UTF16;

import java.io.DataInputStream;
import java.io.InputStream;
import java.io.IOException;

/**
 * Trie implementation which stores data in char, 16 bits.
 * @author synwee
 * @see com.ibm.icu.impl.Trie
 * @since release 2.1, Jan 01 2002
 */

// note that i need to handle the block calculations later, since chartrie
// in icu4c uses the same index array.
public class CharTrie extends Trie
{
    // public constructors ---------------------------------------------

    /**
     * <p>Creates a new Trie with the settings for the trie data.</p>
     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
     * trie.</p>
     * @param inputStream file input stream to a ICU data file, containing
     *                    the trie
     * @param dataManipulate object which provides methods to parse the char
     *                       data
     * @throws IOException thrown when data reading fails
     * @throws IllegalArgumentException if the data read does not describe a
     *                                  char trie
     * @draft 2.1
     */
    public CharTrie(InputStream inputStream,
                    DataManipulate dataManipulate) throws IOException
    {
        super(inputStream, dataManipulate);

        if (!isCharTrie()) {
            throw new IllegalArgumentException(
                               "Data given does not belong to a char trie.");
        }
    }

    // public methods --------------------------------------------------

    /**
     * Gets the value associated with the codepoint.
     * If no value is associated with the codepoint, a default value will be
     * returned.
     * @param ch codepoint
     * @return the data value stored for the codepoint, or the default
     *         (initial) value when no offset can be determined for it
     */
    public final char getCodePointValue(int ch)
    {
        int offset;

        // fastpath for U+0000..U+D7FF
        if (0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            // copy of getRawOffset()
            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
                     + (ch & INDEX_STAGE_3_MASK_);
            return m_data_[offset];
        }

        // handle U+D800..U+10FFFF
        offset = getCodePointOffset(ch);

        // a negative offset signals that no data offset is available for
        // ch; in that case fall back to the default value m_initialValue_
        return (offset >= 0) ? m_data_[offset] : m_initialValue_;
    }

    /**
     * Gets the value to the data which this lead surrogate character points
     * to.
     * Returned data may contain folding offset information for the next
     * trailing surrogate character.
     * This method does not guarantee correct results for trail surrogates.
     * @param ch lead surrogate character
     * @return data value
     */
    public final char getLeadValue(char ch)
    {
        return m_data_[getLeadOffset(ch)];
    }

    // protected methods -----------------------------------------------

    /**
     * <p>Parses the input stream and stores its trie content into a index and
     * data array</p>
     * <p>Index and data are read as one contiguous run of
     * {@code m_dataOffset_ + m_dataLength_} chars, so the data array is the
     * very same array object as the index array.</p>
     * @param inputStream data input stream containing trie data
     * @throws IOException thrown when data reading fails
     */
    protected final void unserialize(InputStream inputStream)
                                                throws IOException
    {
        // The wrapper is deliberately not closed here: closing it would also
        // close the caller-supplied stream.
        DataInputStream input = new DataInputStream(inputStream);
        int indexDataLength = m_dataOffset_ + m_dataLength_;
        m_index_ = new char[indexDataLength];
        for (int i = 0; i < indexDataLength; i++) {
            m_index_[i] = input.readChar();
        }
        // data shares the index array; data entries start at m_dataOffset_
        m_data_ = m_index_;
        // the first data entry is used as the default value
        m_initialValue_ = m_data_[m_dataOffset_];
    }

    /**
     * Gets the offset to the data which the surrogate pair points to.
     * @param lead lead surrogate
     * @param trail trailing surrogate
     * @return offset to data, or -1 when the folding offset obtained for the
     *         lead surrogate is not positive
     * @throws NullPointerException if this trie has no DataManipulate set
     * @draft 2.1
     */
    protected final int getSurrogateOffset(char lead, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }

        // get fold position for the next trail surrogate
        int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));

        // get the real data from the folded lead/trail units
        if (offset > 0) {
            return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
        }

        // no usable folding offset: report -1 so that the caller can fall
        // back to the default value m_initialValue_
        return -1;
    }

    // private data members --------------------------------------------

    /**
     * Default value
     */
    private char m_initialValue_;
    /**
     * Array of char data
     */
    private char[] m_data_;
}