Path: blob/master/src/java.base/share/classes/java/text/RBCollationTables.java
41152 views
/*
 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
 *
 *   The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 *   Taligent is a registered trademark of Taligent, Inc.
 *
 */

package java.text;

import java.util.Vector;
import sun.text.UCompactIntArray;
import sun.text.IntHashtable;

/**
 * This class contains the static state of a RuleBasedCollator: The various
 * tables that are used by the collation routines.  Several RuleBasedCollators
 * can share a single RBCollationTables object, easing memory requirements and
 * improving performance.
 */
final class RBCollationTables {
    //===========================================================================================
    //  The following diagram shows the data structure of the RBCollationTables object.
    //  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
    //  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
    //  What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
    //  sorts 'o-umlaut' as if it's always expanded with 'e'.
    //
    // mapping table                     contracting list           expanding list
    // (contains all unicode char
    //  entries)                   ___    ____________       _________________________
    //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
    // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  |  |-------------------------|
    // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  |  |             :           |
    // |____:___|               | |_:_|  |------------|  |  |-------------------------|
    // |____:___|               |        |'cH'|v('cH')|  |  |             :           |
    // |__'a'___|-> v('a')      |        |------------|  |  |-------------------------|
    // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  |  |             :           |
    // |____:___|               |        |------------|  |  |-------------------------|
    // |____:___|               |        |'CH'|v('CH')|  |  |             :           |
    // |___'c'__|----------------         ------------   |  |-------------------------|
    // |____:___|                                        |  |             :           |
    // |o-umlaut|----------------------------------------   |_________________________|
    // |____:___|
    //
    // Noted by Helena Shih on 6/23/97
    //============================================================================================

    /**
     * Creates a tables object for the given rule string.  The rules are
     * remembered (see {@link #getRules()}) and a new RBTableBuilder, handed a
     * {@link BuildAPI} bound to this instance, is asked to build the tables;
     * the builder populates this object through that BuildAPI.
     *
     * @param rules the collation rules the tables are built from
     * @param decmp passed unchanged to {@code RBTableBuilder.build}
     *              (presumably the decomposition mode -- confirm against
     *              RBTableBuilder)
     * @throws ParseException propagated from the table build
     */
    public RBCollationTables(String rules, int decmp) throws ParseException {
        this.rules = rules;

        RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
        builder.build(rules, decmp); // this object is filled in through
                                     // the BuildAPI object
    }

    /**
     * Write-access handle onto the enclosing RBCollationTables.  An instance
     * is created only by the enclosing class's constructor and handed to
     * RBTableBuilder, which uses {@link #fillInTables} to store the finished
     * tables.
     */
    final class BuildAPI {
        /**
         * Private constructor.  Prevents anyone else besides RBTableBuilder
         * from gaining direct access to the internals of this class.
         */
        private BuildAPI() {
        }

        /**
         * This function is used by RBTableBuilder to fill in all the members of this
         * object.  (Effectively, the builder class functions as a "friend" of this
         * class, but to avoid changing too much of the logic, it carries around "shadow"
         * copies of all these variables until the end of the build process and then
         * copies them en masse into the actual tables object once all the construction
         * logic is complete.  This function does that "copying en masse".)
         * @param f2ary The value for frenchSec (the French-secondary flag)
         * @param swap The value for SE Asian swapping rule
         * @param map The collator's character-mapping table (the value for mapping)
         * @param cTbl The collator's contracting-character table (the value for contractTable)
         * @param eTbl The collator's expanding-character table (the value for expandTable)
         * @param cFlgs The hash table of characters that participate in contracting-
         *              character sequences (the value for contractFlags)
         * @param mso The value for maxSecOrder
         * @param mto The value for maxTerOrder
         */
        void fillInTables(boolean f2ary,
                          boolean swap,
                          UCompactIntArray map,
                          Vector<Vector<EntryPair>> cTbl,
                          Vector<int[]> eTbl,
                          IntHashtable cFlgs,
                          short mso,
                          short mto) {
            frenchSec = f2ary;
            seAsianSwapping = swap;
            mapping = map;
            contractTable = cTbl;
            expandTable = eTbl;
            contractFlags = cFlgs;
            maxSecOrder = mso;
            maxTerOrder = mto;
        }
    }

    /**
     * Gets the table-based rules for the collation object.
     * @return returns the collation rules that the table collation object
     *         was created from.
     */
    public String getRules()
    {
        return rules;
    }

    /**
     * Returns the French-secondary flag stored by the table build.
     * @return the value of frenchSec
     */
    public boolean isFrenchSec() {
        return frenchSec;
    }

    /**
     * Returns the SE Asian swapping flag stored by the table build.
     * @return the value of seAsianSwapping
     */
    public boolean isSEAsianSwapping() {
        return seAsianSwapping;
    }

    // ==============================================================
    // internal (for use by CollationElementIterator)
    // ==============================================================

    /**
     * Get the entry of hash table of the contracting string in the collation
     * table.
     * @param ch the starting character of the contracting string
     * @return the contractTable entry at index
     *         {@code mapping.elementAt(ch) - CONTRACTCHARINDEX}, or
     *         {@code null} when that index is negative
     */
    Vector<EntryPair> getContractValues(int ch)
    {
        int index = mapping.elementAt(ch);
        return getContractValuesImpl(index - CONTRACTCHARINDEX);
    }

    /**
     * Get contract values from contractTable by index.
     * @param index position in contractTable; a negative value means
     *              "not found"
     * @return the entry at {@code index}, or {@code null} if {@code index}
     *         is negative
     */
    private Vector<EntryPair> getContractValuesImpl(int index)
    {
        return (index >= 0) ? contractTable.elementAt(index) : null;
    }

    /**
     * Returns true if this character appears anywhere in a contracting
     * character sequence.  (Used by CollationElementIterator.setOffset().)
     */
    boolean usedInContractSeq(int c) {
        return contractFlags.get(c) == 1;
    }

    /**
     * Return the maximum length of any expansion sequences that end
     * with the specified comparison order.
     *
     * @param order a collation order returned by previous or next.
     * @return the maximum length of any expansion sequences ending
     *         with the specified order.
     *
     * @see CollationElementIterator#getMaxExpansion
     */
    int getMaxExpansion(int order) {
        int result = 1;

        if (expandTable != null) {
            // Right now this does a linear search through the entire
            // expansion table.  If a collator had a large number of expansions,
            // this could cause a performance problem, but in practice that
            // rarely happens
            for (int i = 0; i < expandTable.size(); i++) {
                int[] valueList = expandTable.elementAt(i);
                int length = valueList.length;

                // length > result is tested first, so the last-element access
                // is only reached for arrays of at least two elements
                if (length > result && valueList[length-1] == order) {
                    result = length;
                }
            }
        }

        return result;
    }

    /**
     * Get the entry of hash table of the expanding string in the collation
     * table.
     * @param idx the index of the expanding string value list
     * @return the expandTable entry at {@code idx - EXPANDCHARINDEX}
     */
    final int[] getExpandValueList(int idx) {
        return expandTable.elementAt(idx - EXPANDCHARINDEX);
    }

    /**
     * Get the comparison order of a character from the collation table.
     * @return the comparison order of a character.
     */
    int getUnicodeOrder(int ch) {
        return mapping.elementAt(ch);
    }

    /**
     * Returns the maxSecOrder value stored by the table build.
     */
    short getMaxSecOrder() {
        return maxSecOrder;
    }

    /**
     * Returns the maxTerOrder value stored by the table build.
     */
    short getMaxTerOrder() {
        return maxTerOrder;
    }

    /**
     * Reverse a string.  The characters of {@code result} in the range
     * {@code from} (inclusive) to {@code to} (exclusive) are reversed in
     * place; nothing happens when the range holds fewer than two characters.
     */
    //sherman/Note: this is used for secondary order value reverse, no
    //              need to consider supplementary pair.
    static void reverse (StringBuffer result, int from, int to)
    {
        int i = from;
        char swap;

        int j = to - 1;
        while (i < j) {
            swap = result.charAt(i);
            result.setCharAt(i, result.charAt(j));
            result.setCharAt(j, swap);
            i++;
            j--;
        }
    }

    /**
     * Finds the first pair in {@code list} whose {@code fwd} flag equals
     * {@code fwd} and whose {@code entryName} equals {@code name}.
     * @return the index of that pair in {@code list}, or {@link #UNMAPPED}
     *         if there is none
     */
    static final int getEntry(Vector<EntryPair> list, String name, boolean fwd) {
        for (int i = 0; i < list.size(); i++) {
            EntryPair pair = list.elementAt(i);
            if (pair.fwd == fwd && pair.entryName.equals(name)) {
                return i;
            }
        }
        return UNMAPPED;
    }

    // ==============================================================
    // constants
    // ==============================================================
    //sherman/Todo: is the value big enough?????
    static final int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
    static final int CONTRACTCHARINDEX = 0x7F000000;  // contract indexes follow
    static final int UNMAPPED = 0xFFFFFFFF;

    static final int PRIMARYORDERMASK = 0xffff0000;
    static final int SECONDARYORDERMASK = 0x0000ff00;
    static final int TERTIARYORDERMASK = 0x000000ff;
    static final int PRIMARYDIFFERENCEONLY = 0xffff0000;
    static final int SECONDARYDIFFERENCEONLY = 0xffffff00;
    static final int PRIMARYORDERSHIFT = 16;
    static final int SECONDARYORDERSHIFT = 8;

    // ==============================================================
    // instance variables
    // ==============================================================
    private String rules = null;
    private boolean frenchSec = false;
    private boolean seAsianSwapping = false;

    private UCompactIntArray mapping = null;
    private Vector<Vector<EntryPair>> contractTable = null;
    private Vector<int[]> expandTable = null;
    private IntHashtable contractFlags = null;

    private short maxSecOrder = 0;
    private short maxTerOrder = 0;
}