Path: blob/master/src/java.base/share/classes/java/text/MergeCollation.java
41152 views
/*1* Copyright (c) 1996, 2019, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425/*26* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved27* (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved28*29* The original version of this source code and documentation is copyrighted30* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These31* materials are provided under terms of a License Agreement between Taligent32* and Sun. This technology is protected by multiple US and International33* patents. This notice and attribution to Taligent may not be removed.34* Taligent is a registered trademark of Taligent, Inc.35*36*/3738package java.text;3940import java.util.ArrayList;4142/**43* Utility class for normalizing and merging patterns for collation.44* Patterns are strings of the form <entry>*, where <entry> has the45* form:46* <pattern> := <entry>*47* <entry> := <separator><chars>{"/"<extension>}48* <separator> := "=", ",", ";", "<", "&"49* <chars>, and <extension> are both arbitrary strings.50* unquoted whitespaces are ignored.51* 'xxx' can be used to quote characters52* One difference from Collator is that & is used to reset to a current53* point. Or, in other words, it introduces a new sequence which is to54* be added to the old.55* That is: "a < b < c < d" is the same as "a < b & b < c & c < d" OR56* "a < b < d & b < c"57* XXX: make '' be a single quote.58* @see PatternEntry59* @author Mark Davis, Helena Shih60*/6162final class MergeCollation {6364/**65* Creates from a pattern66* @throws ParseException If the input pattern is incorrect.67*/68public MergeCollation(String pattern) throws ParseException69{70for (int i = 0; i < statusArray.length; i++)71statusArray[i] = 0;72setPattern(pattern);73}7475/**76* recovers current pattern77*/78public String getPattern() {79return getPattern(true);80}8182/**83* recovers current pattern.84* @param withWhiteSpace puts spacing around the entries, and \n85* before & and <86*/87public String getPattern(boolean withWhiteSpace) {88StringBuffer result = new StringBuffer();89PatternEntry tmp = null;90ArrayList<PatternEntry> extList = null;91int i;92for (i = 0; i < patterns.size(); ++i) {93PatternEntry entry = patterns.get(i);94if (!entry.extension.isEmpty()) {95if (extList == null)96extList = new ArrayList<>();97extList.add(entry);98} else {99if (extList != null) {100PatternEntry last = findLastWithNoExtension(i-1);101for (int j = extList.size() - 1; j >= 0 ; j--) {102tmp = extList.get(j);103tmp.addToBuffer(result, false, withWhiteSpace, last);104}105extList = null;106}107entry.addToBuffer(result, false, withWhiteSpace, null);108}109}110if (extList != null) {111PatternEntry last = findLastWithNoExtension(i-1);112for (int j = extList.size() - 1; j >= 0 ; j--) {113tmp = extList.get(j);114tmp.addToBuffer(result, false, withWhiteSpace, last);115}116extList = null;117}118return result.toString();119}120121private final PatternEntry findLastWithNoExtension(int i) {122for (--i;i >= 0; --i) {123PatternEntry entry = patterns.get(i);124if (entry.extension.isEmpty()) {125return entry;126}127}128return null;129}130131/**132* emits the pattern for collation builder.133* @return emits the string in the format understable to the collation134* builder.135*/136public String emitPattern() {137return emitPattern(true);138}139140/**141* emits the pattern for collation builder.142* @param withWhiteSpace puts spacing around the entries, and \n143* before & and <144* @return emits the string in the format understable to the collation145* builder.146*/147public String emitPattern(boolean withWhiteSpace) {148StringBuffer result = new StringBuffer();149for (int i = 0; i < patterns.size(); ++i)150{151PatternEntry entry = patterns.get(i);152if (entry != null) {153entry.addToBuffer(result, true, withWhiteSpace, null);154}155}156return result.toString();157}158159/**160* sets the pattern.161*/162public void setPattern(String pattern) throws ParseException163{164patterns.clear();165addPattern(pattern);166}167168/**169* adds a pattern to the current one.170* @param pattern the new pattern to be added171*/172public void addPattern(String pattern) throws ParseException173{174if (pattern == null)175return;176177PatternEntry.Parser parser = new PatternEntry.Parser(pattern);178179PatternEntry entry = parser.next();180while (entry != null) {181fixEntry(entry);182entry = parser.next();183}184}185186/**187* gets count of separate entries188* @return the size of pattern entries189*/190public int getCount() {191return patterns.size();192}193194/**195* gets count of separate entries196* @param index the offset of the desired pattern entry197* @return the requested pattern entry198*/199public PatternEntry getItemAt(int index) {200return patterns.get(index);201}202203//============================================================204// privates205//============================================================206ArrayList<PatternEntry> patterns = new ArrayList<>(); // a list of PatternEntries207208private transient PatternEntry saveEntry = null;209private transient PatternEntry lastEntry = null;210211// This is really used as a local variable inside fixEntry, but we cache212// it here to avoid newing it up every time the method is called.213private transient StringBuffer excess = new StringBuffer();214215//216// When building a MergeCollation, we need to do lots of searches to see217// whether a given entry is already in the table. Since we're using an218// array, this would make the algorithm O(N*N). To speed things up, we219// use this bit array to remember whether the array contains any entries220// starting with each Unicode character. If not, we can avoid the search.221// Using BitSet would make this easier, but it's significantly slower.222//223private transient byte[] statusArray = new byte[8192];224private final byte BITARRAYMASK = (byte)0x1;225private final int BYTEPOWER = 3;226private final int BYTEMASK = (1 << BYTEPOWER) - 1;227228/*229If the strength is RESET, then just change the lastEntry to230be the current. (If the current is not in patterns, signal an error).231If not, then remove the current entry, and add it after lastEntry232(which is usually at the end).233*/234private final void fixEntry(PatternEntry newEntry) throws ParseException235{236// check to see whether the new entry has the same characters as the previous237// entry did (this can happen when a pattern declaring a difference between two238// strings that are canonically equivalent is normalized). If so, and the strength239// is anything other than IDENTICAL or RESET, throw an exception (you can't240// declare a string to be unequal to itself). --rtg 5/24/99241if (lastEntry != null && newEntry.chars.equals(lastEntry.chars)242&& newEntry.extension.equals(lastEntry.extension)) {243if (newEntry.strength != Collator.IDENTICAL244&& newEntry.strength != PatternEntry.RESET) {245throw new ParseException("The entries " + lastEntry + " and "246+ newEntry + " are adjacent in the rules, but have conflicting "247+ "strengths: A character can't be unequal to itself.", -1);248} else {249// otherwise, just skip this entry and behave as though you never saw it250return;251}252}253254boolean changeLastEntry = true;255if (newEntry.strength != PatternEntry.RESET) {256int oldIndex = -1;257258if ((newEntry.chars.length() == 1)) {259260char c = newEntry.chars.charAt(0);261int statusIndex = c >> BYTEPOWER;262byte bitClump = statusArray[statusIndex];263byte setBit = (byte)(BITARRAYMASK << (c & BYTEMASK));264265if (bitClump != 0 && (bitClump & setBit) != 0) {266oldIndex = patterns.lastIndexOf(newEntry);267} else {268// We're going to add an element that starts with this269// character, so go ahead and set its bit.270statusArray[statusIndex] = (byte)(bitClump | setBit);271}272} else {273oldIndex = patterns.lastIndexOf(newEntry);274}275if (oldIndex != -1) {276patterns.remove(oldIndex);277}278279excess.setLength(0);280int lastIndex = findLastEntry(lastEntry, excess);281282if (excess.length() != 0) {283newEntry.extension = excess + newEntry.extension;284if (lastIndex != patterns.size()) {285lastEntry = saveEntry;286changeLastEntry = false;287}288}289if (lastIndex == patterns.size()) {290patterns.add(newEntry);291saveEntry = newEntry;292} else {293patterns.add(lastIndex, newEntry);294}295}296if (changeLastEntry) {297lastEntry = newEntry;298}299}300301private final int findLastEntry(PatternEntry entry,302StringBuffer excessChars) throws ParseException303{304if (entry == null)305return 0;306307if (entry.strength != PatternEntry.RESET) {308// Search backwards for string that contains this one;309// most likely entry is last one310311int oldIndex = -1;312if ((entry.chars.length() == 1)) {313int index = entry.chars.charAt(0) >> BYTEPOWER;314if ((statusArray[index] &315(BITARRAYMASK << (entry.chars.charAt(0) & BYTEMASK))) != 0) {316oldIndex = patterns.lastIndexOf(entry);317}318} else {319oldIndex = patterns.lastIndexOf(entry);320}321if ((oldIndex == -1))322throw new ParseException("couldn't find last entry: "323+ entry, oldIndex);324return oldIndex + 1;325} else {326int i;327for (i = patterns.size() - 1; i >= 0; --i) {328PatternEntry e = patterns.get(i);329if (e.chars.regionMatches(0,entry.chars,0,330e.chars.length())) {331excessChars.append(entry.chars, e.chars.length(),332entry.chars.length());333break;334}335}336if (i == -1)337throw new ParseException("couldn't find: " + entry, i);338return i + 1;339}340}341}342343344