Path: blob/master/src/java.base/share/classes/java/text/Collator.java
41152 views
/*1* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425/*26* (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved27* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved28*29* The original version of this source code and documentation is copyrighted30* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These31* materials are provided under terms of a License Agreement between Taligent32* and Sun. This technology is protected by multiple US and International33* patents. This notice and attribution to Taligent may not be removed.34* Taligent is a registered trademark of Taligent, Inc.35*36*/3738package java.text;3940import java.lang.ref.SoftReference;41import java.text.spi.CollatorProvider;42import java.util.Locale;43import java.util.ResourceBundle;44import java.util.concurrent.ConcurrentHashMap;45import java.util.concurrent.ConcurrentMap;46import sun.util.locale.provider.LocaleProviderAdapter;47import sun.util.locale.provider.LocaleServiceProviderPool;484950/**51* The {@code Collator} class performs locale-sensitive52* {@code String} comparison. You use this class to build53* searching and sorting routines for natural language text.54*55* <p>56* {@code Collator} is an abstract base class. Subclasses57* implement specific collation strategies. One subclass,58* {@code RuleBasedCollator}, is currently provided with59* the Java Platform and is applicable to a wide set of languages. Other60* subclasses may be created to handle more specialized needs.61*62* <p>63* Like other locale-sensitive classes, you can use the static64* factory method, {@code getInstance}, to obtain the appropriate65* {@code Collator} object for a given locale. You will only need66* to look at the subclasses of {@code Collator} if you need67* to understand the details of a particular collation strategy or68* if you need to modify that strategy.69*70* <p>71* The following example shows how to compare two strings using72* the {@code Collator} for the default locale.73* <blockquote>74* <pre>{@code75* // Compare two strings in the default locale76* Collator myCollator = Collator.getInstance();77* if( myCollator.compare("abc", "ABC") < 0 )78* System.out.println("abc is less than ABC");79* else80* System.out.println("abc is greater than or equal to ABC");81* }</pre>82* </blockquote>83*84* <p>85* You can set a {@code Collator}'s <em>strength</em> property86* to determine the level of difference considered significant in87* comparisons. Four strengths are provided: {@code PRIMARY},88* {@code SECONDARY}, {@code TERTIARY}, and {@code IDENTICAL}.89* The exact assignment of strengths to language features is90* locale dependent. For example, in Czech, "e" and "f" are considered91* primary differences, while "e" and "ě" are secondary differences,92* "e" and "E" are tertiary differences and "e" and "e" are identical.93* The following shows how both case and accents could be ignored for94* US English.95* <blockquote>96* <pre>97* //Get the Collator for US English and set its strength to PRIMARY98* Collator usCollator = Collator.getInstance(Locale.US);99* usCollator.setStrength(Collator.PRIMARY);100* if( usCollator.compare("abc", "ABC") == 0 ) {101* System.out.println("Strings are equivalent");102* }103* </pre>104* </blockquote>105* <p>106* For comparing {@code String}s exactly once, the {@code compare}107* method provides the best performance. When sorting a list of108* {@code String}s however, it is generally necessary to compare each109* {@code String} multiple times. In this case, {@code CollationKey}s110* provide better performance. The {@code CollationKey} class converts111* a {@code String} to a series of bits that can be compared bitwise112* against other {@code CollationKey}s. A {@code CollationKey} is113* created by a {@code Collator} object for a given {@code String}.114* <br>115* <strong>Note:</strong> {@code CollationKey}s from different116* {@code Collator}s can not be compared. See the class description117* for {@link CollationKey}118* for an example using {@code CollationKey}s.119*120* @see RuleBasedCollator121* @see CollationKey122* @see CollationElementIterator123* @see Locale124* @author Helena Shih, Laura Werner, Richard Gillam125* @since 1.1126*/127128public abstract class Collator129implements java.util.Comparator<Object>, Cloneable130{131/**132* Collator strength value. When set, only PRIMARY differences are133* considered significant during comparison. The assignment of strengths134* to language features is locale dependent. A common example is for135* different base letters ("a" vs "b") to be considered a PRIMARY difference.136* @see java.text.Collator#setStrength137* @see java.text.Collator#getStrength138*/139public static final int PRIMARY = 0;140/**141* Collator strength value. When set, only SECONDARY and above differences are142* considered significant during comparison. The assignment of strengths143* to language features is locale dependent. A common example is for144* different accented forms of the same base letter ("a" vs "\u00E4") to be145* considered a SECONDARY difference.146* @see java.text.Collator#setStrength147* @see java.text.Collator#getStrength148*/149public static final int SECONDARY = 1;150/**151* Collator strength value. When set, only TERTIARY and above differences are152* considered significant during comparison. The assignment of strengths153* to language features is locale dependent. A common example is for154* case differences ("a" vs "A") to be considered a TERTIARY difference.155* @see java.text.Collator#setStrength156* @see java.text.Collator#getStrength157*/158public static final int TERTIARY = 2;159160/**161* Collator strength value. When set, all differences are162* considered significant during comparison. The assignment of strengths163* to language features is locale dependent. A common example is for control164* characters ("\u0001" vs "\u0002") to be considered equal at the165* PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL166* level. Additionally, differences between pre-composed accents such as167* "\u00C0" (A-grave) and combining accents such as "A\u0300"168* (A, combining-grave) will be considered significant at the IDENTICAL169* level if decomposition is set to NO_DECOMPOSITION.170*/171public static final int IDENTICAL = 3;172173/**174* Decomposition mode value. With NO_DECOMPOSITION175* set, accented characters will not be decomposed for collation. This176* is the default setting and provides the fastest collation but177* will only produce correct results for languages that do not use accents.178* @see java.text.Collator#getDecomposition179* @see java.text.Collator#setDecomposition180*/181public static final int NO_DECOMPOSITION = 0;182183/**184* Decomposition mode value. With CANONICAL_DECOMPOSITION185* set, characters that are canonical variants according to Unicode186* standard will be decomposed for collation. This should be used to get187* correct collation of accented characters.188* <p>189* CANONICAL_DECOMPOSITION corresponds to Normalization Form D as190* described in191* <a href="http://www.unicode.org/reports/tr15/">Unicode192* Standard Annex #15: Unicode Normalization Forms</a>.193* @see java.text.Collator#getDecomposition194* @see java.text.Collator#setDecomposition195*/196public static final int CANONICAL_DECOMPOSITION = 1;197198/**199* Decomposition mode value. With FULL_DECOMPOSITION200* set, both Unicode canonical variants and Unicode compatibility variants201* will be decomposed for collation. This causes not only accented202* characters to be collated, but also characters that have special formats203* to be collated with their norminal form. For example, the half-width and204* full-width ASCII and Katakana characters are then collated together.205* FULL_DECOMPOSITION is the most complete and therefore the slowest206* decomposition mode.207* <p>208* FULL_DECOMPOSITION corresponds to Normalization Form KD as209* described in210* <a href="http://www.unicode.org/reports/tr15/">Unicode211* Standard Annex #15: Unicode Normalization Forms</a>.212* @see java.text.Collator#getDecomposition213* @see java.text.Collator#setDecomposition214*/215public static final int FULL_DECOMPOSITION = 2;216217/**218* Gets the Collator for the current default locale.219* The default locale is determined by java.util.Locale.getDefault.220* @return the Collator for the default locale.(for example, en_US)221* @see java.util.Locale#getDefault222*/223public static synchronized Collator getInstance() {224return getInstance(Locale.getDefault());225}226227/**228* Gets the Collator for the desired locale.229* @param desiredLocale the desired locale.230* @return the Collator for the desired locale.231* @see java.util.Locale232* @see java.util.ResourceBundle233*/234public static Collator getInstance(Locale desiredLocale) {235SoftReference<Collator> ref = cache.get(desiredLocale);236Collator result = (ref != null) ? ref.get() : null;237if (result == null) {238LocaleProviderAdapter adapter;239adapter = LocaleProviderAdapter.getAdapter(CollatorProvider.class,240desiredLocale);241CollatorProvider provider = adapter.getCollatorProvider();242result = provider.getInstance(desiredLocale);243if (result == null) {244result = LocaleProviderAdapter.forJRE()245.getCollatorProvider().getInstance(desiredLocale);246}247while (true) {248if (ref != null) {249// Remove the empty SoftReference if any250cache.remove(desiredLocale, ref);251}252ref = cache.putIfAbsent(desiredLocale, new SoftReference<>(result));253if (ref == null) {254break;255}256Collator cachedColl = ref.get();257if (cachedColl != null) {258result = cachedColl;259break;260}261}262}263return (Collator) result.clone(); // make the world safe264}265266/**267* Compares the source string to the target string according to the268* collation rules for this Collator. Returns an integer less than,269* equal to or greater than zero depending on whether the source String is270* less than, equal to or greater than the target string. See the Collator271* class description for an example of use.272* <p>273* For a one time comparison, this method has the best performance. If a274* given String will be involved in multiple comparisons, CollationKey.compareTo275* has the best performance. See the Collator class description for an example276* using CollationKeys.277* @param source the source string.278* @param target the target string.279* @return Returns an integer value. Value is less than zero if source is less than280* target, value is zero if source and target are equal, value is greater than zero281* if source is greater than target.282* @see java.text.CollationKey283* @see java.text.Collator#getCollationKey284*/285public abstract int compare(String source, String target);286287/**288* Compares its two arguments for order. Returns a negative integer,289* zero, or a positive integer as the first argument is less than, equal290* to, or greater than the second.291* <p>292* This implementation merely returns293* {@code compare((String)o1, (String)o2) }.294*295* @return a negative integer, zero, or a positive integer as the296* first argument is less than, equal to, or greater than the297* second.298* @throws ClassCastException the arguments cannot be cast to Strings.299* @see java.util.Comparator300* @since 1.2301*/302@Override303public int compare(Object o1, Object o2) {304return compare((String)o1, (String)o2);305}306307/**308* Transforms the String into a series of bits that can be compared bitwise309* to other CollationKeys. CollationKeys provide better performance than310* Collator.compare when Strings are involved in multiple comparisons.311* See the Collator class description for an example using CollationKeys.312* @param source the string to be transformed into a collation key.313* @return the CollationKey for the given String based on this Collator's collation314* rules. If the source String is null, a null CollationKey is returned.315* @see java.text.CollationKey316* @see java.text.Collator#compare317*/318public abstract CollationKey getCollationKey(String source);319320/**321* Convenience method for comparing the equality of two strings based on322* this Collator's collation rules.323* @param source the source string to be compared with.324* @param target the target string to be compared with.325* @return true if the strings are equal according to the collation326* rules. false, otherwise.327* @see java.text.Collator#compare328*/329public boolean equals(String source, String target)330{331return (compare(source, target) == Collator.EQUAL);332}333334/**335* Returns this Collator's strength property. The strength property determines336* the minimum level of difference considered significant during comparison.337* See the Collator class description for an example of use.338* @return this Collator's current strength property.339* @see java.text.Collator#setStrength340* @see java.text.Collator#PRIMARY341* @see java.text.Collator#SECONDARY342* @see java.text.Collator#TERTIARY343* @see java.text.Collator#IDENTICAL344*/345public synchronized int getStrength()346{347return strength;348}349350/**351* Sets this Collator's strength property. The strength property determines352* the minimum level of difference considered significant during comparison.353* See the Collator class description for an example of use.354* @param newStrength the new strength value.355* @see java.text.Collator#getStrength356* @see java.text.Collator#PRIMARY357* @see java.text.Collator#SECONDARY358* @see java.text.Collator#TERTIARY359* @see java.text.Collator#IDENTICAL360* @throws IllegalArgumentException If the new strength value is not one of361* PRIMARY, SECONDARY, TERTIARY or IDENTICAL.362*/363public synchronized void setStrength(int newStrength) {364if ((newStrength != PRIMARY) &&365(newStrength != SECONDARY) &&366(newStrength != TERTIARY) &&367(newStrength != IDENTICAL)) {368throw new IllegalArgumentException("Incorrect comparison level.");369}370strength = newStrength;371}372373/**374* Get the decomposition mode of this Collator. Decomposition mode375* determines how Unicode composed characters are handled. Adjusting376* decomposition mode allows the user to select between faster and more377* complete collation behavior.378* <p>The three values for decomposition mode are:379* <UL>380* <LI>NO_DECOMPOSITION,381* <LI>CANONICAL_DECOMPOSITION382* <LI>FULL_DECOMPOSITION.383* </UL>384* See the documentation for these three constants for a description385* of their meaning.386* @return the decomposition mode387* @see java.text.Collator#setDecomposition388* @see java.text.Collator#NO_DECOMPOSITION389* @see java.text.Collator#CANONICAL_DECOMPOSITION390* @see java.text.Collator#FULL_DECOMPOSITION391*/392public synchronized int getDecomposition()393{394return decmp;395}396/**397* Set the decomposition mode of this Collator. See getDecomposition398* for a description of decomposition mode.399* @param decompositionMode the new decomposition mode.400* @see java.text.Collator#getDecomposition401* @see java.text.Collator#NO_DECOMPOSITION402* @see java.text.Collator#CANONICAL_DECOMPOSITION403* @see java.text.Collator#FULL_DECOMPOSITION404* @throws IllegalArgumentException If the given value is not a valid decomposition405* mode.406*/407public synchronized void setDecomposition(int decompositionMode) {408if ((decompositionMode != NO_DECOMPOSITION) &&409(decompositionMode != CANONICAL_DECOMPOSITION) &&410(decompositionMode != FULL_DECOMPOSITION)) {411throw new IllegalArgumentException("Wrong decomposition mode.");412}413decmp = decompositionMode;414}415416/**417* Returns an array of all locales for which the418* {@code getInstance} methods of this class can return419* localized instances.420* The returned array represents the union of locales supported421* by the Java runtime and by installed422* {@link java.text.spi.CollatorProvider CollatorProvider} implementations.423* It must contain at least a Locale instance equal to424* {@link java.util.Locale#US Locale.US}.425*426* @return An array of locales for which localized427* {@code Collator} instances are available.428*/429public static synchronized Locale[] getAvailableLocales() {430LocaleServiceProviderPool pool =431LocaleServiceProviderPool.getPool(CollatorProvider.class);432return pool.getAvailableLocales();433}434435/**436* Overrides Cloneable437*/438@Override439public Object clone()440{441try {442return (Collator)super.clone();443} catch (CloneNotSupportedException e) {444throw new InternalError(e);445}446}447448/**449* Compares the equality of two Collators.450* @param that the Collator to be compared with this.451* @return true if this Collator is the same as that Collator;452* false otherwise.453*/454@Override455public boolean equals(Object that)456{457if (this == that) {458return true;459}460if (that == null) {461return false;462}463if (getClass() != that.getClass()) {464return false;465}466Collator other = (Collator) that;467return ((strength == other.strength) &&468(decmp == other.decmp));469}470471/**472* Generates the hash code for this Collator.473*/474@Override475public abstract int hashCode();476477/**478* Default constructor. This constructor is479* protected so subclasses can get access to it. Users typically create480* a Collator sub-class by calling the factory method getInstance.481* @see java.text.Collator#getInstance482*/483protected Collator()484{485strength = TERTIARY;486decmp = CANONICAL_DECOMPOSITION;487}488489private int strength = 0;490private int decmp = 0;491private static final ConcurrentMap<Locale, SoftReference<Collator>> cache492= new ConcurrentHashMap<>();493494//495// FIXME: These three constants should be removed.496//497/**498* LESS is returned if source string is compared to be less than target499* string in the compare() method.500* @see java.text.Collator#compare501*/502static final int LESS = -1;503/**504* EQUAL is returned if source string is compared to be equal to target505* string in the compare() method.506* @see java.text.Collator#compare507*/508static final int EQUAL = 0;509/**510* GREATER is returned if source string is compared to be greater than511* target string in the compare() method.512* @see java.text.Collator#compare513*/514static final int GREATER = 1;515}516517518