// Path: blob/master/src/java.base/share/classes/sun/util/locale/provider/CollationRules.java
// 41161 views
/*1* Copyright (c) 1996, 2012, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425/*26* (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved27* (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved28*29* The original version of this source code and documentation is copyrighted30* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These31* materials are provided under terms of a License Agreement between Taligent32* and Sun. This technology is protected by multiple US and International33* patents. 
This notice and attribution to Taligent may not be removed.34* Taligent is a registered trademark of Taligent, Inc.35*36*/3738package sun.util.locale.provider;39/**40* CollationRules contains the default en_US collation rules as a base41* for building other collation tables.42* <p>Note that decompositions are done before these rules are used,43* so they do not have to contain accented characters, such as A-grave.44* @see RuleBasedCollator45* @see LocaleElements46* @author Helena Shih, Mark Davis47*/48final class CollationRules {49static final String DEFAULTRULES =50"" // no FRENCH accent order by default, add in French Delta51// IGNORABLES (up to first < character)52// COMPLETELY IGNORE format characters53+ "='\u200B'=\u200C=\u200D=\u200E=\u200F"54// Control Characters55+ "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot56+ "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ...57+ "='\u000b' =\u000e" //vt,, so58+ "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc359+ "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can60+ "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs61+ "=\u001e =\u001f =\u007f" //rs, us, del62//....then the C1 Latin 1 reserved control codes63+ "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"64+ "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"65+ "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"66+ "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"67+ "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"68+ "=\u009e =\u009f"69// IGNORE except for secondary, tertiary difference70// Spaces71+ ";'\u0020';'\u00A0'" // spaces72+ ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'" // spaces73+ ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'" // spaces74+ ";'\u200A';'\u3000';'\uFEFF'" // spaces75+ ";'\r' ;'\t' ;'\n';'\f';'\u000b'" // whitespace7677// Non-spacing accents7879+ ";\u0301" // non-spacing acute accent80+ ";\u0300" // non-spacing grave accent81+ ";\u0306" // non-spacing breve accent82+ ";\u0302" 
// non-spacing circumflex accent83+ ";\u030c" // non-spacing caron/hacek accent84+ ";\u030a" // non-spacing ring above accent85+ ";\u030d" // non-spacing vertical line above86+ ";\u0308" // non-spacing diaeresis accent87+ ";\u030b" // non-spacing double acute accent88+ ";\u0303" // non-spacing tilde accent89+ ";\u0307" // non-spacing dot above/overdot accent90+ ";\u0304" // non-spacing macron accent91+ ";\u0337" // non-spacing short slash overlay (overstruck diacritic)92+ ";\u0327" // non-spacing cedilla accent93+ ";\u0328" // non-spacing ogonek accent94+ ";\u0323" // non-spacing dot-below/underdot accent95+ ";\u0332" // non-spacing underscore/underline accent96// with the rest of the general diacritical marks in binary order97+ ";\u0305" // non-spacing overscore/overline98+ ";\u0309" // non-spacing hook above99+ ";\u030e" // non-spacing double vertical line above100+ ";\u030f" // non-spacing double grave101+ ";\u0310" // non-spacing chandrabindu102+ ";\u0311" // non-spacing inverted breve103+ ";\u0312" // non-spacing turned comma above/cedilla above104+ ";\u0313" // non-spacing comma above105+ ";\u0314" // non-spacing reversed comma above106+ ";\u0315" // non-spacing comma above right107+ ";\u0316" // non-spacing grave below108+ ";\u0317" // non-spacing acute below109+ ";\u0318" // non-spacing left tack below110+ ";\u0319" // non-spacing tack below111+ ";\u031a" // non-spacing left angle above112+ ";\u031b" // non-spacing horn113+ ";\u031c" // non-spacing left half ring below114+ ";\u031d" // non-spacing up tack below115+ ";\u031e" // non-spacing down tack below116+ ";\u031f" // non-spacing plus sign below117+ ";\u0320" // non-spacing minus sign below118+ ";\u0321" // non-spacing palatalized hook below119+ ";\u0322" // non-spacing retroflex hook below120+ ";\u0324" // non-spacing double dot below121+ ";\u0325" // non-spacing ring below122+ ";\u0326" // non-spacing comma below123+ ";\u0329" // non-spacing vertical line below124+ ";\u032a" // non-spacing bridge 
below125+ ";\u032b" // non-spacing inverted double arch below126+ ";\u032c" // non-spacing hacek below127+ ";\u032d" // non-spacing circumflex below128+ ";\u032e" // non-spacing breve below129+ ";\u032f" // non-spacing inverted breve below130+ ";\u0330" // non-spacing tilde below131+ ";\u0331" // non-spacing macron below132+ ";\u0333" // non-spacing double underscore133+ ";\u0334" // non-spacing tilde overlay134+ ";\u0335" // non-spacing short bar overlay135+ ";\u0336" // non-spacing long bar overlay136+ ";\u0338" // non-spacing long slash overlay137+ ";\u0339" // non-spacing right half ring below138+ ";\u033a" // non-spacing inverted bridge below139+ ";\u033b" // non-spacing square below140+ ";\u033c" // non-spacing seagull below141+ ";\u033d" // non-spacing x above142+ ";\u033e" // non-spacing vertical tilde143+ ";\u033f" // non-spacing double overscore144//+ ";\u0340" // non-spacing grave tone mark == \u0300145//+ ";\u0341" // non-spacing acute tone mark == \u0301146+ ";\u0342;"147//+ "\u0343;" // == \u0313148+ "\u0344;\u0345;\u0360;\u0361" // newer149+ ";\u0483;\u0484;\u0485;\u0486" // Cyrillic accents150151+ ";\u20D0;\u20D1;\u20D2" // symbol accents152+ ";\u20D3;\u20D4;\u20D5" // symbol accents153+ ";\u20D6;\u20D7;\u20D8" // symbol accents154+ ";\u20D9;\u20DA;\u20DB" // symbol accents155+ ";\u20DC;\u20DD;\u20DE" // symbol accents156+ ";\u20DF;\u20E0;\u20E1" // symbol accents157158+ ",'\u002D';\u00AD" // dashes159+ ";\u2010;\u2011;\u2012" // dashes160+ ";\u2013;\u2014;\u2015" // dashes161+ ";\u2212" // dashes162163// other punctuation164165+ "<'\u005f'" // underline/underscore (spacing)166+ "<\u00af" // overline or macron (spacing)167+ "<'\u002c'" // comma (spacing)168+ "<'\u003b'" // semicolon169+ "<'\u003a'" // colon170+ "<'\u0021'" // exclamation point171+ "<\u00a1" // inverted exclamation point172+ "<'\u003f'" // question mark173+ "<\u00bf" // inverted question mark174+ "<'\u002f'" // slash175+ "<'\u002e'" // period/full stop176+ "<\u00b4" // acute accent 
(spacing)177+ "<'\u0060'" // grave accent (spacing)178+ "<'\u005e'" // circumflex accent (spacing)179+ "<\u00a8" // diaresis/umlaut accent (spacing)180+ "<'\u007e'" // tilde accent (spacing)181+ "<\u00b7" // middle dot (spacing)182+ "<\u00b8" // cedilla accent (spacing)183+ "<'\u0027'" // apostrophe184+ "<'\"'" // quotation marks185+ "<\u00ab" // left angle quotes186+ "<\u00bb" // right angle quotes187+ "<'\u0028'" // left parenthesis188+ "<'\u0029'" // right parenthesis189+ "<'\u005b'" // left bracket190+ "<'\u005d'" // right bracket191+ "<'\u007b'" // left brace192+ "<'\u007d'" // right brace193+ "<\u00a7" // section symbol194+ "<\u00b6" // paragraph symbol195+ "<\u00a9" // copyright symbol196+ "<\u00ae" // registered trademark symbol197+ "<'\u0040'" // at sign198+ "<\u00a4" // international currency symbol199+ "<\u0e3f" // baht sign200+ "<\u00a2" // cent sign201+ "<\u20a1" // colon sign202+ "<\u20a2" // cruzeiro sign203+ "<'\u0024'" // dollar sign204+ "<\u20ab" // dong sign205+ "<\u20ac" // euro sign206+ "<\u20a3" // franc sign207+ "<\u20a4" // lira sign208+ "<\u20a5" // mill sign209+ "<\u20a6" // naira sign210+ "<\u20a7" // peseta sign211+ "<\u00a3" // pound-sterling sign212+ "<\u20a8" // rupee sign213+ "<\u20aa" // new shekel sign214+ "<\u20a9" // won sign215+ "<\u00a5" // yen sign216+ "<'\u002a'" // asterisk217+ "<'\\'" // backslash218+ "<'\u0026'" // ampersand219+ "<'\u0023'" // number sign220+ "<'\u0025'" // percent sign221+ "<'\u002b'" // plus sign222+ "<\u00b1" // plus-or-minus sign223+ "<\u00f7" // divide sign224+ "<\u00d7" // multiply sign225+ "<'\u003c'" // less-than sign226+ "<'\u003d'" // equal sign227+ "<'\u003e'" // greater-than sign228+ "<\u00ac" // end of line symbol/logical NOT symbol229+ "<'\u007c'" // vertical line/logical OR symbol230+ "<\u00a6" // broken vertical line231+ "<\u00b0" // degree symbol232+ "<\u00b5" // micro symbol233234// NUMERICS235236+ "<0<1<2<3<4<5<6<7<8<9"237+ "<\u00bc<\u00bd<\u00be" // 1/4,1/2,3/4 fractions238239// 
NON-IGNORABLES240+ "<a,A"241+ "<b,B"242+ "<c,C"243+ "<d,D"244+ "<\u00F0,\u00D0" // eth245+ "<e,E"246+ "<f,F"247+ "<g,G"248+ "<h,H"249+ "<i,I"250+ "<j,J"251+ "<k,K"252+ "<l,L"253+ "<m,M"254+ "<n,N"255+ "<o,O"256+ "<p,P"257+ "<q,Q"258+ "<r,R"259+ "<s, S & SS,\u00DF" // s-zet260+ "<t,T"261+ "& TH, \u00DE &TH, \u00FE " // thorn262+ "<u,U"263+ "<v,V"264+ "<w,W"265+ "<x,X"266+ "<y,Y"267+ "<z,Z"268+ "&AE,\u00C6" // ae & AE ligature269+ "&AE,\u00E6"270+ "&OE,\u0152" // oe & OE ligature271+ "&OE,\u0153";272273// No instantiation274private CollationRules() {275}276}277278279