Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/sun/util/locale/provider/CollationRules.java
41161 views
/*
 * Copyright (c) 1996, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
/*
 * (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
 *
 * The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */
38
39
package sun.util.locale.provider;
40
/**
 * CollationRules contains the default en_US collation rules as a base
 * for building other collation tables.
 * <p>Note that decompositions are done before these rules are used,
 * so they do not have to contain accented characters, such as A-grave.
 * <p>The rule text follows the syntax documented for
 * {@link java.text.RuleBasedCollator}: {@code <} introduces a primary
 * difference, {@code ;} a secondary difference, {@code ,} a tertiary
 * difference, {@code =} an identical entry and {@code &} a reset to an
 * earlier entry. Characters that are special in that syntax are wrapped
 * in single quotes inside the rule text.
 * @see RuleBasedCollator
 * @see LocaleElements
 * @author Helena Shih, Mark Davis
 */
final class CollationRules {
    /**
     * The default rule string. It is assembled from four groups, in order:
     * completely ignorable characters (format and control characters),
     * characters that only contribute secondary/tertiary differences
     * (spaces, non-spacing accents, dashes), punctuation and symbols, and
     * finally digits and letters.
     */
    static final String DEFAULTRULES =
        "" // no FRENCH accent order by default, add in French Delta
        // IGNORABLES (up to first < character)
        // COMPLETELY IGNORE format characters
        + "='\u200B'=\u200C=\u200D=\u200E=\u200F"
        // Control Characters
        + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" // null, .. eot
        + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" // enq, ...
        + "='\u000b' =\u000e" // vt, so (lf, ff and cr are listed with the whitespace below)
        + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" // si, dle, dc1, dc2, dc3
        + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" // dc4, nak, syn, etb, can
        + "=\u0019 =\u001a =\u001b =\u001c =\u001d" // em, sub, esc, fs, gs
        + "=\u001e =\u001f =\u007f" // rs, us, del
        //....then the C1 Latin 1 reserved control codes
        + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
        + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
        + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
        + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
        + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
        + "=\u009e =\u009f"
        // IGNORE except for secondary, tertiary difference
        // Spaces
        + ";'\u0020';'\u00A0'" // spaces
        + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'" // spaces
        + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'" // spaces
        + ";'\u200A';'\u3000';'\uFEFF'" // spaces
        // NOTE(review): tab and vertical tab are also listed among the
        // control characters above; confirm which entry takes effect.
        + ";'\r' ;'\t' ;'\n';'\f';'\u000b'" // whitespace

        // Non-spacing accents

        + ";\u0301" // non-spacing acute accent
        + ";\u0300" // non-spacing grave accent
        + ";\u0306" // non-spacing breve accent
        + ";\u0302" // non-spacing circumflex accent
        + ";\u030c" // non-spacing caron/hacek accent
        + ";\u030a" // non-spacing ring above accent
        + ";\u030d" // non-spacing vertical line above
        + ";\u0308" // non-spacing diaeresis accent
        + ";\u030b" // non-spacing double acute accent
        + ";\u0303" // non-spacing tilde accent
        + ";\u0307" // non-spacing dot above/overdot accent
        + ";\u0304" // non-spacing macron accent
        + ";\u0337" // non-spacing short slash overlay (overstruck diacritic)
        + ";\u0327" // non-spacing cedilla accent
        + ";\u0328" // non-spacing ogonek accent
        + ";\u0323" // non-spacing dot-below/underdot accent
        + ";\u0332" // non-spacing underscore/underline accent
        // with the rest of the general diacritical marks in binary order
        + ";\u0305" // non-spacing overscore/overline
        + ";\u0309" // non-spacing hook above
        + ";\u030e" // non-spacing double vertical line above
        + ";\u030f" // non-spacing double grave
        + ";\u0310" // non-spacing chandrabindu
        + ";\u0311" // non-spacing inverted breve
        + ";\u0312" // non-spacing turned comma above/cedilla above
        + ";\u0313" // non-spacing comma above
        + ";\u0314" // non-spacing reversed comma above
        + ";\u0315" // non-spacing comma above right
        + ";\u0316" // non-spacing grave below
        + ";\u0317" // non-spacing acute below
        + ";\u0318" // non-spacing left tack below
        + ";\u0319" // non-spacing tack below
        + ";\u031a" // non-spacing left angle above
        + ";\u031b" // non-spacing horn
        + ";\u031c" // non-spacing left half ring below
        + ";\u031d" // non-spacing up tack below
        + ";\u031e" // non-spacing down tack below
        + ";\u031f" // non-spacing plus sign below
        + ";\u0320" // non-spacing minus sign below
        + ";\u0321" // non-spacing palatalized hook below
        + ";\u0322" // non-spacing retroflex hook below
        + ";\u0324" // non-spacing double dot below
        + ";\u0325" // non-spacing ring below
        + ";\u0326" // non-spacing comma below
        + ";\u0329" // non-spacing vertical line below
        + ";\u032a" // non-spacing bridge below
        + ";\u032b" // non-spacing inverted double arch below
        + ";\u032c" // non-spacing hacek below
        + ";\u032d" // non-spacing circumflex below
        + ";\u032e" // non-spacing breve below
        + ";\u032f" // non-spacing inverted breve below
        + ";\u0330" // non-spacing tilde below
        + ";\u0331" // non-spacing macron below
        + ";\u0333" // non-spacing double underscore
        + ";\u0334" // non-spacing tilde overlay
        + ";\u0335" // non-spacing short bar overlay
        + ";\u0336" // non-spacing long bar overlay
        + ";\u0338" // non-spacing long slash overlay
        + ";\u0339" // non-spacing right half ring below
        + ";\u033a" // non-spacing inverted bridge below
        + ";\u033b" // non-spacing square below
        + ";\u033c" // non-spacing seagull below
        + ";\u033d" // non-spacing x above
        + ";\u033e" // non-spacing vertical tilde
        + ";\u033f" // non-spacing double overscore
        //+ ";\u0340" // non-spacing grave tone mark == \u0300
        //+ ";\u0341" // non-spacing acute tone mark == \u0301
        + ";\u0342;"
        //+ "\u0343;" // == \u0313
        + "\u0344;\u0345;\u0360;\u0361" // newer
        + ";\u0483;\u0484;\u0485;\u0486" // Cyrillic accents

        + ";\u20D0;\u20D1;\u20D2" // symbol accents
        + ";\u20D3;\u20D4;\u20D5" // symbol accents
        + ";\u20D6;\u20D7;\u20D8" // symbol accents
        + ";\u20D9;\u20DA;\u20DB" // symbol accents
        + ";\u20DC;\u20DD;\u20DE" // symbol accents
        + ";\u20DF;\u20E0;\u20E1" // symbol accents

        + ",'\u002D';\u00AD" // dashes
        + ";\u2010;\u2011;\u2012" // dashes
        + ";\u2013;\u2014;\u2015" // dashes
        + ";\u2212" // dashes

        // other punctuation

        + "<'\u005f'" // underline/underscore (spacing)
        + "<\u00af" // overline or macron (spacing)
        + "<'\u002c'" // comma (spacing)
        + "<'\u003b'" // semicolon
        + "<'\u003a'" // colon
        + "<'\u0021'" // exclamation point
        + "<\u00a1" // inverted exclamation point
        + "<'\u003f'" // question mark
        + "<\u00bf" // inverted question mark
        + "<'\u002f'" // slash
        + "<'\u002e'" // period/full stop
        + "<\u00b4" // acute accent (spacing)
        + "<'\u0060'" // grave accent (spacing)
        + "<'\u005e'" // circumflex accent (spacing)
        + "<\u00a8" // diaeresis/umlaut accent (spacing)
        + "<'\u007e'" // tilde accent (spacing)
        + "<\u00b7" // middle dot (spacing)
        + "<\u00b8" // cedilla accent (spacing)
        + "<'\u0027'" // apostrophe
        + "<'\"'" // quotation marks
        + "<\u00ab" // left angle quotes
        + "<\u00bb" // right angle quotes
        + "<'\u0028'" // left parenthesis
        + "<'\u0029'" // right parenthesis
        + "<'\u005b'" // left bracket
        + "<'\u005d'" // right bracket
        + "<'\u007b'" // left brace
        + "<'\u007d'" // right brace
        + "<\u00a7" // section symbol
        + "<\u00b6" // paragraph symbol
        + "<\u00a9" // copyright symbol
        + "<\u00ae" // registered trademark symbol
        + "<'\u0040'" // at sign
        + "<\u00a4" // international currency symbol
        + "<\u0e3f" // baht sign
        + "<\u00a2" // cent sign
        + "<\u20a1" // colon sign
        + "<\u20a2" // cruzeiro sign
        + "<'\u0024'" // dollar sign
        + "<\u20ab" // dong sign
        + "<\u20ac" // euro sign
        + "<\u20a3" // franc sign
        + "<\u20a4" // lira sign
        + "<\u20a5" // mill sign
        + "<\u20a6" // naira sign
        + "<\u20a7" // peseta sign
        + "<\u00a3" // pound-sterling sign
        + "<\u20a8" // rupee sign
        + "<\u20aa" // new shekel sign
        + "<\u20a9" // won sign
        + "<\u00a5" // yen sign
        + "<'\u002a'" // asterisk
        + "<'\\'" // backslash
        + "<'\u0026'" // ampersand
        + "<'\u0023'" // number sign
        + "<'\u0025'" // percent sign
        + "<'\u002b'" // plus sign
        + "<\u00b1" // plus-or-minus sign
        + "<\u00f7" // divide sign
        + "<\u00d7" // multiply sign
        + "<'\u003c'" // less-than sign
        + "<'\u003d'" // equal sign
        + "<'\u003e'" // greater-than sign
        + "<\u00ac" // end of line symbol/logical NOT symbol
        + "<'\u007c'" // vertical line/logical OR symbol
        + "<\u00a6" // broken vertical line
        + "<\u00b0" // degree symbol
        + "<\u00b5" // micro symbol

        // NUMERICS

        + "<0<1<2<3<4<5<6<7<8<9"
        + "<\u00bc<\u00bd<\u00be" // 1/4,1/2,3/4 fractions

        // NON-IGNORABLES
        + "<a,A"
        + "<b,B"
        + "<c,C"
        + "<d,D"
        + "<\u00F0,\u00D0" // eth
        + "<e,E"
        + "<f,F"
        + "<g,G"
        + "<h,H"
        + "<i,I"
        + "<j,J"
        + "<k,K"
        + "<l,L"
        + "<m,M"
        + "<n,N"
        + "<o,O"
        + "<p,P"
        + "<q,Q"
        + "<r,R"
        + "<s, S & SS,\u00DF" // s-zet
        + "<t,T"
        + "& TH, \u00DE &TH, \u00FE " // thorn
        + "<u,U"
        + "<v,V"
        + "<w,W"
        + "<x,X"
        + "<y,Y"
        + "<z,Z"
        + "&AE,\u00C6" // ae & AE ligature
        + "&AE,\u00E6"
        + "&OE,\u0152" // oe & OE ligature
        + "&OE,\u0153";

    // No instantiation
    private CollationRules() {
    }
}
278
279