// Source: GitHub repository PojavLauncherTeam/mobile,
// blob/master/src/java.base/share/classes/java/lang/ConditionalSpecialCasing.java
/*
 * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
26
package java.lang;
27
28
import java.text.BreakIterator;
29
import java.util.HashSet;
30
import java.util.Hashtable;
31
import java.util.Iterator;
32
import java.util.Locale;
33
import sun.text.Normalizer;
34
35
36
/**
 * This is a utility class for {@code String.toLowerCase()} and
 * {@code String.toUpperCase()}, that handles special casing with
 * conditions. In other words, it handles the mappings with conditions
 * that are defined in
 * <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special
 * Casing Properties</a> file.
 * <p>
 * Note that the unconditional case mappings (including 1:M mappings)
 * are handled in {@code Character.toLower/UpperCase()}.
 */
47
final class ConditionalSpecialCasing {
48
49
// context conditions.
50
static final int FINAL_CASED = 1;
51
static final int AFTER_SOFT_DOTTED = 2;
52
static final int MORE_ABOVE = 3;
53
static final int AFTER_I = 4;
54
static final int NOT_BEFORE_DOT = 5;
55
56
// combining class definitions
57
static final int COMBINING_CLASS_ABOVE = 230;
58
59
// Special case mapping entries
60
static Entry[] entry = {
61
//# ================================================================================
62
//# Conditional mappings
63
//# ================================================================================
64
new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
65
new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
66
67
//# ================================================================================
68
//# Locale-sensitive mappings
69
//# ================================================================================
70
//# Lithuanian
71
new Entry(0x0307, new char[]{0x0307}, new char[]{}, "lt", AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE
72
new Entry(0x0049, new char[]{0x0069, 0x0307}, new char[]{0x0049}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I
73
new Entry(0x004A, new char[]{0x006A, 0x0307}, new char[]{0x004A}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J
74
new Entry(0x012E, new char[]{0x012F, 0x0307}, new char[]{0x012E}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK
75
new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
76
new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
77
new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
78
79
//# ================================================================================
80
//# Turkish and Azeri
81
new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
82
new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
83
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
84
new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
85
new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
86
new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
87
new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
88
new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0) // # LATIN SMALL LETTER I
89
};
90
91
// A hash table that contains the above entries
92
static Hashtable<Integer, HashSet<Entry>> entryTable = new Hashtable<>();
93
static {
94
// create hashtable from the entry
95
for (Entry cur : entry) {
96
Integer cp = cur.getCodePoint();
97
HashSet<Entry> set = entryTable.get(cp);
98
if (set == null) {
99
set = new HashSet<>();
100
entryTable.put(cp, set);
101
}
102
set.add(cur);
103
}
104
}
105
106
static int toLowerCaseEx(String src, int index, Locale locale) {
107
char[] result = lookUpTable(src, index, locale, true);
108
109
if (result != null) {
110
if (result.length == 1) {
111
return result[0];
112
} else {
113
return Character.ERROR;
114
}
115
} else {
116
// default to Character class' one
117
return Character.toLowerCase(src.codePointAt(index));
118
}
119
}
120
121
static int toUpperCaseEx(String src, int index, Locale locale) {
122
char[] result = lookUpTable(src, index, locale, false);
123
124
if (result != null) {
125
if (result.length == 1) {
126
return result[0];
127
} else {
128
return Character.ERROR;
129
}
130
} else {
131
// default to Character class' one
132
return Character.toUpperCaseEx(src.codePointAt(index));
133
}
134
}
135
136
static char[] toLowerCaseCharArray(String src, int index, Locale locale) {
137
return lookUpTable(src, index, locale, true);
138
}
139
140
static char[] toUpperCaseCharArray(String src, int index, Locale locale) {
141
char[] result = lookUpTable(src, index, locale, false);
142
if (result != null) {
143
return result;
144
} else {
145
return Character.toUpperCaseCharArray(src.codePointAt(index));
146
}
147
}
148
149
private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
150
HashSet<Entry> set = entryTable.get(src.codePointAt(index));
151
char[] ret = null;
152
153
if (set != null) {
154
Iterator<Entry> iter = set.iterator();
155
String currentLang = locale.getLanguage();
156
while (iter.hasNext()) {
157
Entry entry = iter.next();
158
String conditionLang = entry.getLanguage();
159
if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&
160
isConditionMet(src, index, locale, entry.getCondition())) {
161
ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase();
162
if (conditionLang != null) {
163
break;
164
}
165
}
166
}
167
}
168
169
return ret;
170
}
171
172
private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
173
switch (condition) {
174
case FINAL_CASED:
175
return isFinalCased(src, index, locale);
176
177
case AFTER_SOFT_DOTTED:
178
return isAfterSoftDotted(src, index);
179
180
case MORE_ABOVE:
181
return isMoreAbove(src, index);
182
183
case AFTER_I:
184
return isAfterI(src, index);
185
186
case NOT_BEFORE_DOT:
187
return !isBeforeDot(src, index);
188
189
default:
190
return true;
191
}
192
}
193
194
/**
195
* Implements the "Final_Cased" condition
196
*
197
* Specification: Within the closest word boundaries containing C, there is a cased
198
* letter before C, and there is no cased letter after C.
199
*
200
* Regular Expression:
201
* Before C: [{cased==true}][{wordBoundary!=true}]*
202
* After C: !([{wordBoundary!=true}]*[{cased}])
203
*/
204
private static boolean isFinalCased(String src, int index, Locale locale) {
205
BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
206
wordBoundary.setText(src);
207
int ch;
208
209
// Look for a preceding 'cased' letter
210
for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
211
i -= Character.charCount(ch)) {
212
213
ch = src.codePointBefore(i);
214
if (isCased(ch)) {
215
216
int len = src.length();
217
// Check that there is no 'cased' letter after the index
218
for (i = index + Character.charCount(src.codePointAt(index));
219
(i < len) && !wordBoundary.isBoundary(i);
220
i += Character.charCount(ch)) {
221
222
ch = src.codePointAt(i);
223
if (isCased(ch)) {
224
return false;
225
}
226
}
227
228
return true;
229
}
230
}
231
232
return false;
233
}
234
235
/**
236
* Implements the "After_I" condition
237
*
238
* Specification: The last preceding base character was an uppercase I,
239
* and there is no intervening combining character class 230 (ABOVE).
240
*
241
* Regular Expression:
242
* Before C: [I]([{cc!=230}&{cc!=0}])*
243
*/
244
private static boolean isAfterI(String src, int index) {
245
int ch;
246
int cc;
247
248
// Look for the last preceding base character
249
for (int i = index; i > 0; i -= Character.charCount(ch)) {
250
251
ch = src.codePointBefore(i);
252
253
if (ch == 'I') {
254
return true;
255
} else {
256
cc = Normalizer.getCombiningClass(ch);
257
if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
258
return false;
259
}
260
}
261
}
262
263
return false;
264
}
265
266
/**
267
* Implements the "After_Soft_Dotted" condition
268
*
269
* Specification: The last preceding character with combining class
270
* of zero before C was Soft_Dotted, and there is no intervening
271
* combining character class 230 (ABOVE).
272
*
273
* Regular Expression:
274
* Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
275
*/
276
private static boolean isAfterSoftDotted(String src, int index) {
277
int ch;
278
int cc;
279
280
// Look for the last preceding character
281
for (int i = index; i > 0; i -= Character.charCount(ch)) {
282
283
ch = src.codePointBefore(i);
284
285
if (isSoftDotted(ch)) {
286
return true;
287
} else {
288
cc = Normalizer.getCombiningClass(ch);
289
if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
290
return false;
291
}
292
}
293
}
294
295
return false;
296
}
297
298
/**
299
* Implements the "More_Above" condition
300
*
301
* Specification: C is followed by one or more characters of combining
302
* class 230 (ABOVE) in the combining character sequence.
303
*
304
* Regular Expression:
305
* After C: [{cc!=0}]*[{cc==230}]
306
*/
307
private static boolean isMoreAbove(String src, int index) {
308
int ch;
309
int cc;
310
int len = src.length();
311
312
// Look for a following ABOVE combining class character
313
for (int i = index + Character.charCount(src.codePointAt(index));
314
i < len; i += Character.charCount(ch)) {
315
316
ch = src.codePointAt(i);
317
cc = Normalizer.getCombiningClass(ch);
318
319
if (cc == COMBINING_CLASS_ABOVE) {
320
return true;
321
} else if (cc == 0) {
322
return false;
323
}
324
}
325
326
return false;
327
}
328
329
/**
330
* Implements the "Before_Dot" condition
331
*
332
* Specification: C is followed by {@code U+0307 COMBINING DOT ABOVE}.
333
* Any sequence of characters with a combining class that is
334
* neither 0 nor 230 may intervene between the current character
335
* and the combining dot above.
336
*
337
* Regular Expression:
338
* After C: ([{cc!=230}&{cc!=0}])*[\u0307]
339
*/
340
private static boolean isBeforeDot(String src, int index) {
341
int ch;
342
int cc;
343
int len = src.length();
344
345
// Look for a following COMBINING DOT ABOVE
346
for (int i = index + Character.charCount(src.codePointAt(index));
347
i < len; i += Character.charCount(ch)) {
348
349
ch = src.codePointAt(i);
350
351
if (ch == '\u0307') {
352
return true;
353
} else {
354
cc = Normalizer.getCombiningClass(ch);
355
if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
356
return false;
357
}
358
}
359
}
360
361
return false;
362
}
363
364
/**
365
* Examines whether a character is 'cased'.
366
*
367
* A character C is defined to be 'cased' if and only if at least one of
368
* following are true for C: uppercase==true, or lowercase==true, or
369
* general_category==titlecase_letter.
370
*
371
* The uppercase and lowercase property values are specified in the data
372
* file DerivedCoreProperties.txt in the Unicode Character Database.
373
*/
374
private static boolean isCased(int ch) {
375
int type = Character.getType(ch);
376
if (type == Character.LOWERCASE_LETTER ||
377
type == Character.UPPERCASE_LETTER ||
378
type == Character.TITLECASE_LETTER) {
379
return true;
380
} else {
381
// Check for Other_Lowercase and Other_Uppercase
382
//
383
if ((ch >= 0x02B0) && (ch <= 0x02B8)) {
384
// MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
385
return true;
386
} else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {
387
// MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
388
return true;
389
} else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {
390
// MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
391
return true;
392
} else if (ch == 0x0345) {
393
// COMBINING GREEK YPOGEGRAMMENI
394
return true;
395
} else if (ch == 0x037A) {
396
// GREEK YPOGEGRAMMENI
397
return true;
398
} else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {
399
// MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
400
return true;
401
} else if ((ch >= 0x2160) && (ch <= 0x217F)) {
402
// ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
403
// SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
404
return true;
405
} else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {
406
// CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
407
// CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
408
return true;
409
} else {
410
return false;
411
}
412
}
413
}
414
415
private static boolean isSoftDotted(int ch) {
416
switch (ch) {
417
case 0x0069: // Soft_Dotted # L& LATIN SMALL LETTER I
418
case 0x006A: // Soft_Dotted # L& LATIN SMALL LETTER J
419
case 0x012F: // Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK
420
case 0x0268: // Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE
421
case 0x0456: // Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
422
case 0x0458: // Soft_Dotted # L& CYRILLIC SMALL LETTER JE
423
case 0x1D62: // Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I
424
case 0x1E2D: // Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
425
case 0x1ECB: // Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
426
case 0x2071: // Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I
427
return true;
428
default:
429
return false;
430
}
431
}
432
433
/**
434
* An internal class that represents an entry in the Special Casing Properties.
435
*/
436
static class Entry {
437
int ch;
438
char [] lower;
439
char [] upper;
440
String lang;
441
int condition;
442
443
Entry(int ch, char[] lower, char[] upper, String lang, int condition) {
444
this.ch = ch;
445
this.lower = lower;
446
this.upper = upper;
447
this.lang = lang;
448
this.condition = condition;
449
}
450
451
int getCodePoint() {
452
return ch;
453
}
454
455
char[] getLowerCase() {
456
return lower;
457
}
458
459
char[] getUpperCase() {
460
return upper;
461
}
462
463
String getLanguage() {
464
return lang;
465
}
466
467
int getCondition() {
468
return condition;
469
}
470
}
471
}
472
473