Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/java/text/RBTableBuilder.java
41152 views
1
/*
2
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
/*
27
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28
* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29
*
30
* The original version of this source code and documentation is copyrighted
31
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32
* materials are provided under terms of a License Agreement between Taligent
33
* and Sun. This technology is protected by multiple US and International
34
* patents. This notice and attribution to Taligent may not be removed.
35
* Taligent is a registered trademark of Taligent, Inc.
36
*
37
*/
38
39
package java.text;
40
41
import java.util.Arrays;
import java.util.Vector;
import sun.text.UCompactIntArray;
import sun.text.IntHashtable;
import sun.text.ComposedCharIter;
import jdk.internal.icu.impl.NormalizerImpl;
46
47
/**
48
* This class contains all the code to parse a RuleBasedCollator pattern
49
* and build a RBCollationTables object from it. A particular instance
50
* of this class exists only during the actual build process-- once an
51
* RBCollationTables object has been built, the RBTableBuilder object
52
* goes away. This object carries all of the state which is only needed
53
* during the build process, plus a "shadow" copy of all of the state
54
* that will go into the tables object itself. This object communicates
55
* with RBCollationTables through a separate class, RBCollationTables.BuildAPI,
56
* this is an inner class of RBCollationTables and provides a separate
57
* private API for communication with RBTableBuilder.
58
* This class isn't just an inner class of RBCollationTables itself because
59
* of its large size. For source-code readability, it seemed better for the
60
* builder to have its own source file.
61
*/
62
final class RBTableBuilder {
63
64
public RBTableBuilder(RBCollationTables.BuildAPI tables) {
65
this.tables = tables;
66
}
67
68
/**
69
* Create a table-based collation object with the given rules.
70
* This is the main function that actually builds the tables and
71
* stores them back in the RBCollationTables object. It is called
72
* ONLY by the RBCollationTables constructor.
73
* @see RuleBasedCollator#RuleBasedCollator
74
* @throws ParseException If the rules format is incorrect.
75
*/
76
77
public void build(String pattern, int decmp) throws ParseException {
78
String expChars;
79
String groupChars;
80
if (pattern.isEmpty())
81
throw new ParseException("Build rules empty.", 0);
82
83
// This array maps Unicode characters to their collation ordering
84
mapping = new UCompactIntArray(RBCollationTables.UNMAPPED);
85
// Normalize the build rules. Find occurances of all decomposed characters
86
// and normalize the rules before feeding into the builder. By "normalize",
87
// we mean that all precomposed Unicode characters must be converted into
88
// a base character and one or more combining characters (such as accents).
89
// When there are multiple combining characters attached to a base character,
90
// the combining characters must be in their canonical order
91
//
92
// sherman/Note:
93
//(1)decmp will be NO_DECOMPOSITION only in ko locale to prevent decompose
94
//hangual syllables to jamos, so we can actually just call decompose with
95
//normalizer's IGNORE_HANGUL option turned on
96
//
97
//(2)just call the "special version" in NormalizerImpl directly
98
//pattern = Normalizer.decompose(pattern, false, Normalizer.IGNORE_HANGUL, true);
99
//
100
//Normalizer.Mode mode = CollatorUtilities.toNormalizerMode(decmp);
101
//pattern = Normalizer.normalize(pattern, mode, 0, true);
102
103
pattern = NormalizerImpl.canonicalDecomposeWithSingleQuotation(pattern);
104
105
// Build the merged collation entries
106
// Since rules can be specified in any order in the string
107
// (e.g. "c , C < d , D < e , E .... C < CH")
108
// this splits all of the rules in the string out into separate
109
// objects and then sorts them. In the above example, it merges the
110
// "C < CH" rule in just before the "C < D" rule.
111
//
112
113
mPattern = new MergeCollation(pattern);
114
115
int order = 0;
116
117
// Now walk though each entry and add it to my own tables
118
for (int i = 0; i < mPattern.getCount(); ++i) {
119
PatternEntry entry = mPattern.getItemAt(i);
120
if (entry != null) {
121
groupChars = entry.getChars();
122
if (groupChars.length() > 1) {
123
switch(groupChars.charAt(groupChars.length()-1)) {
124
case '@':
125
frenchSec = true;
126
groupChars = groupChars.substring(0, groupChars.length()-1);
127
break;
128
case '!':
129
seAsianSwapping = true;
130
groupChars = groupChars.substring(0, groupChars.length()-1);
131
break;
132
}
133
}
134
135
order = increment(entry.getStrength(), order);
136
expChars = entry.getExtension();
137
138
if (!expChars.isEmpty()) {
139
addExpandOrder(groupChars, expChars, order);
140
} else if (groupChars.length() > 1) {
141
char ch = groupChars.charAt(0);
142
if (Character.isHighSurrogate(ch) && groupChars.length() == 2) {
143
addOrder(Character.toCodePoint(ch, groupChars.charAt(1)), order);
144
} else {
145
addContractOrder(groupChars, order);
146
}
147
} else {
148
char ch = groupChars.charAt(0);
149
addOrder(ch, order);
150
}
151
}
152
}
153
addComposedChars();
154
155
commit();
156
mapping.compact();
157
/*
158
System.out.println("mappingSize=" + mapping.getKSize());
159
for (int j = 0; j < 0xffff; j++) {
160
int value = mapping.elementAt(j);
161
if (value != RBCollationTables.UNMAPPED)
162
System.out.println("index=" + Integer.toString(j, 16)
163
+ ", value=" + Integer.toString(value, 16));
164
}
165
*/
166
tables.fillInTables(frenchSec, seAsianSwapping, mapping, contractTable, expandTable,
167
contractFlags, maxSecOrder, maxTerOrder);
168
}
169
170
/** Add expanding entries for pre-composed unicode characters so that this
171
* collator can be used reasonably well with decomposition turned off.
172
*/
173
private void addComposedChars() throws ParseException {
174
// Iterate through all of the pre-composed characters in Unicode
175
ComposedCharIter iter = new ComposedCharIter();
176
int c;
177
while ((c = iter.next()) != ComposedCharIter.DONE) {
178
if (getCharOrder(c) == RBCollationTables.UNMAPPED) {
179
//
180
// We don't already have an ordering for this pre-composed character.
181
//
182
// First, see if the decomposed string is already in our
183
// tables as a single contracting-string ordering.
184
// If so, just map the precomposed character to that order.
185
//
186
// TODO: What we should really be doing here is trying to find the
187
// longest initial substring of the decomposition that is present
188
// in the tables as a contracting character sequence, and find its
189
// ordering. Then do this recursively with the remaining chars
190
// so that we build a list of orderings, and add that list to
191
// the expansion table.
192
// That would be more correct but also significantly slower, so
193
// I'm not totally sure it's worth doing.
194
//
195
String s = iter.decomposition();
196
197
//sherman/Note: if this is 1 character decomposed string, the
198
//only thing need to do is to check if this decomposed character
199
//has an entry in our order table, this order is not necessary
200
//to be a contraction order, if it does have one, add an entry
201
//for the precomposed character by using the same order, the
202
//previous impl unnecessarily adds a single character expansion
203
//entry.
204
if (s.length() == 1) {
205
int order = getCharOrder(s.charAt(0));
206
if (order != RBCollationTables.UNMAPPED) {
207
addOrder(c, order);
208
}
209
continue;
210
} else if (s.length() == 2) {
211
char ch0 = s.charAt(0);
212
if (Character.isHighSurrogate(ch0)) {
213
int order = getCharOrder(s.codePointAt(0));
214
if (order != RBCollationTables.UNMAPPED) {
215
addOrder(c, order);
216
}
217
continue;
218
}
219
}
220
int contractOrder = getContractOrder(s);
221
if (contractOrder != RBCollationTables.UNMAPPED) {
222
addOrder(c, contractOrder);
223
} else {
224
//
225
// We don't have a contracting ordering for the entire string
226
// that results from the decomposition, but if we have orders
227
// for each individual character, we can add an expanding
228
// table entry for the pre-composed character
229
//
230
boolean allThere = true;
231
for (int i = 0; i < s.length(); i++) {
232
if (getCharOrder(s.charAt(i)) == RBCollationTables.UNMAPPED) {
233
allThere = false;
234
break;
235
}
236
}
237
if (allThere) {
238
addExpandOrder(c, s, RBCollationTables.UNMAPPED);
239
}
240
}
241
}
242
}
243
}
244
245
/**
246
* Look up for unmapped values in the expanded character table.
247
*
248
* When the expanding character tables are built by addExpandOrder,
249
* it doesn't know what the final ordering of each character
250
* in the expansion will be. Instead, it just puts the raw character
251
* code into the table, adding CHARINDEX as a flag. Now that we've
252
* finished building the mapping table, we can go back and look up
253
* that character to see what its real collation order is and
254
* stick that into the expansion table. That lets us avoid doing
255
* a two-stage lookup later.
256
*/
257
private final void commit()
258
{
259
if (expandTable != null) {
260
for (int i = 0; i < expandTable.size(); i++) {
261
int[] valueList = expandTable.elementAt(i);
262
for (int j = 0; j < valueList.length; j++) {
263
int order = valueList[j];
264
if (order < RBCollationTables.EXPANDCHARINDEX && order > CHARINDEX) {
265
// found a expanding character that isn't filled in yet
266
int ch = order - CHARINDEX;
267
268
// Get the real values for the non-filled entry
269
int realValue = getCharOrder(ch);
270
271
if (realValue == RBCollationTables.UNMAPPED) {
272
// The real value is still unmapped, maybe it's ignorable
273
valueList[j] = IGNORABLEMASK & ch;
274
} else {
275
// just fill in the value
276
valueList[j] = realValue;
277
}
278
}
279
}
280
}
281
}
282
}
283
/**
284
* Increment of the last order based on the comparison level.
285
*/
286
private final int increment(int aStrength, int lastValue)
287
{
288
switch(aStrength)
289
{
290
case Collator.PRIMARY:
291
// increment priamry order and mask off secondary and tertiary difference
292
lastValue += PRIMARYORDERINCREMENT;
293
lastValue &= RBCollationTables.PRIMARYORDERMASK;
294
isOverIgnore = true;
295
break;
296
case Collator.SECONDARY:
297
// increment secondary order and mask off tertiary difference
298
lastValue += SECONDARYORDERINCREMENT;
299
lastValue &= RBCollationTables.SECONDARYDIFFERENCEONLY;
300
// record max # of ignorable chars with secondary difference
301
if (!isOverIgnore)
302
maxSecOrder++;
303
break;
304
case Collator.TERTIARY:
305
// increment tertiary order
306
lastValue += TERTIARYORDERINCREMENT;
307
// record max # of ignorable chars with tertiary difference
308
if (!isOverIgnore)
309
maxTerOrder++;
310
break;
311
}
312
return lastValue;
313
}
314
315
/**
316
* Adds a character and its designated order into the collation table.
317
*/
318
private final void addOrder(int ch, int anOrder)
319
{
320
// See if the char already has an order in the mapping table
321
int order = mapping.elementAt(ch);
322
323
if (order >= RBCollationTables.CONTRACTCHARINDEX) {
324
// There's already an entry for this character that points to a contracting
325
// character table. Instead of adding the character directly to the mapping
326
// table, we must add it to the contract table instead.
327
int length = 1;
328
if (Character.isSupplementaryCodePoint(ch)) {
329
length = Character.toChars(ch, keyBuf, 0);
330
} else {
331
keyBuf[0] = (char)ch;
332
}
333
addContractOrder(new String(keyBuf, 0, length), anOrder);
334
} else {
335
// add the entry to the mapping table,
336
// the same later entry replaces the previous one
337
mapping.setElementAt(ch, anOrder);
338
}
339
}
340
341
private final void addContractOrder(String groupChars, int anOrder) {
342
addContractOrder(groupChars, anOrder, true);
343
}
344
345
/**
346
* Adds the contracting string into the collation table.
347
*/
348
private final void addContractOrder(String groupChars, int anOrder,
349
boolean fwd)
350
{
351
if (contractTable == null) {
352
contractTable = new Vector<>(INITIALTABLESIZE);
353
}
354
355
//initial character
356
int ch = groupChars.codePointAt(0);
357
/*
358
char ch0 = groupChars.charAt(0);
359
int ch = Character.isHighSurrogate(ch0)?
360
Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
361
*/
362
// See if the initial character of the string already has a contract table.
363
int entry = mapping.elementAt(ch);
364
Vector<EntryPair> entryTable = getContractValuesImpl(entry - RBCollationTables.CONTRACTCHARINDEX);
365
366
if (entryTable == null) {
367
// We need to create a new table of contract entries for this base char
368
int tableIndex = RBCollationTables.CONTRACTCHARINDEX + contractTable.size();
369
entryTable = new Vector<>(INITIALTABLESIZE);
370
contractTable.addElement(entryTable);
371
372
// Add the initial character's current ordering first. then
373
// update its mapping to point to this contract table
374
entryTable.addElement(new EntryPair(groupChars.substring(0,Character.charCount(ch)), entry));
375
mapping.setElementAt(ch, tableIndex);
376
}
377
378
// Now add (or replace) this string in the table
379
int index = RBCollationTables.getEntry(entryTable, groupChars, fwd);
380
if (index != RBCollationTables.UNMAPPED) {
381
EntryPair pair = entryTable.elementAt(index);
382
pair.value = anOrder;
383
} else {
384
EntryPair pair = entryTable.lastElement();
385
386
// NOTE: This little bit of logic is here to speed CollationElementIterator
387
// .nextContractChar(). This code ensures that the longest sequence in
388
// this list is always the _last_ one in the list. This keeps
389
// nextContractChar() from having to search the entire list for the longest
390
// sequence.
391
if (groupChars.length() > pair.entryName.length()) {
392
entryTable.addElement(new EntryPair(groupChars, anOrder, fwd));
393
} else {
394
entryTable.insertElementAt(new EntryPair(groupChars, anOrder,
395
fwd), entryTable.size() - 1);
396
}
397
}
398
399
// If this was a forward mapping for a contracting string, also add a
400
// reverse mapping for it, so that CollationElementIterator.previous
401
// can work right
402
if (fwd && groupChars.length() > 1) {
403
addContractFlags(groupChars);
404
addContractOrder(new StringBuffer(groupChars).reverse().toString(),
405
anOrder, false);
406
}
407
}
408
409
/**
410
* If the given string has been specified as a contracting string
411
* in this collation table, return its ordering.
412
* Otherwise return UNMAPPED.
413
*/
414
private int getContractOrder(String groupChars)
415
{
416
int result = RBCollationTables.UNMAPPED;
417
if (contractTable != null) {
418
int ch = groupChars.codePointAt(0);
419
/*
420
char ch0 = groupChars.charAt(0);
421
int ch = Character.isHighSurrogate(ch0)?
422
Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
423
*/
424
Vector<EntryPair> entryTable = getContractValues(ch);
425
if (entryTable != null) {
426
int index = RBCollationTables.getEntry(entryTable, groupChars, true);
427
if (index != RBCollationTables.UNMAPPED) {
428
EntryPair pair = entryTable.elementAt(index);
429
result = pair.value;
430
}
431
}
432
}
433
return result;
434
}
435
436
private final int getCharOrder(int ch) {
437
int order = mapping.elementAt(ch);
438
439
if (order >= RBCollationTables.CONTRACTCHARINDEX) {
440
Vector<EntryPair> groupList = getContractValuesImpl(order - RBCollationTables.CONTRACTCHARINDEX);
441
EntryPair pair = groupList.firstElement();
442
order = pair.value;
443
}
444
return order;
445
}
446
447
/**
448
* Get the entry of hash table of the contracting string in the collation
449
* table.
450
* @param ch the starting character of the contracting string
451
*/
452
private Vector<EntryPair> getContractValues(int ch)
453
{
454
int index = mapping.elementAt(ch);
455
return getContractValuesImpl(index - RBCollationTables.CONTRACTCHARINDEX);
456
}
457
458
private Vector<EntryPair> getContractValuesImpl(int index)
459
{
460
if (index >= 0)
461
{
462
return contractTable.elementAt(index);
463
}
464
else // not found
465
{
466
return null;
467
}
468
}
469
470
/**
471
* Adds the expanding string into the collation table.
472
*/
473
private final void addExpandOrder(String contractChars,
474
String expandChars,
475
int anOrder) throws ParseException
476
{
477
// Create an expansion table entry
478
int tableIndex = addExpansion(anOrder, expandChars);
479
480
// And add its index into the main mapping table
481
if (contractChars.length() > 1) {
482
char ch = contractChars.charAt(0);
483
if (Character.isHighSurrogate(ch) && contractChars.length() == 2) {
484
char ch2 = contractChars.charAt(1);
485
if (Character.isLowSurrogate(ch2)) {
486
//only add into table when it is a legal surrogate
487
addOrder(Character.toCodePoint(ch, ch2), tableIndex);
488
}
489
} else {
490
addContractOrder(contractChars, tableIndex);
491
}
492
} else {
493
addOrder(contractChars.charAt(0), tableIndex);
494
}
495
}
496
497
private final void addExpandOrder(int ch, String expandChars, int anOrder)
498
throws ParseException
499
{
500
int tableIndex = addExpansion(anOrder, expandChars);
501
addOrder(ch, tableIndex);
502
}
503
504
/**
505
* Create a new entry in the expansion table that contains the orderings
506
* for the given characers. If anOrder is valid, it is added to the
507
* beginning of the expanded list of orders.
508
*/
509
private int addExpansion(int anOrder, String expandChars) {
510
if (expandTable == null) {
511
expandTable = new Vector<>(INITIALTABLESIZE);
512
}
513
514
// If anOrder is valid, we want to add it at the beginning of the list
515
int offset = (anOrder == RBCollationTables.UNMAPPED) ? 0 : 1;
516
517
int[] valueList = new int[expandChars.length() + offset];
518
if (offset == 1) {
519
valueList[0] = anOrder;
520
}
521
522
int j = offset;
523
for (int i = 0; i < expandChars.length(); i++) {
524
char ch0 = expandChars.charAt(i);
525
char ch1;
526
int ch;
527
if (Character.isHighSurrogate(ch0)) {
528
if (++i == expandChars.length() ||
529
!Character.isLowSurrogate(ch1=expandChars.charAt(i))) {
530
//ether we are missing the low surrogate or the next char
531
//is not a legal low surrogate, so stop loop
532
break;
533
}
534
ch = Character.toCodePoint(ch0, ch1);
535
536
} else {
537
ch = ch0;
538
}
539
540
int mapValue = getCharOrder(ch);
541
542
if (mapValue != RBCollationTables.UNMAPPED) {
543
valueList[j++] = mapValue;
544
} else {
545
// can't find it in the table, will be filled in by commit().
546
valueList[j++] = CHARINDEX + ch;
547
}
548
}
549
if (j < valueList.length) {
550
//we had at least one supplementary character, the size of valueList
551
//is bigger than it really needs...
552
int[] tmpBuf = new int[j];
553
while (--j >= 0) {
554
tmpBuf[j] = valueList[j];
555
}
556
valueList = tmpBuf;
557
}
558
// Add the expanding char list into the expansion table.
559
int tableIndex = RBCollationTables.EXPANDCHARINDEX + expandTable.size();
560
expandTable.addElement(valueList);
561
562
return tableIndex;
563
}
564
565
private void addContractFlags(String chars) {
566
char c0;
567
int c;
568
int len = chars.length();
569
for (int i = 0; i < len; i++) {
570
c0 = chars.charAt(i);
571
c = Character.isHighSurrogate(c0)
572
?Character.toCodePoint(c0, chars.charAt(++i))
573
:c0;
574
contractFlags.put(c, 1);
575
}
576
}
577
578
// ==============================================================
579
// constants
580
// ==============================================================
581
static final int CHARINDEX = 0x70000000; // need look up in .commit()
582
583
private static final int IGNORABLEMASK = 0x0000ffff;
584
private static final int PRIMARYORDERINCREMENT = 0x00010000;
585
private static final int SECONDARYORDERINCREMENT = 0x00000100;
586
private static final int TERTIARYORDERINCREMENT = 0x00000001;
587
private static final int INITIALTABLESIZE = 20;
588
private static final int MAXKEYSIZE = 5;
589
590
// ==============================================================
591
// instance variables
592
// ==============================================================
593
594
// variables used by the build process
595
private RBCollationTables.BuildAPI tables = null;
596
private MergeCollation mPattern = null;
597
private boolean isOverIgnore = false;
598
private char[] keyBuf = new char[MAXKEYSIZE];
599
private IntHashtable contractFlags = new IntHashtable(100);
600
601
// "shadow" copies of the instance variables in RBCollationTables
602
// (the values in these variables are copied back into RBCollationTables
603
// at the end of the build process)
604
private boolean frenchSec = false;
605
private boolean seAsianSwapping = false;
606
607
private UCompactIntArray mapping = null;
608
private Vector<Vector<EntryPair>> contractTable = null;
609
private Vector<int[]> expandTable = null;
610
611
private short maxSecOrder = 0;
612
private short maxTerOrder = 0;
613
}
614
615