CoCalc -- RBTableBuilder.java

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/java/text/RBTableBuilder.java
⁴¹¹⁵² views
1
/*
2
 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
/*
27
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29
 *
30
 *   The original version of this source code and documentation is copyrighted
31
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32
 * materials are provided under terms of a License Agreement between Taligent
33
 * and Sun. This technology is protected by multiple US and International
34
 * patents. This notice and attribution to Taligent may not be removed.
35
 *   Taligent is a registered trademark of Taligent, Inc.
36
 *
37
 */
38

39
package java.text;
40

41
import java.util.Vector;
42
import sun.text.UCompactIntArray;
43
import sun.text.IntHashtable;
44
import sun.text.ComposedCharIter;
45
import jdk.internal.icu.impl.NormalizerImpl;
46

47
/**
48
 * This class contains all the code to parse a RuleBasedCollator pattern
49
 * and build a RBCollationTables object from it.  A particular instance
50
 * of this class exists only during the actual build process -- once an
51
 * RBCollationTables object has been built, the RBTableBuilder object
52
 * goes away.  This object carries all of the state which is only needed
53
 * during the build process, plus a "shadow" copy of all of the state
54
 * that will go into the tables object itself.  This object communicates
55
 * with RBCollationTables through a separate class, RBCollationTables.BuildAPI,
56
 * which is an inner class of RBCollationTables and provides a separate
57
 * private API for communication with RBTableBuilder.
58
 * This class isn't just an inner class of RBCollationTables itself because
59
 * of its large size.  For source-code readability, it seemed better for the
60
 * builder to have its own source file.
61
 */
62
final class RBTableBuilder {
63

64
    /**
     * Creates a builder bound to the given build interface.  The finished
     * tables are pushed through this interface at the end of {@link #build}.
     *
     * @param tables the private build API of the RBCollationTables object
     *               that is being populated
     */
    public RBTableBuilder(RBCollationTables.BuildAPI tables) {
        this.tables = tables;
    }
67

68
    /**
69
     * Create a table-based collation object with the given rules.
70
     * This is the main function that actually builds the tables and
71
     * stores them back in the RBCollationTables object.  It is called
72
     * ONLY by the RBCollationTables constructor.
73
     * @see RuleBasedCollator#RuleBasedCollator
74
     * @throws    ParseException If the rules format is incorrect.
75
     */
76

77
    public void build(String pattern, int decmp) throws ParseException {
78
        String expChars;
79
        String groupChars;
80
        if (pattern.isEmpty())
81
            throw new ParseException("Build rules empty.", 0);
82

83
        // This array maps Unicode characters to their collation ordering
84
        mapping = new UCompactIntArray(RBCollationTables.UNMAPPED);
85
        // Normalize the build rules.  Find occurances of all decomposed characters
86
        // and normalize the rules before feeding into the builder.  By "normalize",
87
        // we mean that all precomposed Unicode characters must be converted into
88
        // a base character and one or more combining characters (such as accents).
89
        // When there are multiple combining characters attached to a base character,
90
        // the combining characters must be in their canonical order
91
        //
92
        // sherman/Note:
93
        //(1)decmp will be NO_DECOMPOSITION only in ko locale to prevent decompose
94
        //hangual syllables to jamos, so we can actually just call decompose with
95
        //normalizer's IGNORE_HANGUL option turned on
96
        //
97
        //(2)just call the "special version" in NormalizerImpl directly
98
        //pattern = Normalizer.decompose(pattern, false, Normalizer.IGNORE_HANGUL, true);
99
        //
100
        //Normalizer.Mode mode = CollatorUtilities.toNormalizerMode(decmp);
101
        //pattern = Normalizer.normalize(pattern, mode, 0, true);
102

103
        pattern = NormalizerImpl.canonicalDecomposeWithSingleQuotation(pattern);
104

105
        // Build the merged collation entries
106
        // Since rules can be specified in any order in the string
107
        // (e.g. "c , C < d , D < e , E .... C < CH")
108
        // this splits all of the rules in the string out into separate
109
        // objects and then sorts them.  In the above example, it merges the
110
        // "C < CH" rule in just before the "C < D" rule.
111
        //
112

113
        mPattern = new MergeCollation(pattern);
114

115
        int order = 0;
116

117
        // Now walk though each entry and add it to my own tables
118
        for (int i = 0; i < mPattern.getCount(); ++i) {
119
            PatternEntry entry = mPattern.getItemAt(i);
120
            if (entry != null) {
121
                groupChars = entry.getChars();
122
                if (groupChars.length() > 1) {
123
                    switch(groupChars.charAt(groupChars.length()-1)) {
124
                    case '@':
125
                        frenchSec = true;
126
                        groupChars = groupChars.substring(0, groupChars.length()-1);
127
                        break;
128
                    case '!':
129
                        seAsianSwapping = true;
130
                        groupChars = groupChars.substring(0, groupChars.length()-1);
131
                        break;
132
                    }
133
                }
134

135
                order = increment(entry.getStrength(), order);
136
                expChars = entry.getExtension();
137

138
                if (!expChars.isEmpty()) {
139
                    addExpandOrder(groupChars, expChars, order);
140
                } else if (groupChars.length() > 1) {
141
                    char ch = groupChars.charAt(0);
142
                    if (Character.isHighSurrogate(ch) && groupChars.length() == 2) {
143
                        addOrder(Character.toCodePoint(ch, groupChars.charAt(1)), order);
144
                    } else {
145
                        addContractOrder(groupChars, order);
146
                    }
147
                } else {
148
                    char ch = groupChars.charAt(0);
149
                    addOrder(ch, order);
150
                }
151
            }
152
        }
153
        addComposedChars();
154

155
        commit();
156
        mapping.compact();
157
        /*
158
        System.out.println("mappingSize=" + mapping.getKSize());
159
        for (int j = 0; j < 0xffff; j++) {
160
            int value = mapping.elementAt(j);
161
            if (value != RBCollationTables.UNMAPPED)
162
                System.out.println("index=" + Integer.toString(j, 16)
163
                           + ", value=" + Integer.toString(value, 16));
164
        }
165
        */
166
        tables.fillInTables(frenchSec, seAsianSwapping, mapping, contractTable, expandTable,
167
                    contractFlags, maxSecOrder, maxTerOrder);
168
    }
169

170
    /** Add expanding entries for pre-composed unicode characters so that this
171
     * collator can be used reasonably well with decomposition turned off.
172
     */
173
    private void addComposedChars() throws ParseException {
174
        // Iterate through all of the pre-composed characters in Unicode
175
        ComposedCharIter iter = new ComposedCharIter();
176
        int c;
177
        while ((c = iter.next()) != ComposedCharIter.DONE) {
178
            if (getCharOrder(c) == RBCollationTables.UNMAPPED) {
179
                //
180
                // We don't already have an ordering for this pre-composed character.
181
                //
182
                // First, see if the decomposed string is already in our
183
                // tables as a single contracting-string ordering.
184
                // If so, just map the precomposed character to that order.
185
                //
186
                // TODO: What we should really be doing here is trying to find the
187
                // longest initial substring of the decomposition that is present
188
                // in the tables as a contracting character sequence, and find its
189
                // ordering.  Then do this recursively with the remaining chars
190
                // so that we build a list of orderings, and add that list to
191
                // the expansion table.
192
                // That would be more correct but also significantly slower, so
193
                // I'm not totally sure it's worth doing.
194
                //
195
                String s = iter.decomposition();
196

197
                //sherman/Note: if this is 1 character decomposed string, the
198
                //only thing need to do is to check if this decomposed character
199
                //has an entry in our order table, this order is not necessary
200
                //to be a contraction order, if it does have one, add an entry
201
                //for the precomposed character by using the same order, the
202
                //previous impl unnecessarily adds a single character expansion
203
                //entry.
204
                if (s.length() == 1) {
205
                    int order = getCharOrder(s.charAt(0));
206
                    if (order != RBCollationTables.UNMAPPED) {
207
                        addOrder(c, order);
208
                    }
209
                    continue;
210
                } else if (s.length() == 2) {
211
                    char ch0 = s.charAt(0);
212
                    if (Character.isHighSurrogate(ch0)) {
213
                        int order = getCharOrder(s.codePointAt(0));
214
                        if (order != RBCollationTables.UNMAPPED) {
215
                            addOrder(c, order);
216
                        }
217
                        continue;
218
                    }
219
                }
220
                int contractOrder = getContractOrder(s);
221
                if (contractOrder != RBCollationTables.UNMAPPED) {
222
                    addOrder(c, contractOrder);
223
                } else {
224
                    //
225
                    // We don't have a contracting ordering for the entire string
226
                    // that results from the decomposition, but if we have orders
227
                    // for each individual character, we can add an expanding
228
                    // table entry for the pre-composed character
229
                    //
230
                    boolean allThere = true;
231
                    for (int i = 0; i < s.length(); i++) {
232
                        if (getCharOrder(s.charAt(i)) == RBCollationTables.UNMAPPED) {
233
                            allThere = false;
234
                            break;
235
                        }
236
                    }
237
                    if (allThere) {
238
                        addExpandOrder(c, s, RBCollationTables.UNMAPPED);
239
                    }
240
                }
241
            }
242
        }
243
    }
244

245
    /**
246
     * Look up for unmapped values in the expanded character table.
247
     *
248
     * When the expanding character tables are built by addExpandOrder,
249
     * it doesn't know what the final ordering of each character
250
     * in the expansion will be.  Instead, it just puts the raw character
251
     * code into the table, adding CHARINDEX as a flag.  Now that we've
252
     * finished building the mapping table, we can go back and look up
253
     * that character to see what its real collation order is and
254
     * stick that into the expansion table.  That lets us avoid doing
255
     * a two-stage lookup later.
256
     */
257
    private final void commit()
258
    {
259
        if (expandTable != null) {
260
            for (int i = 0; i < expandTable.size(); i++) {
261
                int[] valueList = expandTable.elementAt(i);
262
                for (int j = 0; j < valueList.length; j++) {
263
                    int order = valueList[j];
264
                    if (order < RBCollationTables.EXPANDCHARINDEX && order > CHARINDEX) {
265
                        // found a expanding character that isn't filled in yet
266
                        int ch = order - CHARINDEX;
267

268
                        // Get the real values for the non-filled entry
269
                        int realValue = getCharOrder(ch);
270

271
                        if (realValue == RBCollationTables.UNMAPPED) {
272
                            // The real value is still unmapped, maybe it's ignorable
273
                            valueList[j] = IGNORABLEMASK & ch;
274
                        } else {
275
                            // just fill in the value
276
                            valueList[j] = realValue;
277
                        }
278
                    }
279
                }
280
            }
281
        }
282
    }
283
    /**
284
     *  Increment of the last order based on the comparison level.
285
     */
286
    private final int increment(int aStrength, int lastValue)
287
    {
288
        switch(aStrength)
289
        {
290
        case Collator.PRIMARY:
291
            // increment priamry order  and mask off secondary and tertiary difference
292
            lastValue += PRIMARYORDERINCREMENT;
293
            lastValue &= RBCollationTables.PRIMARYORDERMASK;
294
            isOverIgnore = true;
295
            break;
296
        case Collator.SECONDARY:
297
            // increment secondary order and mask off tertiary difference
298
            lastValue += SECONDARYORDERINCREMENT;
299
            lastValue &= RBCollationTables.SECONDARYDIFFERENCEONLY;
300
            // record max # of ignorable chars with secondary difference
301
            if (!isOverIgnore)
302
                maxSecOrder++;
303
            break;
304
        case Collator.TERTIARY:
305
            // increment tertiary order
306
            lastValue += TERTIARYORDERINCREMENT;
307
            // record max # of ignorable chars with tertiary difference
308
            if (!isOverIgnore)
309
                maxTerOrder++;
310
            break;
311
        }
312
        return lastValue;
313
    }
314

315
    /**
316
     *  Adds a character and its designated order into the collation table.
317
     */
318
    private final void addOrder(int ch, int anOrder)
319
    {
320
        // See if the char already has an order in the mapping table
321
        int order = mapping.elementAt(ch);
322

323
        if (order >= RBCollationTables.CONTRACTCHARINDEX) {
324
            // There's already an entry for this character that points to a contracting
325
            // character table.  Instead of adding the character directly to the mapping
326
            // table, we must add it to the contract table instead.
327
            int length = 1;
328
            if (Character.isSupplementaryCodePoint(ch)) {
329
                length = Character.toChars(ch, keyBuf, 0);
330
            } else {
331
                keyBuf[0] = (char)ch;
332
            }
333
            addContractOrder(new String(keyBuf, 0, length), anOrder);
334
        } else {
335
            // add the entry to the mapping table,
336
            // the same later entry replaces the previous one
337
            mapping.setElementAt(ch, anOrder);
338
        }
339
    }
340

341
    private final void addContractOrder(String groupChars, int anOrder) {
342
        addContractOrder(groupChars, anOrder, true);
343
    }
344

345
    /**
346
     *  Adds the contracting string into the collation table.
347
     */
348
    private final void addContractOrder(String groupChars, int anOrder,
349
                                          boolean fwd)
350
    {
351
        if (contractTable == null) {
352
            contractTable = new Vector<>(INITIALTABLESIZE);
353
        }
354

355
        //initial character
356
        int ch = groupChars.codePointAt(0);
357
        /*
358
        char ch0 = groupChars.charAt(0);
359
        int ch = Character.isHighSurrogate(ch0)?
360
          Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
361
          */
362
        // See if the initial character of the string already has a contract table.
363
        int entry = mapping.elementAt(ch);
364
        Vector<EntryPair> entryTable = getContractValuesImpl(entry - RBCollationTables.CONTRACTCHARINDEX);
365

366
        if (entryTable == null) {
367
            // We need to create a new table of contract entries for this base char
368
            int tableIndex = RBCollationTables.CONTRACTCHARINDEX + contractTable.size();
369
            entryTable = new Vector<>(INITIALTABLESIZE);
370
            contractTable.addElement(entryTable);
371

372
            // Add the initial character's current ordering first. then
373
            // update its mapping to point to this contract table
374
            entryTable.addElement(new EntryPair(groupChars.substring(0,Character.charCount(ch)), entry));
375
            mapping.setElementAt(ch, tableIndex);
376
        }
377

378
        // Now add (or replace) this string in the table
379
        int index = RBCollationTables.getEntry(entryTable, groupChars, fwd);
380
        if (index != RBCollationTables.UNMAPPED) {
381
            EntryPair pair = entryTable.elementAt(index);
382
            pair.value = anOrder;
383
        } else {
384
            EntryPair pair = entryTable.lastElement();
385

386
            // NOTE:  This little bit of logic is here to speed CollationElementIterator
387
            // .nextContractChar().  This code ensures that the longest sequence in
388
            // this list is always the _last_ one in the list.  This keeps
389
            // nextContractChar() from having to search the entire list for the longest
390
            // sequence.
391
            if (groupChars.length() > pair.entryName.length()) {
392
                entryTable.addElement(new EntryPair(groupChars, anOrder, fwd));
393
            } else {
394
                entryTable.insertElementAt(new EntryPair(groupChars, anOrder,
395
                        fwd), entryTable.size() - 1);
396
            }
397
        }
398

399
        // If this was a forward mapping for a contracting string, also add a
400
        // reverse mapping for it, so that CollationElementIterator.previous
401
        // can work right
402
        if (fwd && groupChars.length() > 1) {
403
            addContractFlags(groupChars);
404
            addContractOrder(new StringBuffer(groupChars).reverse().toString(),
405
                             anOrder, false);
406
        }
407
    }
408

409
    /**
410
     * If the given string has been specified as a contracting string
411
     * in this collation table, return its ordering.
412
     * Otherwise return UNMAPPED.
413
     */
414
    private int getContractOrder(String groupChars)
415
    {
416
        int result = RBCollationTables.UNMAPPED;
417
        if (contractTable != null) {
418
            int ch = groupChars.codePointAt(0);
419
            /*
420
            char ch0 = groupChars.charAt(0);
421
            int ch = Character.isHighSurrogate(ch0)?
422
              Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
423
              */
424
            Vector<EntryPair> entryTable = getContractValues(ch);
425
            if (entryTable != null) {
426
                int index = RBCollationTables.getEntry(entryTable, groupChars, true);
427
                if (index != RBCollationTables.UNMAPPED) {
428
                    EntryPair pair = entryTable.elementAt(index);
429
                    result = pair.value;
430
                }
431
            }
432
        }
433
        return result;
434
    }
435

436
    private final int getCharOrder(int ch) {
437
        int order = mapping.elementAt(ch);
438

439
        if (order >= RBCollationTables.CONTRACTCHARINDEX) {
440
            Vector<EntryPair> groupList = getContractValuesImpl(order - RBCollationTables.CONTRACTCHARINDEX);
441
            EntryPair pair = groupList.firstElement();
442
            order = pair.value;
443
        }
444
        return order;
445
    }
446

447
    /**
448
     *  Get the entry of hash table of the contracting string in the collation
449
     *  table.
450
     *  @param ch the starting character of the contracting string
451
     */
452
    private Vector<EntryPair> getContractValues(int ch)
453
    {
454
        int index = mapping.elementAt(ch);
455
        return getContractValuesImpl(index - RBCollationTables.CONTRACTCHARINDEX);
456
    }
457

458
    private Vector<EntryPair> getContractValuesImpl(int index)
459
    {
460
        if (index >= 0)
461
        {
462
            return contractTable.elementAt(index);
463
        }
464
        else // not found
465
        {
466
            return null;
467
        }
468
    }
469

470
    /**
471
     *  Adds the expanding string into the collation table.
472
     */
473
    private final void addExpandOrder(String contractChars,
474
                                String expandChars,
475
                                int anOrder) throws ParseException
476
    {
477
        // Create an expansion table entry
478
        int tableIndex = addExpansion(anOrder, expandChars);
479

480
        // And add its index into the main mapping table
481
        if (contractChars.length() > 1) {
482
            char ch = contractChars.charAt(0);
483
            if (Character.isHighSurrogate(ch) && contractChars.length() == 2) {
484
                char ch2 = contractChars.charAt(1);
485
                if (Character.isLowSurrogate(ch2)) {
486
                    //only add into table when it is a legal surrogate
487
                    addOrder(Character.toCodePoint(ch, ch2), tableIndex);
488
                }
489
            } else {
490
                addContractOrder(contractChars, tableIndex);
491
            }
492
        } else {
493
            addOrder(contractChars.charAt(0), tableIndex);
494
        }
495
    }
496

497
    private final void addExpandOrder(int ch, String expandChars, int anOrder)
498
      throws ParseException
499
    {
500
        int tableIndex = addExpansion(anOrder, expandChars);
501
        addOrder(ch, tableIndex);
502
    }
503

504
    /**
505
     * Create a new entry in the expansion table that contains the orderings
506
     * for the given characers.  If anOrder is valid, it is added to the
507
     * beginning of the expanded list of orders.
508
     */
509
    private int addExpansion(int anOrder, String expandChars) {
510
        if (expandTable == null) {
511
            expandTable = new Vector<>(INITIALTABLESIZE);
512
        }
513

514
        // If anOrder is valid, we want to add it at the beginning of the list
515
        int offset = (anOrder == RBCollationTables.UNMAPPED) ? 0 : 1;
516

517
        int[] valueList = new int[expandChars.length() + offset];
518
        if (offset == 1) {
519
            valueList[0] = anOrder;
520
        }
521

522
        int j = offset;
523
        for (int i = 0; i < expandChars.length(); i++) {
524
            char ch0 = expandChars.charAt(i);
525
            char ch1;
526
            int ch;
527
            if (Character.isHighSurrogate(ch0)) {
528
                if (++i == expandChars.length() ||
529
                    !Character.isLowSurrogate(ch1=expandChars.charAt(i))) {
530
                    //ether we are missing the low surrogate or the next char
531
                    //is not a legal low surrogate, so stop loop
532
                    break;
533
                }
534
                ch = Character.toCodePoint(ch0, ch1);
535

536
            } else {
537
                ch = ch0;
538
            }
539

540
            int mapValue = getCharOrder(ch);
541

542
            if (mapValue != RBCollationTables.UNMAPPED) {
543
                valueList[j++] = mapValue;
544
            } else {
545
                // can't find it in the table, will be filled in by commit().
546
                valueList[j++] = CHARINDEX + ch;
547
            }
548
        }
549
        if (j < valueList.length) {
550
            //we had at least one supplementary character, the size of valueList
551
            //is bigger than it really needs...
552
            int[] tmpBuf = new int[j];
553
            while (--j >= 0) {
554
                tmpBuf[j] = valueList[j];
555
            }
556
            valueList = tmpBuf;
557
        }
558
        // Add the expanding char list into the expansion table.
559
        int tableIndex = RBCollationTables.EXPANDCHARINDEX + expandTable.size();
560
        expandTable.addElement(valueList);
561

562
        return tableIndex;
563
    }
564

565
    private void addContractFlags(String chars) {
566
        char c0;
567
        int c;
568
        int len = chars.length();
569
        for (int i = 0; i < len; i++) {
570
            c0 = chars.charAt(i);
571
            c = Character.isHighSurrogate(c0)
572
                          ?Character.toCodePoint(c0, chars.charAt(++i))
573
                          :c0;
574
            contractFlags.put(c, 1);
575
        }
576
    }
577

578
    // ==============================================================
579
    // constants
580
    // ==============================================================
581
    static final int CHARINDEX = 0x70000000;  // need look up in .commit()
582

583
    private static final int IGNORABLEMASK = 0x0000ffff;
584
    private static final int PRIMARYORDERINCREMENT = 0x00010000;
585
    private static final int SECONDARYORDERINCREMENT = 0x00000100;
586
    private static final int TERTIARYORDERINCREMENT = 0x00000001;
587
    private static final int INITIALTABLESIZE = 20;
588
    private static final int MAXKEYSIZE = 5;
589

590
    // ==============================================================
591
    // instance variables
592
    // ==============================================================
593

594
    // variables used by the build process
595
    private RBCollationTables.BuildAPI tables = null;
596
    private MergeCollation mPattern = null;
597
    private boolean isOverIgnore = false;
598
    private char[] keyBuf = new char[MAXKEYSIZE];
599
    private IntHashtable contractFlags = new IntHashtable(100);
600

601
    // "shadow" copies of the instance variables in RBCollationTables
602
    // (the values in these variables are copied back into RBCollationTables
603
    // at the end of the build process)
604
    private boolean frenchSec = false;
605
    private boolean seAsianSwapping = false;
606

607
    private UCompactIntArray mapping = null;
608
    private Vector<Vector<EntryPair>>   contractTable = null;
609
    private Vector<int[]>   expandTable = null;
610

611
    private short maxSecOrder = 0;
612
    private short maxTerOrder = 0;
613
}
614

615
Product

Resources

Company