CoCalc -- CollationElementIterator.java

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/java/text/CollationElementIterator.java
⁴¹¹⁵² views
1
/*
2
 * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
/*
27
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29
 *
30
 *   The original version of this source code and documentation is copyrighted
31
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32
 * materials are provided under terms of a License Agreement between Taligent
33
 * and Sun. This technology is protected by multiple US and International
34
 * patents. This notice and attribution to Taligent may not be removed.
35
 *   Taligent is a registered trademark of Taligent, Inc.
36
 *
37
 */
38

39
package java.text;
40

41
import java.lang.Character;
42
import java.util.Vector;
43
import sun.text.CollatorUtilities;
44
import jdk.internal.icu.text.NormalizerBase;
45

46
/**
47
 * The {@code CollationElementIterator} class is used as an iterator
48
 * to walk through each character of an international string. Use the iterator
49
 * to return the ordering priority of the positioned character. The ordering
50
 * priority of a character, which we refer to as a key, defines how a character
51
 * is collated in the given collation object.
52
 *
53
 * <p>
54
 * For example, consider the following in Spanish:
55
 * <blockquote>
56
 * <pre>
57
 * "ca" &rarr; the first key is key('c') and second key is key('a').
58
 * "cha" &rarr; the first key is key('ch') and second key is key('a').
59
 * </pre>
60
 * </blockquote>
61
 * And in German,
62
 * <blockquote>
63
 * <pre>
64
 * "\u00e4b" &rarr; the first key is key('a'), the second key is key('e'), and
65
 * the third key is key('b').
66
 * </pre>
67
 * </blockquote>
68
 * The key of a character is an integer composed of primary order(short),
69
 * secondary order(byte), and tertiary order(byte). Java strictly defines
70
 * the size and signedness of its primitive data types. Therefore, the static
71
 * functions {@code primaryOrder}, {@code secondaryOrder}, and
72
 * {@code tertiaryOrder} return {@code int}, {@code short},
73
 * and {@code short} respectively to ensure the correctness of the key
74
 * value.
75
 *
76
 * <p>
77
 * Example of the iterator usage,
78
 * <blockquote>
79
 * <pre>
80
 *
81
 *  String testString = "This is a test";
82
 *  Collator col = Collator.getInstance();
83
 *  if (col instanceof RuleBasedCollator) {
84
 *      RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
85
 *      CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
86
 *      int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
87
 *          :
88
 *  }
89
 * </pre>
90
 * </blockquote>
91
 *
92
 * <p>
93
 * {@code CollationElementIterator.next} returns the collation order
94
 * of the next character. A collation order consists of primary order,
95
 * secondary order and tertiary order. The data type of the collation
96
 * order is <strong>int</strong>. The first 16 bits of a collation order
 * are its primary order; the next 8 bits are the secondary order and the
 * last 8 bits are the tertiary order.
99
 *
100
 * <p><b>Note:</b> {@code CollationElementIterator} is a part of
101
 * {@code RuleBasedCollator} implementation. It is only usable
102
 * with {@code RuleBasedCollator} instances.
103
 *
104
 * @see                Collator
105
 * @see                RuleBasedCollator
106
 * @author             Helena Shih, Laura Werner, Richard Gillam
107
 * @since 1.1
108
 */
109
public final class CollationElementIterator
110
{
111
    /**
112
     * Null order which indicates the end of string is reached by the
113
     * cursor.
114
     */
115
    public static final int NULLORDER = 0xffffffff;
116

117
    /**
     * Creates an iterator over a string, driven by the collation tables and
     * decomposition mode of {@code owner}.  No normalizer is created for an
     * empty string; the iteration methods then report NULLORDER.
     *
     * @param sourceText the string to iterate over.
     * @param owner the collator whose rules define the collation elements.
     */
    CollationElementIterator(String sourceText, RuleBasedCollator owner) {
        this.owner = owner;
        this.ordering = owner.getTables();
        if (sourceText.isEmpty()) {
            // text stays null, which next()/previous() treat as "nothing to iterate"
            return;
        }
        NormalizerBase.Mode decompositionMode =
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
        this.text = new NormalizerBase(sourceText, decompositionMode);
    }
134

135
    /**
     * Creates an iterator over the characters supplied by a
     * {@code CharacterIterator}, driven by the collation tables and
     * decomposition mode of {@code owner}.  Unlike the {@code String}
     * variant, a normalizer is always created here.
     *
     * @param sourceText the character source to iterate over.
     * @param owner the collator whose rules define the collation elements.
     */
    CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
        this.owner = owner;
        this.ordering = owner.getTables();
        this.text = new NormalizerBase(
                sourceText,
                CollatorUtilities.toNormalizerMode(owner.getDecomposition()));
    }
150

151
    /**
152
     * Resets the cursor to the beginning of the string.  The next call
153
     * to next() will return the first collation element in the string.
154
     */
155
    public void reset()
156
    {
157
        if (text != null) {
158
            text.reset();
159
            NormalizerBase.Mode mode =
160
                CollatorUtilities.toNormalizerMode(owner.getDecomposition());
161
            text.setMode(mode);
162
        }
163
        buffer = null;
164
        expIndex = 0;
165
        swapOrder = 0;
166
    }
167

168
    /**
169
     * Get the next collation element in the string.  <p>This iterator iterates
170
     * over a sequence of collation elements that were built from the string.
171
     * Because there isn't necessarily a one-to-one mapping from characters to
172
     * collation elements, this doesn't mean the same thing as "return the
173
     * collation element [or ordering priority] of the next character in the
174
     * string".</p>
175
     * <p>This function returns the collation element that the iterator is currently
176
     * pointing to and then updates the internal pointer to point to the next element.
177
     * previous() updates the pointer first and then returns the element.  This
178
     * means that when you change direction while iterating (i.e., call next() and
179
     * then call previous(), or call previous() and then call next()), you'll get
180
     * back the same element twice.</p>
181
     *
182
     * @return the next collation element
183
     */
184
    public int next()
185
    {
186
        if (text == null) {
187
            return NULLORDER;
188
        }
189
        NormalizerBase.Mode textMode = text.getMode();
190
        // convert the owner's mode to something the Normalizer understands
191
        NormalizerBase.Mode ownerMode =
192
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
193
        if (textMode != ownerMode) {
194
            text.setMode(ownerMode);
195
        }
196

197
        // if buffer contains any decomposed char values
198
        // return their strength orders before continuing in
199
        // the Normalizer's CharacterIterator.
200
        if (buffer != null) {
201
            if (expIndex < buffer.length) {
202
                return strengthOrder(buffer[expIndex++]);
203
            } else {
204
                buffer = null;
205
                expIndex = 0;
206
            }
207
        } else if (swapOrder != 0) {
208
            if (Character.isSupplementaryCodePoint(swapOrder)) {
209
                char[] chars = Character.toChars(swapOrder);
210
                swapOrder = chars[1];
211
                return chars[0] << 16;
212
            }
213
            int order = swapOrder << 16;
214
            swapOrder = 0;
215
            return order;
216
        }
217
        int ch  = text.next();
218

219
        // are we at the end of Normalizer's text?
220
        if (ch == NormalizerBase.DONE) {
221
            return NULLORDER;
222
        }
223

224
        int value = ordering.getUnicodeOrder(ch);
225
        if (value == RuleBasedCollator.UNMAPPED) {
226
            swapOrder = ch;
227
            return UNMAPPEDCHARVALUE;
228
        }
229
        else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
230
            value = nextContractChar(ch);
231
        }
232
        if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
233
            buffer = ordering.getExpandValueList(value);
234
            expIndex = 0;
235
            value = buffer[expIndex++];
236
        }
237

238
        if (ordering.isSEAsianSwapping()) {
239
            int consonant;
240
            if (isThaiPreVowel(ch)) {
241
                consonant = text.next();
242
                if (isThaiBaseConsonant(consonant)) {
243
                    buffer = makeReorderedBuffer(consonant, value, buffer, true);
244
                    value = buffer[0];
245
                    expIndex = 1;
246
                } else if (consonant != NormalizerBase.DONE) {
247
                    text.previous();
248
                }
249
            }
250
            if (isLaoPreVowel(ch)) {
251
                consonant = text.next();
252
                if (isLaoBaseConsonant(consonant)) {
253
                    buffer = makeReorderedBuffer(consonant, value, buffer, true);
254
                    value = buffer[0];
255
                    expIndex = 1;
256
                } else if (consonant != NormalizerBase.DONE) {
257
                    text.previous();
258
                }
259
            }
260
        }
261

262
        return strengthOrder(value);
263
    }
264

265
    /**
266
     * Get the previous collation element in the string.  <p>This iterator iterates
267
     * over a sequence of collation elements that were built from the string.
268
     * Because there isn't necessarily a one-to-one mapping from characters to
269
     * collation elements, this doesn't mean the same thing as "return the
270
     * collation element [or ordering priority] of the previous character in the
271
     * string".</p>
272
     * <p>This function updates the iterator's internal pointer to point to the
273
     * collation element preceding the one it's currently pointing to and then
274
     * returns that element, while next() returns the current element and then
275
     * updates the pointer.  This means that when you change direction while
276
     * iterating (i.e., call next() and then call previous(), or call previous()
277
     * and then call next()), you'll get back the same element twice.</p>
278
     *
279
     * @return the previous collation element
280
     * @since 1.2
281
     */
282
    public int previous()
283
    {
284
        if (text == null) {
285
            return NULLORDER;
286
        }
287
        NormalizerBase.Mode textMode = text.getMode();
288
        // convert the owner's mode to something the Normalizer understands
289
        NormalizerBase.Mode ownerMode =
290
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
291
        if (textMode != ownerMode) {
292
            text.setMode(ownerMode);
293
        }
294
        if (buffer != null) {
295
            if (expIndex > 0) {
296
                return strengthOrder(buffer[--expIndex]);
297
            } else {
298
                buffer = null;
299
                expIndex = 0;
300
            }
301
        } else if (swapOrder != 0) {
302
            if (Character.isSupplementaryCodePoint(swapOrder)) {
303
                char[] chars = Character.toChars(swapOrder);
304
                swapOrder = chars[1];
305
                return chars[0] << 16;
306
            }
307
            int order = swapOrder << 16;
308
            swapOrder = 0;
309
            return order;
310
        }
311
        int ch = text.previous();
312
        if (ch == NormalizerBase.DONE) {
313
            return NULLORDER;
314
        }
315

316
        int value = ordering.getUnicodeOrder(ch);
317

318
        if (value == RuleBasedCollator.UNMAPPED) {
319
            swapOrder = UNMAPPEDCHARVALUE;
320
            return ch;
321
        } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
322
            value = prevContractChar(ch);
323
        }
324
        if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
325
            buffer = ordering.getExpandValueList(value);
326
            expIndex = buffer.length;
327
            value = buffer[--expIndex];
328
        }
329

330
        if (ordering.isSEAsianSwapping()) {
331
            int vowel;
332
            if (isThaiBaseConsonant(ch)) {
333
                vowel = text.previous();
334
                if (isThaiPreVowel(vowel)) {
335
                    buffer = makeReorderedBuffer(vowel, value, buffer, false);
336
                    expIndex = buffer.length - 1;
337
                    value = buffer[expIndex];
338
                } else {
339
                    text.next();
340
                }
341
            }
342
            if (isLaoBaseConsonant(ch)) {
343
                vowel = text.previous();
344
                if (isLaoPreVowel(vowel)) {
345
                    buffer = makeReorderedBuffer(vowel, value, buffer, false);
346
                    expIndex = buffer.length - 1;
347
                    value = buffer[expIndex];
348
                } else {
349
                    text.next();
350
                }
351
            }
352
        }
353

354
        return strengthOrder(value);
355
    }
356

357
    /**
358
     * Return the primary component of a collation element.
359
     * @param order the collation element
360
     * @return the element's primary component
361
     */
362
    public static final int primaryOrder(int order)
363
    {
364
        order &= RBCollationTables.PRIMARYORDERMASK;
365
        return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
366
    }
367
    /**
368
     * Return the secondary component of a collation element.
369
     * @param order the collation element
370
     * @return the element's secondary component
371
     */
372
    public static final short secondaryOrder(int order)
373
    {
374
        order = order & RBCollationTables.SECONDARYORDERMASK;
375
        return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
376
    }
377
    /**
378
     * Return the tertiary component of a collation element.
379
     * @param order the collation element
380
     * @return the element's tertiary component
381
     */
382
    public static final short tertiaryOrder(int order)
383
    {
384
        return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
385
    }
386

387
    /**
388
     *  Get the comparison order in the desired strength.  Ignore the other
389
     *  differences.
390
     *  @param order The order value
391
     */
392
    final int strengthOrder(int order)
393
    {
394
        int s = owner.getStrength();
395
        if (s == Collator.PRIMARY)
396
        {
397
            order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
398
        } else if (s == Collator.SECONDARY)
399
        {
400
            order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
401
        }
402
        return order;
403
    }
404

405
    /**
406
     * Sets the iterator to point to the collation element corresponding to
407
     * the specified character (the parameter is a CHARACTER offset in the
408
     * original string, not an offset into its corresponding sequence of
409
     * collation elements).  The value returned by the next call to next()
410
     * will be the collation element corresponding to the specified position
411
     * in the text.  If that position is in the middle of a contracting
412
     * character sequence, the result of the next call to next() is the
413
     * collation element for that sequence.  This means that getOffset()
414
     * is not guaranteed to return the same value as was passed to a preceding
415
     * call to setOffset().
416
     *
417
     * @param newOffset The new character offset into the original text.
418
     * @since 1.2
419
     */
420
    @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
421
    public void setOffset(int newOffset)
422
    {
423
        if (text != null) {
424
            if (newOffset < text.getBeginIndex()
425
                || newOffset >= text.getEndIndex()) {
426
                    text.setIndexOnly(newOffset);
427
            } else {
428
                int c = text.setIndex(newOffset);
429

430
                // if the desired character isn't used in a contracting character
431
                // sequence, bypass all the backing-up logic-- we're sitting on
432
                // the right character already
433
                if (ordering.usedInContractSeq(c)) {
434
                    // walk backwards through the string until we see a character
435
                    // that DOESN'T participate in a contracting character sequence
436
                    while (ordering.usedInContractSeq(c)) {
437
                        c = text.previous();
438
                    }
439
                    // now walk forward using this object's next() method until
440
                    // we pass the starting point and set our current position
441
                    // to the beginning of the last "character" before or at
442
                    // our starting position
443
                    int last = text.getIndex();
444
                    while (text.getIndex() <= newOffset) {
445
                        last = text.getIndex();
446
                        next();
447
                    }
448
                    text.setIndexOnly(last);
449
                    // we don't need this, since last is the last index
450
                    // that is the starting of the contraction which encompass
451
                    // newOffset
452
                    // text.previous();
453
                }
454
            }
455
        }
456
        buffer = null;
457
        expIndex = 0;
458
        swapOrder = 0;
459
    }
460

461
    /**
462
     * Returns the character offset in the original text corresponding to the next
463
     * collation element.  (That is, getOffset() returns the position in the text
464
     * corresponding to the collation element that will be returned by the next
465
     * call to next().)  This value will always be the index of the FIRST character
466
     * corresponding to the collation element (a contracting character sequence is
467
     * when two or more characters all correspond to the same collation element).
468
     * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
469
     * won't necessarily return x.
470
     *
471
     * @return The character offset in the original text corresponding to the collation
472
     * element that will be returned by the next call to next().
473
     * @since 1.2
474
     */
475
    public int getOffset()
476
    {
477
        return (text != null) ? text.getIndex() : 0;
478
    }
479

480

481
    /**
482
     * Return the maximum length of any expansion sequences that end
483
     * with the specified comparison order.
484
     * @param order a collation order returned by previous or next.
485
     * @return the maximum length of any expansion sequences ending
486
     *         with the specified order.
487
     * @since 1.2
488
     */
489
    public int getMaxExpansion(int order)
490
    {
491
        return ordering.getMaxExpansion(order);
492
    }
493

494
    /**
495
     * Set a new string over which to iterate.
496
     *
497
     * @param source  the new source text
498
     * @since 1.2
499
     */
500
    public void setText(String source)
501
    {
502
        buffer = null;
503
        swapOrder = 0;
504
        expIndex = 0;
505
        NormalizerBase.Mode mode =
506
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
507
        if (text == null) {
508
            text = new NormalizerBase(source, mode);
509
        } else {
510
            text.setMode(mode);
511
            text.setText(source);
512
        }
513
    }
514

515
    /**
516
     * Set a new string over which to iterate.
517
     *
518
     * @param source  the new source text.
519
     * @since 1.2
520
     */
521
    public void setText(CharacterIterator source)
522
    {
523
        buffer = null;
524
        swapOrder = 0;
525
        expIndex = 0;
526
        NormalizerBase.Mode mode =
527
            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
528
        if (text == null) {
529
            text = new NormalizerBase(source, mode);
530
        } else {
531
            text.setMode(mode);
532
            text.setText(source);
533
        }
534
    }
535

536
    //============================================================
537
    // privates
538
    //============================================================
539

540
    /**
541
     * Determine if a character is a Thai vowel (which sorts after
542
     * its base consonant).
543
     */
544
    private static final boolean isThaiPreVowel(int ch) {
545
        return (ch >= 0x0e40) && (ch <= 0x0e44);
546
    }
547

548
    /**
549
     * Determine if a character is a Thai base consonant
550
     */
551
    private static final boolean isThaiBaseConsonant(int ch) {
552
        return (ch >= 0x0e01) && (ch <= 0x0e2e);
553
    }
554

555
    /**
556
     * Determine if a character is a Lao vowel (which sorts after
557
     * its base consonant).
558
     */
559
    private static final boolean isLaoPreVowel(int ch) {
560
        return (ch >= 0x0ec0) && (ch <= 0x0ec4);
561
    }
562

563
    /**
564
     * Determine if a character is a Lao base consonant
565
     */
566
    private static final boolean isLaoBaseConsonant(int ch) {
567
        return (ch >= 0x0e81) && (ch <= 0x0eae);
568
    }
569

570
    /**
571
     * This method produces a buffer which contains the collation
572
     * elements for the two characters, with colFirst's values preceding
573
     * another character's.  Presumably, the other character precedes colFirst
574
     * in logical order (otherwise you wouldn't need this method would you?).
575
     * The assumption is that the other char's value(s) have already been
576
     * computed.  If this char has a single element it is passed to this
577
     * method as lastValue, and lastExpansion is null.  If it has an
578
     * expansion it is passed in lastExpansion, and colLastValue is ignored.
579
     */
580
    private int[] makeReorderedBuffer(int colFirst,
581
                                      int lastValue,
582
                                      int[] lastExpansion,
583
                                      boolean forward) {
584

585
        int[] result;
586

587
        int firstValue = ordering.getUnicodeOrder(colFirst);
588
        if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
589
            firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
590
        }
591

592
        int[] firstExpansion = null;
593
        if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
594
            firstExpansion = ordering.getExpandValueList(firstValue);
595
        }
596

597
        if (!forward) {
598
            int temp1 = firstValue;
599
            firstValue = lastValue;
600
            lastValue = temp1;
601
            int[] temp2 = firstExpansion;
602
            firstExpansion = lastExpansion;
603
            lastExpansion = temp2;
604
        }
605

606
        if (firstExpansion == null && lastExpansion == null) {
607
            result = new int [2];
608
            result[0] = firstValue;
609
            result[1] = lastValue;
610
        }
611
        else {
612
            int firstLength = firstExpansion==null? 1 : firstExpansion.length;
613
            int lastLength = lastExpansion==null? 1 : lastExpansion.length;
614
            result = new int[firstLength + lastLength];
615

616
            if (firstExpansion == null) {
617
                result[0] = firstValue;
618
            }
619
            else {
620
                System.arraycopy(firstExpansion, 0, result, 0, firstLength);
621
            }
622

623
            if (lastExpansion == null) {
624
                result[firstLength] = lastValue;
625
            }
626
            else {
627
                System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
628
            }
629
        }
630

631
        return result;
632
    }
633

634
    /**
635
     *  Check if a comparison order is ignorable.
636
     *  @return true if a character is ignorable, false otherwise.
637
     */
638
    static final boolean isIgnorable(int order)
639
    {
640
        return ((primaryOrder(order) == 0) ? true : false);
641
    }
642

643
    /**
644
     * Get the ordering priority of the next contracting character in the
645
     * string.
646
     * @param ch the starting character of a contracting character token
647
     * @return the next contracting character's ordering.  Returns NULLORDER
648
     * if the end of string is reached.
649
     */
650
    private int nextContractChar(int ch)
651
    {
652
        // First get the ordering of this single character,
653
        // which is always the first element in the list
654
        Vector<EntryPair> list = ordering.getContractValues(ch);
655
        EntryPair pair = list.firstElement();
656
        int order = pair.value;
657

658
        // find out the length of the longest contracting character sequence in the list.
659
        // There's logic in the builder code to make sure the longest sequence is always
660
        // the last.
661
        pair = list.lastElement();
662
        int maxLength = pair.entryName.length();
663

664
        // (the Normalizer is cloned here so that the seeking we do in the next loop
665
        // won't affect our real position in the text)
666
        NormalizerBase tempText = (NormalizerBase)text.clone();
667

668
        // extract the next maxLength characters in the string (we have to do this using the
669
        // Normalizer to ensure that our offsets correspond to those the rest of the
670
        // iterator is using) and store it in "fragment".
671
        tempText.previous();
672
        key.setLength(0);
673
        int c = tempText.next();
674
        while (maxLength > 0 && c != NormalizerBase.DONE) {
675
            if (Character.isSupplementaryCodePoint(c)) {
676
                key.append(Character.toChars(c));
677
                maxLength -= 2;
678
            } else {
679
                key.append((char)c);
680
                --maxLength;
681
            }
682
            c = tempText.next();
683
        }
684
        String fragment = key.toString();
685
        // now that we have that fragment, iterate through this list looking for the
686
        // longest sequence that matches the characters in the actual text.  (maxLength
687
        // is used here to keep track of the length of the longest sequence)
688
        // Upon exit from this loop, maxLength will contain the length of the matching
689
        // sequence and order will contain the collation-element value corresponding
690
        // to this sequence
691
        maxLength = 1;
692
        for (int i = list.size() - 1; i > 0; i--) {
693
            pair = list.elementAt(i);
694
            if (!pair.fwd)
695
                continue;
696

697
            if (fragment.startsWith(pair.entryName) && pair.entryName.length()
698
                    > maxLength) {
699
                maxLength = pair.entryName.length();
700
                order = pair.value;
701
            }
702
        }
703

704
        // seek our current iteration position to the end of the matching sequence
705
        // and return the appropriate collation-element value (if there was no matching
706
        // sequence, we're already seeked to the right position and order already contains
707
        // the correct collation-element value for the single character)
708
        while (maxLength > 1) {
709
            c = text.next();
710
            maxLength -= Character.charCount(c);
711
        }
712
        return order;
713
    }
714

715
    /**
716
     * Get the ordering priority of the previous contracting character in the
717
     * string.
718
     * @param ch the starting character of a contracting character token
719
     * @return the next contracting character's ordering.  Returns NULLORDER
720
     * if the end of string is reached.
721
     */
722
    private int prevContractChar(int ch)
723
    {
724
        // This function is identical to nextContractChar(), except that we've
725
        // switched things so that the next() and previous() calls on the Normalizer
726
        // are switched and so that we skip entry pairs with the fwd flag turned on
727
        // rather than off.  Notice that we still use append() and startsWith() when
728
        // working on the fragment.  This is because the entry pairs that are used
729
        // in reverse iteration have their names reversed already.
730
        Vector<EntryPair> list = ordering.getContractValues(ch);
731
        EntryPair pair = list.firstElement();
732
        int order = pair.value;
733

734
        pair = list.lastElement();
735
        int maxLength = pair.entryName.length();
736

737
        NormalizerBase tempText = (NormalizerBase)text.clone();
738

739
        tempText.next();
740
        key.setLength(0);
741
        int c = tempText.previous();
742
        while (maxLength > 0 && c != NormalizerBase.DONE) {
743
            if (Character.isSupplementaryCodePoint(c)) {
744
                key.append(Character.toChars(c));
745
                maxLength -= 2;
746
            } else {
747
                key.append((char)c);
748
                --maxLength;
749
            }
750
            c = tempText.previous();
751
        }
752
        String fragment = key.toString();
753

754
        maxLength = 1;
755
        for (int i = list.size() - 1; i > 0; i--) {
756
            pair = list.elementAt(i);
757
            if (pair.fwd)
758
                continue;
759

760
            if (fragment.startsWith(pair.entryName) && pair.entryName.length()
761
                    > maxLength) {
762
                maxLength = pair.entryName.length();
763
                order = pair.value;
764
            }
765
        }
766

767
        while (maxLength > 1) {
768
            c = text.previous();
769
            maxLength -= Character.charCount(c);
770
        }
771
        return order;
772
    }
773

774
    static final int UNMAPPEDCHARVALUE = 0x7FFF0000;
775

776
    private NormalizerBase text = null;
777
    private int[] buffer = null;
778
    private int expIndex = 0;
779
    private StringBuffer key = new StringBuffer(5);
780
    private int swapOrder = 0;
781
    private RBCollationTables ordering;
782
    private RuleBasedCollator owner;
783
}
784

785
Product

Resources

Company