Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/test/jdk/java/text/Collator/Regression.java
41149 views
1
/*
2
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*/
23
24
/**
25
* @test
26
* @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
27
* 4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
28
* 4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
29
* 4133509 4139572 4141640 4179126 4179686 4244884 4663220
30
* @library /java/text/testlib
31
* @summary Regression tests for Collation and associated classes
32
* @modules jdk.localedata
33
*/
34
/*
35
(C) Copyright Taligent, Inc. 1996 - All Rights Reserved
36
(C) Copyright IBM Corp. 1996 - All Rights Reserved
37
38
The original version of this source code and documentation is copyrighted and
39
owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
40
provided under terms of a License Agreement between Taligent and Sun. This
41
technology is protected by multiple US and International patents. This notice and
42
attribution to Taligent may not be removed.
43
Taligent is a registered trademark of Taligent, Inc.
44
*/
45
46
import java.text.*;
47
import java.util.Locale;
48
import java.util.Vector;
49
50
51
public class Regression extends CollatorTest {
52
53
public static void main(String[] args) throws Exception {
54
new Regression().run(args);
55
}
56
57
// CollationElementIterator.reset() doesn't work
58
//
59
public void Test4048446() {
60
CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
61
CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
62
63
while ( i1.next() != CollationElementIterator.NULLORDER ) {
64
}
65
i1.reset();
66
67
assertEqual(i1, i2);
68
}
69
70
71
// Collator -> rules -> Collator round-trip broken for expanding characters
72
//
73
public void Test4051866() throws ParseException {
74
// Build a collator containing expanding characters
75
RuleBasedCollator c1 = new RuleBasedCollator("< o "
76
+"& oe ,o\u3080"
77
+"& oe ,\u1530 ,O"
78
+"& OE ,O\u3080"
79
+"& OE ,\u1520"
80
+"< p ,P");
81
82
// Build another using the rules from the first
83
RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
84
85
// Make sure they're the same
86
if (!c1.getRules().equals(c2.getRules())) {
87
errln("Rules are not equal");
88
}
89
}
90
91
// Collator thinks "black-bird" == "black"
92
//
93
public void Test4053636() {
94
if (en_us.equals("black-bird","black")) {
95
errln("black-bird == black");
96
}
97
}
98
99
100
// CollationElementIterator will not work correctly if the associated
101
// Collator object's mode is changed
102
//
103
public void Test4054238() {
104
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
105
106
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
107
CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
108
109
c.setDecomposition(Collator.NO_DECOMPOSITION);
110
CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
111
112
// At this point, BOTH iterators should use NO_DECOMPOSITION, since the
113
// collator itself is in that mode
114
assertEqual(i1, i2);
115
}
116
117
// Collator.IDENTICAL documented but not implemented
118
//
119
public void Test4054734() {
120
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
121
try {
122
c.setStrength(Collator.IDENTICAL);
123
}
124
catch (Exception e) {
125
errln("Caught " + e.toString() + " setting Collator.IDENTICAL");
126
}
127
128
String[] decomp = {
129
"\u0001", "<", "\u0002",
130
"\u0001", "=", "\u0001",
131
"A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
132
"\u00C0", "=", "A\u0300" // Decomp should make these equal
133
};
134
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
135
compareArray(c, decomp);
136
137
String[] nodecomp = {
138
"\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
139
};
140
c.setDecomposition(Collator.NO_DECOMPOSITION);
141
compareArray(c, nodecomp);
142
}
143
144
// Full Decomposition mode not implemented
145
//
146
public void Test4054736() {
147
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
148
c.setDecomposition(Collator.FULL_DECOMPOSITION);
149
150
String[] tests = {
151
"\uFB4f", "=", "\u05D0\u05DC", // Alef-Lamed vs. Alef, Lamed
152
};
153
154
compareArray(c, tests);
155
}
156
157
// Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
158
//
159
public void Test4058613() {
160
// Creating a default collator doesn't work when Korean is the default
161
// locale
162
163
Locale oldDefault = Locale.getDefault();
164
165
Locale.setDefault( Locale.KOREAN );
166
try {
167
Collator c = Collator.getInstance();
168
169
// Since the fix to this bug was to turn of decomposition for Korean collators,
170
// ensure that's what we got
171
if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
172
errln("Decomposition is not set to NO_DECOMPOSITION");
173
}
174
}
175
finally {
176
Locale.setDefault(oldDefault);
177
}
178
}
179
180
// RuleBasedCollator.getRules does not return the exact pattern as input
181
// for expanding character sequences
182
//
183
public void Test4059820() {
184
RuleBasedCollator c = null;
185
try {
186
c = new RuleBasedCollator("< a < b , c/a < d < z");
187
} catch (ParseException e) {
188
errln("Exception building collator: " + e.toString());
189
return;
190
}
191
if ( c.getRules().indexOf("c/a") == -1) {
192
errln("returned rules do not contain 'c/a'");
193
}
194
}
195
196
// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
197
//
198
public void Test4060154() {
199
RuleBasedCollator c = null;
200
try {
201
c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
202
+ " & H < \u0131, \u0130, i, I" );
203
} catch (ParseException e) {
204
errln("Exception building collator: " + e.toString());
205
return;
206
}
207
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
208
209
String[] tertiary = {
210
"A", "<", "B",
211
"H", "<", "\u0131",
212
"H", "<", "I",
213
"\u0131", "<", "\u0130",
214
"\u0130", "<", "i",
215
"\u0130", ">", "H",
216
};
217
c.setStrength(Collator.TERTIARY);
218
compareArray(c, tertiary);
219
220
String[] secondary = {
221
"H", "<", "I",
222
"\u0131", "=", "\u0130",
223
};
224
c.setStrength(Collator.PRIMARY);
225
compareArray(c, secondary);
226
};
227
228
// Secondary/Tertiary comparison incorrect in French Secondary
229
//
230
public void Test4062418() throws ParseException {
231
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
232
c.setStrength(Collator.SECONDARY);
233
234
String[] tests = {
235
"p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
236
};
237
238
compareArray(c, tests);
239
}
240
241
// Collator.compare() method broken if either string contains spaces
242
//
243
public void Test4065540() {
244
if (en_us.compare("abcd e", "abcd f") == 0) {
245
errln("'abcd e' == 'abcd f'");
246
}
247
}
248
249
// Unicode characters need to be recursively decomposed to get the
250
// correct result. For example,
251
// u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
252
//
253
public void Test4066189() {
254
String test1 = "\u1EB1";
255
String test2 = "a\u0306\u0300";
256
257
RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
258
c1.setDecomposition(Collator.FULL_DECOMPOSITION);
259
CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
260
261
RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
262
c2.setDecomposition(Collator.NO_DECOMPOSITION);
263
CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
264
265
assertEqual(i1, i2);
266
}
267
268
// French secondary collation checking at the end of compare iteration fails
269
//
270
public void Test4066696() {
271
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
272
c.setStrength(Collator.SECONDARY);
273
274
String[] tests = {
275
"\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
276
};
277
278
compareArray(c, tests);
279
}
280
281
282
// Bad canonicalization of same-class combining characters
283
//
284
public void Test4076676() {
285
// These combining characters are all in the same class, so they should not
286
// be reordered, and they should compare as unequal.
287
String s1 = "A\u0301\u0302\u0300";
288
String s2 = "A\u0302\u0300\u0301";
289
290
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
291
c.setStrength(Collator.TERTIARY);
292
293
if (c.compare(s1,s2) == 0) {
294
errln("Same-class combining chars were reordered");
295
}
296
}
297
298
299
// RuleBasedCollator.equals(null) throws NullPointerException
300
//
301
public void Test4079231() {
302
try {
303
if (en_us.equals(null)) {
304
errln("en_us.equals(null) returned true");
305
}
306
}
307
catch (Exception e) {
308
errln("en_us.equals(null) threw " + e.toString());
309
}
310
}
311
312
// RuleBasedCollator breaks on "< a < bb" rule
313
//
314
public void Test4078588() throws ParseException {
315
RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
316
317
int result = rbc.compare("a","bb");
318
319
if (result != -1) {
320
errln("Compare(a,bb) returned " + result + "; expected -1");
321
}
322
}
323
324
// Combining characters in different classes not reordered properly.
325
//
326
public void Test4081866() throws ParseException {
327
// These combining characters are all in different classes,
328
// so they should be reordered and the strings should compare as equal.
329
String s1 = "A\u0300\u0316\u0327\u0315";
330
String s2 = "A\u0327\u0316\u0315\u0300";
331
332
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
333
c.setStrength(Collator.TERTIARY);
334
335
// Now that the default collators are set to NO_DECOMPOSITION
336
// (as a result of fixing bug 4114077), we must set it explicitly
337
// when we're testing reordering behavior. -- lwerner, 5/5/98
338
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
339
340
if (c.compare(s1,s2) != 0) {
341
errln("Combining chars were not reordered");
342
}
343
}
344
345
// string comparison errors in Scandinavian collators
346
//
347
public void Test4087241() {
348
RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
349
new Locale("da", "DK"));
350
c.setStrength(Collator.SECONDARY);
351
352
String[] tests = {
353
"\u007a", "<", "\u00e6", // z < ae
354
"a\u0308", "<", "a\u030a", // a-unlaut < a-ring
355
"Y", "<", "u\u0308", // Y < u-umlaut
356
};
357
358
compareArray(c, tests);
359
}
360
361
// CollationKey takes ignorable strings into account when it shouldn't
362
//
363
public void Test4087243() {
364
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
365
c.setStrength(Collator.TERTIARY);
366
367
String[] tests = {
368
"123", "=", "123\u0001", // 1 2 3 = 1 2 3 ctrl-A
369
};
370
371
compareArray(c, tests);
372
}
373
374
// Mu/micro conflict
375
// Micro symbol and greek lowercase letter Mu should sort identically
376
//
377
public void Test4092260() {
378
Collator c = Collator.getInstance(new Locale("el", ""));
379
380
// will only be equal when FULL_DECOMPOSITION is used
381
c.setDecomposition(Collator.FULL_DECOMPOSITION);
382
383
String[] tests = {
384
"\u00B5", "=", "\u03BC",
385
};
386
387
compareArray(c, tests);
388
}
389
390
void Test4095316() {
391
Collator c = Collator.getInstance(new Locale("el", "GR"));
392
c.setStrength(Collator.TERTIARY);
393
// javadocs for RuleBasedCollator clearly specify that characters containing compatability
394
// chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
395
c.setDecomposition(Collator.FULL_DECOMPOSITION);
396
397
String[] tests = {
398
"\u03D4", "=", "\u03AB",
399
};
400
401
compareArray(c, tests);
402
}
403
404
public void Test4101940() {
405
try {
406
RuleBasedCollator c = new RuleBasedCollator("< a < b");
407
CollationElementIterator i = c.getCollationElementIterator("");
408
i.reset();
409
410
if (i.next() != i.NULLORDER) {
411
errln("next did not return NULLORDER");
412
}
413
}
414
catch (Exception e) {
415
errln("Caught " + e );
416
}
417
}
418
419
// Collator.compare not handling spaces properly
420
//
421
public void Test4103436() {
422
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
423
c.setStrength(Collator.TERTIARY);
424
425
String[] tests = {
426
"file", "<", "file access",
427
"file", "<", "fileaccess",
428
};
429
430
compareArray(c, tests);
431
}
432
433
// Collation not Unicode conformant with Hangul syllables
434
//
435
public void Test4114076() {
436
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
437
c.setStrength(Collator.TERTIARY);
438
439
//
440
// With Canonical decomposition, Hangul syllables should get decomposed
441
// into Jamo, but Jamo characters should not be decomposed into
442
// conjoining Jamo
443
//
444
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
445
String[] test1 = {
446
"\ud4db", "=", "\u1111\u1171\u11b6",
447
};
448
compareArray(c, test1);
449
450
// Full decomposition result should be the same as canonical decomposition
451
// for all hangul.
452
c.setDecomposition(Collator.FULL_DECOMPOSITION);
453
compareArray(c, test1);
454
455
}
456
457
458
// Collator.getCollationKey was hanging on certain character sequences
459
//
460
public void Test4124632() throws Exception {
461
Collator coll = Collator.getInstance(Locale.JAPAN);
462
463
try {
464
coll.getCollationKey("A\u0308bc");
465
} catch (OutOfMemoryError e) {
466
errln("Ran out of memory -- probably an infinite loop");
467
}
468
}
469
470
// sort order of french words with multiple accents has errors
471
//
472
public void Test4132736() {
473
Collator c = Collator.getInstance(Locale.FRANCE);
474
475
String[] test1 = {
476
"e\u0300e\u0301", "<", "e\u0301e\u0300",
477
"e\u0300\u0301", ">", "e\u0301\u0300",
478
};
479
compareArray(c, test1);
480
}
481
482
// The sorting using java.text.CollationKey is not in the exact order
483
//
484
public void Test4133509() {
485
String[] test1 = {
486
"Exception", "<", "ExceptionInInitializerError",
487
"Graphics", "<", "GraphicsEnvironment",
488
"String", "<", "StringBuffer",
489
};
490
compareArray(en_us, test1);
491
}
492
493
// Collation with decomposition off doesn't work for Europe
494
//
495
public void Test4114077() {
496
// Ensure that we get the same results with decomposition off
497
// as we do with it on....
498
499
RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
500
c.setStrength(Collator.TERTIARY);
501
502
String[] test1 = {
503
"\u00C0", "=", "A\u0300", // Should be equivalent
504
"p\u00eache", ">", "p\u00e9ch\u00e9",
505
"\u0204", "=", "E\u030F",
506
"\u01fa", "=", "A\u030a\u0301", // a-ring-acute -> a-ring, acute
507
// -> a, ring, acute
508
"A\u0300\u0316", "<", "A\u0316\u0300", // No reordering --> unequal
509
};
510
c.setDecomposition(Collator.NO_DECOMPOSITION);
511
compareArray(c, test1);
512
513
String[] test2 = {
514
"A\u0300\u0316", "=", "A\u0316\u0300", // Reordering --> equal
515
};
516
c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
517
compareArray(c, test2);
518
}
519
520
// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
521
//
522
public void Test4141640() {
523
//
524
// Rather than just creating a Swedish collator, we might as well
525
// try to instantiate one for every locale available on the system
526
// in order to prevent this sort of bug from cropping up in the future
527
//
528
Locale[] locales = Collator.getAvailableLocales();
529
530
for (int i = 0; i < locales.length; i++) {
531
try {
532
Collator c = Collator.getInstance(locales[i]);
533
} catch (Exception e) {
534
errln("Caught " + e + " creating collator for " + locales[i]);
535
}
536
}
537
}
538
539
// getCollationKey throws exception for spanish text
540
// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
541
//
542
public void Test4139572() {
543
//
544
// Code pasted straight from the bug report
545
//
546
// create spanish locale and collator
547
Locale l = new Locale("es", "es");
548
Collator col = Collator.getInstance(l);
549
550
// this spanish phrase kills it!
551
col.getCollationKey("Nombre De Objeto");
552
}
553
554
// RuleBasedCollator doesn't use getCollationElementIterator internally
555
//
556
public void Test4146160() throws ParseException {
557
//
558
// Use a custom collator class whose getCollationElementIterator
559
// methods increment a count....
560
//
561
My4146160Collator.count = 0;
562
new My4146160Collator().getCollationKey("1");
563
if (My4146160Collator.count < 1) {
564
errln("getCollationElementIterator not called");
565
}
566
567
My4146160Collator.count = 0;
568
new My4146160Collator().compare("1", "2");
569
if (My4146160Collator.count < 1) {
570
errln("getCollationElementIterator not called");
571
}
572
}
573
574
static class My4146160Collator extends RuleBasedCollator {
575
public My4146160Collator() throws ParseException {
576
super(Regression.en_us.getRules());
577
}
578
579
public CollationElementIterator getCollationElementIterator(
580
String text) {
581
count++;
582
return super.getCollationElementIterator(text);
583
}
584
public CollationElementIterator getCollationElementIterator(
585
CharacterIterator text) {
586
count++;
587
return super.getCollationElementIterator(text);
588
}
589
590
public static int count = 0;
591
};
592
593
// CollationElementIterator.previous broken for expanding char sequences
594
//
595
public void Test4179686() throws ParseException {
596
597
// Create a collator with a few expanding character sequences in it....
598
RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
599
+ " & ae ; \u00e4 & AE ; \u00c4"
600
+ " & oe ; \u00f6 & OE ; \u00d6"
601
+ " & ue ; \u00fc & UE ; \u00dc");
602
603
String text = "T\u00f6ne"; // o-umlaut
604
605
CollationElementIterator iter = coll.getCollationElementIterator(text);
606
Vector elements = new Vector();
607
int elem;
608
609
// Iterate forward and collect all of the elements into a Vector
610
while ((elem = iter.next()) != iter.NULLORDER) {
611
elements.addElement(new Integer(elem));
612
}
613
614
// Now iterate backward and make sure they're the same
615
int index = elements.size() - 1;
616
while ((elem = iter.previous()) != iter.NULLORDER) {
617
int expect = ((Integer)elements.elementAt(index)).intValue();
618
619
if (elem != expect) {
620
errln("Mismatch at index " + index
621
+ ": got " + Integer.toString(elem,16)
622
+ ", expected " + Integer.toString(expect,16));
623
}
624
index--;
625
}
626
}
627
628
public void Test4244884() throws ParseException {
629
RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
630
coll = new RuleBasedCollator(coll.getRules()
631
+ " & C < ch , cH , Ch , CH < cat < crunchy");
632
633
String[] testStrings = new String[] {
634
"car",
635
"cave",
636
"clamp",
637
"cramp",
638
"czar",
639
"church",
640
"catalogue",
641
"crunchy",
642
"dog"
643
};
644
645
for (int i = 1; i < testStrings.length; i++) {
646
if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
647
errln("error: \"" + testStrings[i - 1]
648
+ "\" is greater than or equal to \"" + testStrings[i]
649
+ "\".");
650
}
651
}
652
}
653
654
public void Test4179216() throws ParseException {
655
// you can position a CollationElementIterator in the middle of
656
// a contracting character sequence, yielding a bogus collation
657
// element
658
RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
659
coll = new RuleBasedCollator(coll.getRules()
660
+ " & C < ch , cH , Ch , CH < cat < crunchy");
661
String testText = "church church catcatcher runcrunchynchy";
662
CollationElementIterator iter = coll.getCollationElementIterator(
663
testText);
664
665
// test that the "ch" combination works properly
666
iter.setOffset(4);
667
int elt4 = CollationElementIterator.primaryOrder(iter.next());
668
669
iter.reset();
670
int elt0 = CollationElementIterator.primaryOrder(iter.next());
671
672
iter.setOffset(5);
673
int elt5 = CollationElementIterator.primaryOrder(iter.next());
674
675
if (elt4 != elt0 || elt5 != elt0)
676
errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
677
+ elt4 + "), and 5 (" + elt5 + ") don't match.");
678
679
// test that the "cat" combination works properly
680
iter.setOffset(14);
681
int elt14 = CollationElementIterator.primaryOrder(iter.next());
682
683
iter.setOffset(15);
684
int elt15 = CollationElementIterator.primaryOrder(iter.next());
685
686
iter.setOffset(16);
687
int elt16 = CollationElementIterator.primaryOrder(iter.next());
688
689
iter.setOffset(17);
690
int elt17 = CollationElementIterator.primaryOrder(iter.next());
691
692
iter.setOffset(18);
693
int elt18 = CollationElementIterator.primaryOrder(iter.next());
694
695
iter.setOffset(19);
696
int elt19 = CollationElementIterator.primaryOrder(iter.next());
697
698
if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
699
|| elt14 != elt18 || elt14 != elt19)
700
errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
701
+ elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
702
+ ", elt18 = " + elt18 + ", elt19 = " + elt19);
703
704
// now generate a complete list of the collation elements,
705
// first using next() and then using setOffset(), and
706
// make sure both interfaces return the same set of elements
707
iter.reset();
708
709
int elt = iter.next();
710
int count = 0;
711
while (elt != CollationElementIterator.NULLORDER) {
712
++count;
713
elt = iter.next();
714
}
715
716
String[] nextElements = new String[count];
717
String[] setOffsetElements = new String[count];
718
int lastPos = 0;
719
720
iter.reset();
721
elt = iter.next();
722
count = 0;
723
while (elt != CollationElementIterator.NULLORDER) {
724
nextElements[count++] = testText.substring(lastPos, iter.getOffset());
725
lastPos = iter.getOffset();
726
elt = iter.next();
727
}
728
count = 0;
729
for (int i = 0; i < testText.length(); ) {
730
iter.setOffset(i);
731
lastPos = iter.getOffset();
732
elt = iter.next();
733
setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
734
i = iter.getOffset();
735
}
736
for (int i = 0; i < nextElements.length; i++) {
737
if (nextElements[i].equals(setOffsetElements[i])) {
738
logln(nextElements[i]);
739
} else {
740
errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
741
+ setOffsetElements[i]);
742
}
743
}
744
}
745
746
public void Test4216006() throws Exception {
747
// rule parser barfs on "<\u00e0=a\u0300", and on other cases
748
// where the same token (after normalization) appears twice in a row
749
boolean caughtException = false;
750
try {
751
RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
752
}
753
catch (ParseException e) {
754
caughtException = true;
755
}
756
if (!caughtException) {
757
throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
758
}
759
760
RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
761
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
762
collator.setStrength(Collator.IDENTICAL);
763
764
String[] tests = {
765
"a\u0300", "=", "\u00e0",
766
"\u00e0", "=", "a\u0300"
767
};
768
769
compareArray(collator, tests);
770
}
771
772
public void Test4171974() {
773
// test French accent ordering more thoroughly
774
String[] frenchList = {
775
"\u0075\u0075", // u u
776
"\u00fc\u0075", // u-umlaut u
777
"\u01d6\u0075", // u-umlaut-macron u
778
"\u016b\u0075", // u-macron u
779
"\u1e7b\u0075", // u-macron-umlaut u
780
"\u0075\u00fc", // u u-umlaut
781
"\u00fc\u00fc", // u-umlaut u-umlaut
782
"\u01d6\u00fc", // u-umlaut-macron u-umlaut
783
"\u016b\u00fc", // u-macron u-umlaut
784
"\u1e7b\u00fc", // u-macron-umlaut u-umlaut
785
"\u0075\u01d6", // u u-umlaut-macron
786
"\u00fc\u01d6", // u-umlaut u-umlaut-macron
787
"\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
788
"\u016b\u01d6", // u-macron u-umlaut-macron
789
"\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
790
"\u0075\u016b", // u u-macron
791
"\u00fc\u016b", // u-umlaut u-macron
792
"\u01d6\u016b", // u-umlaut-macron u-macron
793
"\u016b\u016b", // u-macron u-macron
794
"\u1e7b\u016b", // u-macron-umlaut u-macron
795
"\u0075\u1e7b", // u u-macron-umlaut
796
"\u00fc\u1e7b", // u-umlaut u-macron-umlaut
797
"\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
798
"\u016b\u1e7b", // u-macron u-macron-umlaut
799
"\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
800
};
801
Collator french = Collator.getInstance(Locale.FRENCH);
802
803
logln("Testing French order...");
804
checkListOrder(frenchList, french);
805
806
logln("Testing French order without decomposition...");
807
french.setDecomposition(Collator.NO_DECOMPOSITION);
808
checkListOrder(frenchList, french);
809
810
String[] englishList = {
811
"\u0075\u0075", // u u
812
"\u0075\u00fc", // u u-umlaut
813
"\u0075\u01d6", // u u-umlaut-macron
814
"\u0075\u016b", // u u-macron
815
"\u0075\u1e7b", // u u-macron-umlaut
816
"\u00fc\u0075", // u-umlaut u
817
"\u00fc\u00fc", // u-umlaut u-umlaut
818
"\u00fc\u01d6", // u-umlaut u-umlaut-macron
819
"\u00fc\u016b", // u-umlaut u-macron
820
"\u00fc\u1e7b", // u-umlaut u-macron-umlaut
821
"\u01d6\u0075", // u-umlaut-macron u
822
"\u01d6\u00fc", // u-umlaut-macron u-umlaut
823
"\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
824
"\u01d6\u016b", // u-umlaut-macron u-macron
825
"\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
826
"\u016b\u0075", // u-macron u
827
"\u016b\u00fc", // u-macron u-umlaut
828
"\u016b\u01d6", // u-macron u-umlaut-macron
829
"\u016b\u016b", // u-macron u-macron
830
"\u016b\u1e7b", // u-macron u-macron-umlaut
831
"\u1e7b\u0075", // u-macron-umlaut u
832
"\u1e7b\u00fc", // u-macron-umlaut u-umlaut
833
"\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
834
"\u1e7b\u016b", // u-macron-umlaut u-macron
835
"\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
836
};
837
Collator english = Collator.getInstance(Locale.ENGLISH);
838
839
logln("Testing English order...");
840
checkListOrder(englishList, english);
841
842
logln("Testing English order without decomposition...");
843
english.setDecomposition(Collator.NO_DECOMPOSITION);
844
checkListOrder(englishList, english);
845
}
846
847
private void checkListOrder(String[] sortedList, Collator c) {
848
// this function uses the specified Collator to make sure the
849
// passed-in list is already sorted into ascending order
850
for (int i = 0; i < sortedList.length - 1; i++) {
851
if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
852
errln("List out of order at element #" + i + ": "
853
+ prettify(sortedList[i]) + " >= "
854
+ prettify(sortedList[i + 1]));
855
}
856
}
857
}
858
859
// CollationElementIterator set doesn't work properly with next/prev
860
public void Test4663220() {
861
RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
862
CharacterIterator stringIter = new StringCharacterIterator("fox");
863
CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
864
865
int[] elements_next = new int[3];
866
logln("calling next:");
867
for (int i = 0; i < 3; ++i) {
868
logln("[" + i + "] " + (elements_next[i] = iter.next()));
869
}
870
871
int[] elements_fwd = new int[3];
872
logln("calling set/next:");
873
for (int i = 0; i < 3; ++i) {
874
iter.setOffset(i);
875
logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
876
}
877
878
for (int i = 0; i < 3; ++i) {
879
if (elements_next[i] != elements_fwd[i]) {
880
errln("mismatch at position " + i +
881
": " + elements_next[i] +
882
" != " + elements_fwd[i]);
883
}
884
}
885
}
886
887
//------------------------------------------------------------------------
888
// Internal utilities
889
//
890
private void compareArray(Collator c, String[] tests) {
891
for (int i = 0; i < tests.length; i += 3) {
892
893
int expect = 0;
894
if (tests[i+1].equals("<")) {
895
expect = -1;
896
} else if (tests[i+1].equals(">")) {
897
expect = 1;
898
} else if (tests[i+1].equals("=")) {
899
expect = 0;
900
} else {
901
expect = Integer.decode(tests[i+1]).intValue();
902
}
903
904
int result = c.compare(tests[i], tests[i+2]);
905
if (sign(result) != sign(expect))
906
{
907
errln( i/3 + ": compare(" + prettify(tests[i])
908
+ " , " + prettify(tests[i+2])
909
+ ") got " + result + "; expected " + expect);
910
}
911
else
912
{
913
// Collator.compare worked OK; now try the collation keys
914
CollationKey k1 = c.getCollationKey(tests[i]);
915
CollationKey k2 = c.getCollationKey(tests[i+2]);
916
917
result = k1.compareTo(k2);
918
if (sign(result) != sign(expect)) {
919
errln( i/3 + ": key(" + prettify(tests[i])
920
+ ").compareTo(key(" + prettify(tests[i+2])
921
+ ")) got " + result + "; expected " + expect);
922
923
errln(" " + prettify(k1) + " vs. " + prettify(k2));
924
}
925
}
926
}
927
}
928
929
private static final int sign(int i) {
930
if (i < 0) return -1;
931
if (i > 0) return 1;
932
return 0;
933
}
934
935
936
static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
937
938
String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
939
String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
940
String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
941
}
942
943