Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/java/text/MergeCollation.java
41152 views
1
/*
2
* Copyright (c) 1996, 2019, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
/*
27
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28
* (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
29
*
30
* The original version of this source code and documentation is copyrighted
31
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32
* materials are provided under terms of a License Agreement between Taligent
33
* and Sun. This technology is protected by multiple US and International
34
* patents. This notice and attribution to Taligent may not be removed.
35
* Taligent is a registered trademark of Taligent, Inc.
36
*
37
*/
38
39
package java.text;
40
41
import java.util.ArrayList;
42
43
/**
44
* Utility class for normalizing and merging patterns for collation.
45
* Patterns are strings of the form <entry>*, where <entry> has the
46
* form:
47
* <pattern> := <entry>*
48
* <entry> := <separator><chars>{"/"<extension>}
49
* <separator> := "=", ",", ";", "<", "&"
50
* <chars>, and <extension> are both arbitrary strings.
51
* unquoted whitespaces are ignored.
52
* 'xxx' can be used to quote characters
53
* One difference from Collator is that & is used to reset to a current
54
* point. Or, in other words, it introduces a new sequence which is to
55
* be added to the old.
56
* That is: "a < b < c < d" is the same as "a < b & b < c & c < d" OR
57
* "a < b < d & b < c"
58
* XXX: make '' be a single quote.
59
* @see PatternEntry
60
* @author Mark Davis, Helena Shih
61
*/
62
63
final class MergeCollation {
64
65
/**
66
* Creates from a pattern
67
* @throws ParseException If the input pattern is incorrect.
68
*/
69
public MergeCollation(String pattern) throws ParseException
70
{
71
for (int i = 0; i < statusArray.length; i++)
72
statusArray[i] = 0;
73
setPattern(pattern);
74
}
75
76
/**
77
* recovers current pattern
78
*/
79
public String getPattern() {
80
return getPattern(true);
81
}
82
83
/**
84
* recovers current pattern.
85
* @param withWhiteSpace puts spacing around the entries, and \n
86
* before & and <
87
*/
88
public String getPattern(boolean withWhiteSpace) {
89
StringBuffer result = new StringBuffer();
90
PatternEntry tmp = null;
91
ArrayList<PatternEntry> extList = null;
92
int i;
93
for (i = 0; i < patterns.size(); ++i) {
94
PatternEntry entry = patterns.get(i);
95
if (!entry.extension.isEmpty()) {
96
if (extList == null)
97
extList = new ArrayList<>();
98
extList.add(entry);
99
} else {
100
if (extList != null) {
101
PatternEntry last = findLastWithNoExtension(i-1);
102
for (int j = extList.size() - 1; j >= 0 ; j--) {
103
tmp = extList.get(j);
104
tmp.addToBuffer(result, false, withWhiteSpace, last);
105
}
106
extList = null;
107
}
108
entry.addToBuffer(result, false, withWhiteSpace, null);
109
}
110
}
111
if (extList != null) {
112
PatternEntry last = findLastWithNoExtension(i-1);
113
for (int j = extList.size() - 1; j >= 0 ; j--) {
114
tmp = extList.get(j);
115
tmp.addToBuffer(result, false, withWhiteSpace, last);
116
}
117
extList = null;
118
}
119
return result.toString();
120
}
121
122
private final PatternEntry findLastWithNoExtension(int i) {
123
for (--i;i >= 0; --i) {
124
PatternEntry entry = patterns.get(i);
125
if (entry.extension.isEmpty()) {
126
return entry;
127
}
128
}
129
return null;
130
}
131
132
/**
133
* emits the pattern for collation builder.
134
* @return emits the string in the format understable to the collation
135
* builder.
136
*/
137
public String emitPattern() {
138
return emitPattern(true);
139
}
140
141
/**
142
* emits the pattern for collation builder.
143
* @param withWhiteSpace puts spacing around the entries, and \n
144
* before & and <
145
* @return emits the string in the format understable to the collation
146
* builder.
147
*/
148
public String emitPattern(boolean withWhiteSpace) {
149
StringBuffer result = new StringBuffer();
150
for (int i = 0; i < patterns.size(); ++i)
151
{
152
PatternEntry entry = patterns.get(i);
153
if (entry != null) {
154
entry.addToBuffer(result, true, withWhiteSpace, null);
155
}
156
}
157
return result.toString();
158
}
159
160
/**
161
* sets the pattern.
162
*/
163
public void setPattern(String pattern) throws ParseException
164
{
165
patterns.clear();
166
addPattern(pattern);
167
}
168
169
/**
170
* adds a pattern to the current one.
171
* @param pattern the new pattern to be added
172
*/
173
public void addPattern(String pattern) throws ParseException
174
{
175
if (pattern == null)
176
return;
177
178
PatternEntry.Parser parser = new PatternEntry.Parser(pattern);
179
180
PatternEntry entry = parser.next();
181
while (entry != null) {
182
fixEntry(entry);
183
entry = parser.next();
184
}
185
}
186
187
/**
188
* gets count of separate entries
189
* @return the size of pattern entries
190
*/
191
public int getCount() {
192
return patterns.size();
193
}
194
195
/**
196
* gets count of separate entries
197
* @param index the offset of the desired pattern entry
198
* @return the requested pattern entry
199
*/
200
public PatternEntry getItemAt(int index) {
201
return patterns.get(index);
202
}
203
204
//============================================================
205
// privates
206
//============================================================
207
ArrayList<PatternEntry> patterns = new ArrayList<>(); // a list of PatternEntries
208
209
private transient PatternEntry saveEntry = null;
210
private transient PatternEntry lastEntry = null;
211
212
// This is really used as a local variable inside fixEntry, but we cache
213
// it here to avoid newing it up every time the method is called.
214
private transient StringBuffer excess = new StringBuffer();
215
216
//
217
// When building a MergeCollation, we need to do lots of searches to see
218
// whether a given entry is already in the table. Since we're using an
219
// array, this would make the algorithm O(N*N). To speed things up, we
220
// use this bit array to remember whether the array contains any entries
221
// starting with each Unicode character. If not, we can avoid the search.
222
// Using BitSet would make this easier, but it's significantly slower.
223
//
224
private transient byte[] statusArray = new byte[8192];
225
private final byte BITARRAYMASK = (byte)0x1;
226
private final int BYTEPOWER = 3;
227
private final int BYTEMASK = (1 << BYTEPOWER) - 1;
228
229
/*
230
If the strength is RESET, then just change the lastEntry to
231
be the current. (If the current is not in patterns, signal an error).
232
If not, then remove the current entry, and add it after lastEntry
233
(which is usually at the end).
234
*/
235
private final void fixEntry(PatternEntry newEntry) throws ParseException
236
{
237
// check to see whether the new entry has the same characters as the previous
238
// entry did (this can happen when a pattern declaring a difference between two
239
// strings that are canonically equivalent is normalized). If so, and the strength
240
// is anything other than IDENTICAL or RESET, throw an exception (you can't
241
// declare a string to be unequal to itself). --rtg 5/24/99
242
if (lastEntry != null && newEntry.chars.equals(lastEntry.chars)
243
&& newEntry.extension.equals(lastEntry.extension)) {
244
if (newEntry.strength != Collator.IDENTICAL
245
&& newEntry.strength != PatternEntry.RESET) {
246
throw new ParseException("The entries " + lastEntry + " and "
247
+ newEntry + " are adjacent in the rules, but have conflicting "
248
+ "strengths: A character can't be unequal to itself.", -1);
249
} else {
250
// otherwise, just skip this entry and behave as though you never saw it
251
return;
252
}
253
}
254
255
boolean changeLastEntry = true;
256
if (newEntry.strength != PatternEntry.RESET) {
257
int oldIndex = -1;
258
259
if ((newEntry.chars.length() == 1)) {
260
261
char c = newEntry.chars.charAt(0);
262
int statusIndex = c >> BYTEPOWER;
263
byte bitClump = statusArray[statusIndex];
264
byte setBit = (byte)(BITARRAYMASK << (c & BYTEMASK));
265
266
if (bitClump != 0 && (bitClump & setBit) != 0) {
267
oldIndex = patterns.lastIndexOf(newEntry);
268
} else {
269
// We're going to add an element that starts with this
270
// character, so go ahead and set its bit.
271
statusArray[statusIndex] = (byte)(bitClump | setBit);
272
}
273
} else {
274
oldIndex = patterns.lastIndexOf(newEntry);
275
}
276
if (oldIndex != -1) {
277
patterns.remove(oldIndex);
278
}
279
280
excess.setLength(0);
281
int lastIndex = findLastEntry(lastEntry, excess);
282
283
if (excess.length() != 0) {
284
newEntry.extension = excess + newEntry.extension;
285
if (lastIndex != patterns.size()) {
286
lastEntry = saveEntry;
287
changeLastEntry = false;
288
}
289
}
290
if (lastIndex == patterns.size()) {
291
patterns.add(newEntry);
292
saveEntry = newEntry;
293
} else {
294
patterns.add(lastIndex, newEntry);
295
}
296
}
297
if (changeLastEntry) {
298
lastEntry = newEntry;
299
}
300
}
301
302
private final int findLastEntry(PatternEntry entry,
303
StringBuffer excessChars) throws ParseException
304
{
305
if (entry == null)
306
return 0;
307
308
if (entry.strength != PatternEntry.RESET) {
309
// Search backwards for string that contains this one;
310
// most likely entry is last one
311
312
int oldIndex = -1;
313
if ((entry.chars.length() == 1)) {
314
int index = entry.chars.charAt(0) >> BYTEPOWER;
315
if ((statusArray[index] &
316
(BITARRAYMASK << (entry.chars.charAt(0) & BYTEMASK))) != 0) {
317
oldIndex = patterns.lastIndexOf(entry);
318
}
319
} else {
320
oldIndex = patterns.lastIndexOf(entry);
321
}
322
if ((oldIndex == -1))
323
throw new ParseException("couldn't find last entry: "
324
+ entry, oldIndex);
325
return oldIndex + 1;
326
} else {
327
int i;
328
for (i = patterns.size() - 1; i >= 0; --i) {
329
PatternEntry e = patterns.get(i);
330
if (e.chars.regionMatches(0,entry.chars,0,
331
e.chars.length())) {
332
excessChars.append(entry.chars, e.chars.length(),
333
entry.chars.length());
334
break;
335
}
336
}
337
if (i == -1)
338
throw new ParseException("couldn't find: " + entry, i);
339
return i + 1;
340
}
341
}
342
}
343
344