// Source: GitHub repository PojavLauncherTeam/mobile, blob/master/src/java.base/share/classes/jdk/internal/icu/util/CodePointMap.java
/*
 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
// (c) 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License

// created: 2018may10 Markus W. Scherer

package jdk.internal.icu.util;

import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;

/**
 * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
 * This does not implement java.util.Map.
 *
 * <p>Subclasses supply {@link #get(int)} and
 * {@link #getRange(int, ValueFilter, Range)}; range iteration, surrogate-aware
 * range lookup and string iteration are built on top of those two methods.
 *
 * @stable ICU 63
 */
public abstract class CodePointMap implements Iterable<CodePointMap.Range> {
    /** Highest valid Unicode code point (U+10FFFF). */
    private static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;

    /** Last code point before the surrogate block (U+D7FF). */
    private static final int LAST_BEFORE_SURROGATES = Character.MIN_SURROGATE - 1;

    /**
     * Selectors for how getRange() should report value ranges overlapping with surrogates.
     * Most users should use NORMAL.
     *
     * @see #getRange
     * @stable ICU 63
     */
    public enum RangeOption {
        /**
         * getRange() enumerates all same-value ranges as stored in the map.
         * Most users should use this option.
         *
         * @stable ICU 63
         */
        NORMAL,
        /**
         * getRange() enumerates all same-value ranges as stored in the map,
         * except that lead surrogates (U+D800..U+DBFF) are treated as having the
         * surrogateValue, which is passed to getRange() as a separate parameter.
         * The surrogateValue is not transformed via filter().
         * See {@link Character#isHighSurrogate}.
         *
         * <p>Most users should use NORMAL instead.
         *
         * <p>This option is useful for maps that map surrogate code *units* to
         * special values optimized for UTF-16 string processing
         * or for special error behavior for unpaired surrogates,
         * but those values are not to be associated with the lead surrogate code *points*.
         *
         * @stable ICU 63
         */
        FIXED_LEAD_SURROGATES,
        /**
         * getRange() enumerates all same-value ranges as stored in the map,
         * except that all surrogates (U+D800..U+DFFF) are treated as having the
         * surrogateValue, which is passed to getRange() as a separate parameter.
         * The surrogateValue is not transformed via filter().
         * See {@link Character#isSurrogate}.
         *
         * <p>Most users should use NORMAL instead.
         *
         * <p>This option is useful for maps that map surrogate code *units* to
         * special values optimized for UTF-16 string processing
         * or for special error behavior for unpaired surrogates,
         * but those values are not to be associated with the surrogate code *points*.
         *
         * @stable ICU 63
         */
        FIXED_ALL_SURROGATES
    }

    /**
     * Callback function interface: Modifies a map value.
     * Optionally called by getRange().
     * The modified value will be returned by the getRange() function.
     *
     * <p>Can be used to ignore some of the value bits,
     * make a filter for one of several values,
     * return a value index computed from the map value, etc.
     *
     * @see #getRange
     * @see #iterator
     * @stable ICU 63
     */
    @FunctionalInterface
    public interface ValueFilter {
        /**
         * Modifies the map value.
         *
         * @param value map value
         * @return modified value
         * @stable ICU 63
         */
        int apply(int value);
    }

    /**
     * Range iteration result data.
     * Code points from start to end map to the same value.
     * The value may have been modified by {@link ValueFilter#apply(int)},
     * or it may be the surrogateValue if a RangeOption other than "normal" was used.
     *
     * <p>Instances are mutable: the enclosing map overwrites them in place.
     *
     * @see #getRange
     * @see #iterator
     * @stable ICU 63
     */
    public static final class Range {
        // Written directly by the enclosing class and its range iterator.
        private int start;
        private int end;
        private int value;

        /**
         * Constructor. Sets start and end to -1 and value to 0.
         *
         * @stable ICU 63
         */
        public Range() {
            start = end = -1;
            value = 0;
        }

        /**
         * @return the start code point
         * @stable ICU 63
         */
        public int getStart() { return start; }

        /**
         * @return the (inclusive) end code point
         * @stable ICU 63
         */
        public int getEnd() { return end; }

        /**
         * @return the range value
         * @stable ICU 63
         */
        public int getValue() { return value; }

        /**
         * Sets the range. When using {@link #iterator()},
         * iteration will resume after the newly set end.
         *
         * @param start new start code point
         * @param end new end code point
         * @param value new value
         * @stable ICU 63
         */
        public void set(int start, int end, int value) {
            this.start = start;
            this.end = end;
            this.value = value;
        }
    }

    /**
     * Iterator over the unfiltered same-value ranges of the enclosing map.
     * Every call to {@link #next()} refills and returns the same Range object.
     */
    private final class RangeIterator implements Iterator<Range> {
        /** Reused result object; its end doubles as the iteration position. */
        private final Range range = new Range();

        @Override
        public boolean hasNext() {
            // A fresh Range has end == -1; iteration is finished once end reaches U+10FFFF.
            return -1 <= range.end && range.end < MAX_CODE_POINT;
        }

        @Override
        public Range next() {
            // Continue right after the previous range (or after whatever end the caller set).
            if (getRange(range.end + 1, null, range)) {
                return range;
            }
            throw new NoSuchElementException();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Iterates over code points of a string and fetches map values.
     * This does not implement java.util.Iterator.
     *
     * <pre>
     * void onString(CodePointMap map, CharSequence s, int start) {
     *     CodePointMap.StringIterator iter = map.stringIterator(s, start);
     *     while (iter.next()) {
     *         int end = iter.getIndex();  // code point from between start and end
     *         useValue(s, start, end, iter.getCodePoint(), iter.getValue());
     *         start = end;
     *     }
     * }
     * </pre>
     *
     * <p>This class is not intended for public subclassing.
     *
     * @stable ICU 63
     */
    public class StringIterator {
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected CharSequence s;
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int sIndex;
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int c;
        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected int value;

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        protected StringIterator(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            c = -1;
            value = 0;
        }

        /**
         * Resets the iterator to a new string and/or a new string index.
         *
         * @param s string to iterate over
         * @param sIndex string index where the iteration will start
         * @stable ICU 63
         */
        public void reset(CharSequence s, int sIndex) {
            this.s = s;
            this.sIndex = sIndex;
            c = -1;
            value = 0;
        }

        /**
         * Reads the next code point, post-increments the string index,
         * and gets a value from the map.
         * Sets an implementation-defined error value if the code point is an unpaired surrogate.
         *
         * @return true if the string index was not yet at the end of the string;
         *         otherwise the iterator did not advance
         * @stable ICU 63
         */
        public boolean next() {
            if (sIndex >= s.length()) {
                return false;
            }
            c = Character.codePointAt(s, sIndex);
            sIndex += Character.charCount(c);
            value = get(c);
            return true;
        }

        /**
         * Reads the previous code point, pre-decrements the string index,
         * and gets a value from the map.
         * Sets an implementation-defined error value if the code point is an unpaired surrogate.
         *
         * @return true if the string index was not yet at the start of the string;
         *         otherwise the iterator did not advance
         * @stable ICU 63
         */
        public boolean previous() {
            if (sIndex <= 0) {
                return false;
            }
            c = Character.codePointBefore(s, sIndex);
            sIndex -= Character.charCount(c);
            value = get(c);
            return true;
        }

        /**
         * @return the string index
         * @stable ICU 63
         */
        public final int getIndex() { return sIndex; }

        /**
         * @return the code point
         * @stable ICU 63
         */
        public final int getCodePoint() { return c; }

        /**
         * @return the map value,
         *         or an implementation-defined error value if
         *         the code point is an unpaired surrogate
         * @stable ICU 63
         */
        public final int getValue() { return value; }
    }

    /**
     * Protected no-args constructor.
     *
     * @stable ICU 63
     */
    protected CodePointMap() {
    }

    /**
     * Returns the value for a code point as stored in the map, with range checking.
     * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
     *
     * @param c the code point
     * @return the map value,
     *         or an implementation-defined error value if
     *         the code point is not in the range 0..U+10FFFF
     * @stable ICU 63
     */
    public abstract int get(int c);

    /**
     * Sets the range object to a range of code points beginning with the start parameter.
     * The range start is the same as the start input parameter
     * (even if there are preceding code points that have the same value).
     * The range end is the last code point such that
     * all those from start to there have the same value.
     * Returns false if start is not 0..U+10FFFF.
     * Can be used to efficiently iterate over all same-value ranges in a map.
     * (This is normally faster than iterating over code points and get()ting each value,
     * but may be much slower than a data structure that stores ranges directly.)
     *
     * <p>If the {@link ValueFilter} parameter is not null, then
     * the value to be delivered is passed through that filter, and the return value is the end
     * of the range where all values are modified to the same actual value.
     * The value is unchanged if that parameter is null.
     *
     * <p>Example:
     * <pre>
     * int start = 0;
     * CodePointMap.Range range = new CodePointMap.Range();
     * while (map.getRange(start, null, range)) {
     *     int end = range.getEnd();
     *     int value = range.getValue();
     *     // Work with the range start..end and its value.
     *     start = end + 1;
     * }
     * </pre>
     *
     * @param start range start
     * @param filter an object that may modify the map data value,
     *     or null if the values from the map are to be used unmodified
     * @param range the range object that will be set to the code point range and value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     * @stable ICU 63
     */
    public abstract boolean getRange(int start, ValueFilter filter, Range range);

    /**
     * Sets the range object to a range of code points beginning with the start parameter.
     * The range start is the same as the start input parameter
     * (even if there are preceding code points that have the same value).
     * The range end is the last code point such that
     * all those from start to there have the same value.
     * Returns false if start is not 0..U+10FFFF.
     *
     * <p>Same as the simpler {@link #getRange(int, ValueFilter, Range)} but optionally
     * modifies the range if it overlaps with surrogate code points.
     *
     * @param start range start
     * @param option defines whether surrogates are treated normally,
     *     or as having the surrogateValue; usually {@link RangeOption#NORMAL}
     * @param surrogateValue value for surrogates; ignored if option=={@link RangeOption#NORMAL}
     * @param filter an object that may modify the map data value,
     *     or null if the values from the map are to be used unmodified
     * @param range the range object that will be set to the code point range and value
     * @return true if start is 0..U+10FFFF; otherwise no new range is fetched
     * @throws NullPointerException if option is null
     * @stable ICU 63
     */
    public boolean getRange(int start, RangeOption option, int surrogateValue,
            ValueFilter filter, Range range) {
        // Fail fast even when assertions are disabled; a null option would otherwise
        // be silently treated like FIXED_LEAD_SURROGATES below.
        Objects.requireNonNull(option, "option");
        if (!getRange(start, filter, range)) {
            return false;
        }
        if (option == RangeOption.NORMAL) {
            return true;
        }
        // Last code point that is forced to surrogateValue under the chosen option.
        int surrEnd = option == RangeOption.FIXED_ALL_SURROGATES
                ? Character.MAX_LOW_SURROGATE : Character.MAX_HIGH_SURROGATE;
        int end = range.end;
        if (end < LAST_BEFORE_SURROGATES || start > surrEnd) {
            // Entirely below U+D7FF or entirely above the fixed surrogates: nothing to adjust.
            return true;
        }
        // The range overlaps with surrogates, or ends just before the first one.
        if (range.value == surrogateValue) {
            if (end >= surrEnd) {
                // Surrogates followed by a non-surrValue range,
                // or surrogates are part of a larger surrValue range.
                return true;
            }
        } else {
            if (start <= LAST_BEFORE_SURROGATES) {
                range.end = LAST_BEFORE_SURROGATES;  // Non-surrValue range ends before surrValue surrogates.
                return true;
            }
            // Start is a surrogate with a non-surrValue code *unit* value.
            // Return a surrValue code *point* range.
            range.value = surrogateValue;
            if (end > surrEnd) {
                range.end = surrEnd;  // Surrogate range ends before non-surrValue rest of range.
                return true;
            }
        }
        // See if the surrValue surrogate range can be merged with
        // an immediately following range.
        if (getRange(surrEnd + 1, filter, range) && range.value == surrogateValue) {
            range.start = start;
            return true;
        }
        // No merge: the lookup above overwrote range, so restore the surrogate range.
        range.start = start;
        range.end = surrEnd;
        range.value = surrogateValue;
        return true;
    }

    /**
     * Convenience iterator over same-map-value code point ranges.
     * Same as looping over all ranges with {@link #getRange(int, ValueFilter, Range)}
     * without filtering.
     * Adjacent ranges have different map values.
     *
     * <p>The iterator always returns the same Range object.
     *
     * @return a Range iterator
     * @stable ICU 63
     */
    @Override
    public Iterator<Range> iterator() {
        return new RangeIterator();
    }

    /**
     * Returns an iterator (not a java.util.Iterator) over code points of a string
     * for fetching map values.
     *
     * @param s string to iterate over
     * @param sIndex string index where the iteration will start
     * @return the iterator
     * @stable ICU 63
     */
    public StringIterator stringIterator(CharSequence s, int sIndex) {
        return new StringIterator(s, sIndex);
    }
}