Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/java/lang/Character.java
41152 views
1
/*
2
* Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package java.lang;
27
28
import jdk.internal.misc.CDS;
29
import jdk.internal.vm.annotation.IntrinsicCandidate;
30
31
import java.lang.constant.Constable;
32
import java.lang.constant.DynamicConstantDesc;
33
import java.util.Arrays;
34
import java.util.HashMap;
35
import java.util.Locale;
36
import java.util.Map;
37
import java.util.Optional;
38
39
import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST;
40
import static java.lang.constant.ConstantDescs.CD_char;
41
import static java.lang.constant.ConstantDescs.CD_int;
42
import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
43
44
/**
45
* The {@code Character} class wraps a value of the primitive
46
* type {@code char} in an object. An object of class
47
* {@code Character} contains a single field whose type is
48
* {@code char}.
49
* <p>
50
* In addition, this class provides a large number of static methods for
51
* determining a character's category (lowercase letter, digit, etc.)
52
* and for converting characters from uppercase to lowercase and vice
53
* versa.
54
*
55
* <h2><a id="conformance">Unicode Conformance</a></h2>
56
* <p>
57
* The fields and methods of class {@code Character} are defined in terms
58
* of character information from the Unicode Standard, specifically the
59
* <i>UnicodeData</i> file that is part of the Unicode Character Database.
60
* This file specifies properties including name and category for every
61
* assigned Unicode code point or character range. The file is available
62
* from the Unicode Consortium at
63
* <a href="http://www.unicode.org">http://www.unicode.org</a>.
64
* <p>
65
* Character information is based on the Unicode Standard, version 13.0.
66
* <p>
67
* The Java platform has supported different versions of the Unicode
68
* Standard over time. Upgrades to newer versions of the Unicode Standard
69
* occurred in the following Java releases, each indicating the new version:
70
* <table class="striped">
71
* <caption style="display:none">Shows Java releases and supported Unicode versions</caption>
72
* <thead>
73
* <tr><th scope="col">Java release</th>
74
* <th scope="col">Unicode version</th></tr>
75
* </thead>
76
* <tbody>
77
* <tr><td>Java SE 15</td>
78
* <td>Unicode 13.0</td></tr>
79
* <tr><td>Java SE 13</td>
80
* <td>Unicode 12.1</td></tr>
81
* <tr><td>Java SE 12</td>
82
* <td>Unicode 11.0</td></tr>
83
* <tr><td>Java SE 11</td>
84
* <td>Unicode 10.0</td></tr>
85
* <tr><td>Java SE 9</td>
86
* <td>Unicode 8.0</td></tr>
87
* <tr><td>Java SE 8</td>
88
* <td>Unicode 6.2</td></tr>
89
* <tr><td>Java SE 7</td>
90
* <td>Unicode 6.0</td></tr>
91
* <tr><td>Java SE 5.0</td>
92
* <td>Unicode 4.0</td></tr>
93
* <tr><td>Java SE 1.4</td>
94
* <td>Unicode 3.0</td></tr>
95
* <tr><td>JDK 1.1</td>
96
* <td>Unicode 2.0</td></tr>
97
* <tr><td>JDK 1.0.2</td>
98
* <td>Unicode 1.1.5</td></tr>
99
* </tbody>
100
* </table>
101
* Variations from these base Unicode versions, such as recognized appendixes,
102
* are documented elsewhere.
103
* <h2><a id="unicode">Unicode Character Representations</a></h2>
104
*
105
* <p>The {@code char} data type (and therefore the value that a
106
* {@code Character} object encapsulates) is based on the
107
* original Unicode specification, which defined characters as
108
* fixed-width 16-bit entities. The Unicode Standard has since been
109
* changed to allow for characters whose representation requires more
110
* than 16 bits. The range of legal <em>code point</em>s is now
111
* U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
112
* (Refer to the <a
113
* href="http://www.unicode.org/reports/tr27/#notation"><i>
114
* definition</i></a> of the U+<i>n</i> notation in the Unicode
115
* Standard.)
116
*
117
* <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
118
* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
119
* <a id="supplementary">Characters</a> whose code points are greater
120
* than U+FFFF are called <em>supplementary character</em>s. The Java
121
* platform uses the UTF-16 representation in {@code char} arrays and
122
* in the {@code String} and {@code StringBuffer} classes. In
123
* this representation, supplementary characters are represented as a pair
124
* of {@code char} values, the first from the <em>high-surrogates</em>
125
* range, (&#92;uD800-&#92;uDBFF), the second from the
126
* <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
127
*
128
* <p>A {@code char} value, therefore, represents Basic
129
* Multilingual Plane (BMP) code points, including the surrogate
130
* code points, or code units of the UTF-16 encoding. An
131
* {@code int} value represents all Unicode code points,
132
* including supplementary code points. The lower (least significant)
133
* 21 bits of {@code int} are used to represent Unicode code
134
* points and the upper (most significant) 11 bits must be zero.
135
* Unless otherwise specified, the behavior with respect to
136
* supplementary characters and surrogate {@code char} values is
137
* as follows:
138
*
139
* <ul>
140
* <li>The methods that only accept a {@code char} value cannot support
141
* supplementary characters. They treat {@code char} values from the
142
* surrogate ranges as undefined characters. For example,
143
* {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
144
* this specific value if followed by any low-surrogate value in a string
145
* would represent a letter.
146
*
147
* <li>The methods that accept an {@code int} value support all
148
* Unicode characters, including supplementary characters. For
149
* example, {@code Character.isLetter(0x2F81A)} returns
150
* {@code true} because the code point value represents a letter
151
* (a CJK ideograph).
152
* </ul>
153
*
154
* <p>In the Java SE API documentation, <em>Unicode code point</em> is
155
* used for character values in the range between U+0000 and U+10FFFF,
156
* and <em>Unicode code unit</em> is used for 16-bit
157
* {@code char} values that are code units of the <em>UTF-16</em>
158
* encoding. For more information on Unicode terminology, refer to the
159
* <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
160
*
161
* <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
162
* class; programmers should treat instances that are
163
* {@linkplain #equals(Object) equal} as interchangeable and should not
164
* use instances for synchronization, or unpredictable behavior may
165
* occur. For example, in a future release, synchronization may fail.
166
*
167
* @author Lee Boynton
168
* @author Guy Steele
169
* @author Akira Tanaka
170
* @author Martin Buchholz
171
* @author Ulf Zibis
172
* @since 1.0
173
*/
174
@jdk.internal.ValueBased
175
public final
176
class Character implements java.io.Serializable, Comparable<Character>, Constable {
177
/**
178
* The minimum radix available for conversion to and from strings.
179
* The constant value of this field is the smallest value permitted
180
* for the radix argument in radix-conversion methods such as the
181
* {@code digit} method, the {@code forDigit} method, and the
182
* {@code toString} method of class {@code Integer}.
183
*
184
* @see Character#digit(char, int)
185
* @see Character#forDigit(int, int)
186
* @see Integer#toString(int, int)
187
* @see Integer#valueOf(String)
188
*/
189
public static final int MIN_RADIX = 2;
190
191
/**
192
* The maximum radix available for conversion to and from strings.
193
* The constant value of this field is the largest value permitted
194
* for the radix argument in radix-conversion methods such as the
195
* {@code digit} method, the {@code forDigit} method, and the
196
* {@code toString} method of class {@code Integer}.
197
*
198
* @see Character#digit(char, int)
199
* @see Character#forDigit(int, int)
200
* @see Integer#toString(int, int)
201
* @see Integer#valueOf(String)
202
*/
203
public static final int MAX_RADIX = 36;
204
205
/**
206
* The constant value of this field is the smallest value of type
207
* {@code char}, {@code '\u005Cu0000'}.
208
*
209
* @since 1.0.2
210
*/
211
public static final char MIN_VALUE = '\u0000';
212
213
/**
214
* The constant value of this field is the largest value of type
215
* {@code char}, {@code '\u005CuFFFF'}.
216
*
217
* @since 1.0.2
218
*/
219
public static final char MAX_VALUE = '\uFFFF';
220
221
/**
222
* The {@code Class} instance representing the primitive type
223
* {@code char}.
224
*
225
* @since 1.1
226
*/
227
@SuppressWarnings("unchecked")
228
public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
229
230
/*
231
* Normative general types
232
*/
233
234
/*
235
* General character types
236
*/
237
238
/**
239
* General category "Cn" in the Unicode specification.
240
* @since 1.1
241
*/
242
public static final byte UNASSIGNED = 0;
243
244
/**
245
* General category "Lu" in the Unicode specification.
246
* @since 1.1
247
*/
248
public static final byte UPPERCASE_LETTER = 1;
249
250
/**
251
* General category "Ll" in the Unicode specification.
252
* @since 1.1
253
*/
254
public static final byte LOWERCASE_LETTER = 2;
255
256
/**
257
* General category "Lt" in the Unicode specification.
258
* @since 1.1
259
*/
260
public static final byte TITLECASE_LETTER = 3;
261
262
/**
263
* General category "Lm" in the Unicode specification.
264
* @since 1.1
265
*/
266
public static final byte MODIFIER_LETTER = 4;
267
268
/**
269
* General category "Lo" in the Unicode specification.
270
* @since 1.1
271
*/
272
public static final byte OTHER_LETTER = 5;
273
274
/**
275
* General category "Mn" in the Unicode specification.
276
* @since 1.1
277
*/
278
public static final byte NON_SPACING_MARK = 6;
279
280
/**
281
* General category "Me" in the Unicode specification.
282
* @since 1.1
283
*/
284
public static final byte ENCLOSING_MARK = 7;
285
286
/**
287
* General category "Mc" in the Unicode specification.
288
* @since 1.1
289
*/
290
public static final byte COMBINING_SPACING_MARK = 8;
291
292
/**
293
* General category "Nd" in the Unicode specification.
294
* @since 1.1
295
*/
296
public static final byte DECIMAL_DIGIT_NUMBER = 9;
297
298
/**
299
* General category "Nl" in the Unicode specification.
300
* @since 1.1
301
*/
302
public static final byte LETTER_NUMBER = 10;
303
304
/**
305
* General category "No" in the Unicode specification.
306
* @since 1.1
307
*/
308
public static final byte OTHER_NUMBER = 11;
309
310
/**
311
* General category "Zs" in the Unicode specification.
312
* @since 1.1
313
*/
314
public static final byte SPACE_SEPARATOR = 12;
315
316
/**
317
* General category "Zl" in the Unicode specification.
318
* @since 1.1
319
*/
320
public static final byte LINE_SEPARATOR = 13;
321
322
/**
323
* General category "Zp" in the Unicode specification.
324
* @since 1.1
325
*/
326
public static final byte PARAGRAPH_SEPARATOR = 14;
327
328
/**
329
* General category "Cc" in the Unicode specification.
330
* @since 1.1
331
*/
332
public static final byte CONTROL = 15;
333
334
/**
335
* General category "Cf" in the Unicode specification.
336
* @since 1.1
337
*/
338
public static final byte FORMAT = 16;
339
340
/**
341
* General category "Co" in the Unicode specification.
342
* @since 1.1
343
*/
344
public static final byte PRIVATE_USE = 18;
345
346
/**
347
* General category "Cs" in the Unicode specification.
348
* @since 1.1
349
*/
350
public static final byte SURROGATE = 19;
351
352
/**
353
* General category "Pd" in the Unicode specification.
354
* @since 1.1
355
*/
356
public static final byte DASH_PUNCTUATION = 20;
357
358
/**
359
* General category "Ps" in the Unicode specification.
360
* @since 1.1
361
*/
362
public static final byte START_PUNCTUATION = 21;
363
364
/**
365
* General category "Pe" in the Unicode specification.
366
* @since 1.1
367
*/
368
public static final byte END_PUNCTUATION = 22;
369
370
/**
371
* General category "Pc" in the Unicode specification.
372
* @since 1.1
373
*/
374
public static final byte CONNECTOR_PUNCTUATION = 23;
375
376
/**
377
* General category "Po" in the Unicode specification.
378
* @since 1.1
379
*/
380
public static final byte OTHER_PUNCTUATION = 24;
381
382
/**
383
* General category "Sm" in the Unicode specification.
384
* @since 1.1
385
*/
386
public static final byte MATH_SYMBOL = 25;
387
388
/**
389
* General category "Sc" in the Unicode specification.
390
* @since 1.1
391
*/
392
public static final byte CURRENCY_SYMBOL = 26;
393
394
/**
395
* General category "Sk" in the Unicode specification.
396
* @since 1.1
397
*/
398
public static final byte MODIFIER_SYMBOL = 27;
399
400
/**
401
* General category "So" in the Unicode specification.
402
* @since 1.1
403
*/
404
public static final byte OTHER_SYMBOL = 28;
405
406
/**
407
* General category "Pi" in the Unicode specification.
408
* @since 1.4
409
*/
410
public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
411
412
/**
413
* General category "Pf" in the Unicode specification.
414
* @since 1.4
415
*/
416
public static final byte FINAL_QUOTE_PUNCTUATION = 30;
417
418
/**
419
* Error flag. Use int (code point) to avoid confusion with U+FFFF.
420
*/
421
static final int ERROR = 0xFFFFFFFF;
422
423
424
/**
425
* Undefined bidirectional character type. Undefined {@code char}
426
* values have undefined directionality in the Unicode specification.
427
* @since 1.4
428
*/
429
public static final byte DIRECTIONALITY_UNDEFINED = -1;
430
431
/**
432
* Strong bidirectional character type "L" in the Unicode specification.
433
* @since 1.4
434
*/
435
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
436
437
/**
438
* Strong bidirectional character type "R" in the Unicode specification.
439
* @since 1.4
440
*/
441
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
442
443
/**
444
* Strong bidirectional character type "AL" in the Unicode specification.
445
* @since 1.4
446
*/
447
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
448
449
/**
450
* Weak bidirectional character type "EN" in the Unicode specification.
451
* @since 1.4
452
*/
453
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
454
455
/**
456
* Weak bidirectional character type "ES" in the Unicode specification.
457
* @since 1.4
458
*/
459
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
460
461
/**
462
* Weak bidirectional character type "ET" in the Unicode specification.
463
* @since 1.4
464
*/
465
public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
466
467
/**
468
* Weak bidirectional character type "AN" in the Unicode specification.
469
* @since 1.4
470
*/
471
public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
472
473
/**
474
* Weak bidirectional character type "CS" in the Unicode specification.
475
* @since 1.4
476
*/
477
public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
478
479
/**
480
* Weak bidirectional character type "NSM" in the Unicode specification.
481
* @since 1.4
482
*/
483
public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
484
485
/**
486
* Weak bidirectional character type "BN" in the Unicode specification.
487
* @since 1.4
488
*/
489
public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
490
491
/**
492
* Neutral bidirectional character type "B" in the Unicode specification.
493
* @since 1.4
494
*/
495
public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
496
497
/**
498
* Neutral bidirectional character type "S" in the Unicode specification.
499
* @since 1.4
500
*/
501
public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
502
503
/**
504
* Neutral bidirectional character type "WS" in the Unicode specification.
505
* @since 1.4
506
*/
507
public static final byte DIRECTIONALITY_WHITESPACE = 12;
508
509
/**
510
* Neutral bidirectional character type "ON" in the Unicode specification.
511
* @since 1.4
512
*/
513
public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
514
515
/**
516
* Strong bidirectional character type "LRE" in the Unicode specification.
517
* @since 1.4
518
*/
519
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
520
521
/**
522
* Strong bidirectional character type "LRO" in the Unicode specification.
523
* @since 1.4
524
*/
525
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
526
527
/**
528
* Strong bidirectional character type "RLE" in the Unicode specification.
529
* @since 1.4
530
*/
531
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
532
533
/**
534
* Strong bidirectional character type "RLO" in the Unicode specification.
535
* @since 1.4
536
*/
537
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
538
539
/**
540
* Weak bidirectional character type "PDF" in the Unicode specification.
541
* @since 1.4
542
*/
543
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
544
545
/**
546
* Weak bidirectional character type "LRI" in the Unicode specification.
547
* @since 9
548
*/
549
public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
550
551
/**
552
* Weak bidirectional character type "RLI" in the Unicode specification.
553
* @since 9
554
*/
555
public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
556
557
/**
558
* Weak bidirectional character type "FSI" in the Unicode specification.
559
* @since 9
560
*/
561
public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
562
563
/**
564
* Weak bidirectional character type "PDI" in the Unicode specification.
565
* @since 9
566
*/
567
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
568
569
/**
570
* The minimum value of a
571
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
572
* Unicode high-surrogate code unit</a>
573
* in the UTF-16 encoding, constant {@code '\u005CuD800'}.
574
* A high-surrogate is also known as a <i>leading-surrogate</i>.
575
*
576
* @since 1.5
577
*/
578
public static final char MIN_HIGH_SURROGATE = '\uD800';
579
580
/**
581
* The maximum value of a
582
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
583
* Unicode high-surrogate code unit</a>
584
* in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
585
* A high-surrogate is also known as a <i>leading-surrogate</i>.
586
*
587
* @since 1.5
588
*/
589
public static final char MAX_HIGH_SURROGATE = '\uDBFF';
590
591
/**
592
* The minimum value of a
593
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
594
* Unicode low-surrogate code unit</a>
595
* in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
596
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
597
*
598
* @since 1.5
599
*/
600
public static final char MIN_LOW_SURROGATE = '\uDC00';
601
602
/**
603
* The maximum value of a
604
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
605
* Unicode low-surrogate code unit</a>
606
* in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
607
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
608
*
609
* @since 1.5
610
*/
611
public static final char MAX_LOW_SURROGATE = '\uDFFF';
612
613
/**
614
* The minimum value of a Unicode surrogate code unit in the
615
* UTF-16 encoding, constant {@code '\u005CuD800'}.
616
*
617
* @since 1.5
618
*/
619
public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
620
621
/**
622
* The maximum value of a Unicode surrogate code unit in the
623
* UTF-16 encoding, constant {@code '\u005CuDFFF'}.
624
*
625
* @since 1.5
626
*/
627
public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
628
629
/**
630
* The minimum value of a
631
* <a href="http://www.unicode.org/glossary/#supplementary_code_point">
632
* Unicode supplementary code point</a>, constant {@code U+10000}.
633
*
634
* @since 1.5
635
*/
636
public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
637
638
/**
639
* The minimum value of a
640
* <a href="http://www.unicode.org/glossary/#code_point">
641
* Unicode code point</a>, constant {@code U+0000}.
642
*
643
* @since 1.5
644
*/
645
public static final int MIN_CODE_POINT = 0x000000;
646
647
/**
648
* The maximum value of a
649
* <a href="http://www.unicode.org/glossary/#code_point">
650
* Unicode code point</a>, constant {@code U+10FFFF}.
651
*
652
* @since 1.5
653
*/
654
public static final int MAX_CODE_POINT = 0X10FFFF;
655
656
/**
657
* Returns an {@link Optional} containing the nominal descriptor for this
658
* instance.
659
*
660
* @return an {@link Optional} describing the {@linkplain Character} instance
661
* @since 15
662
*/
663
@Override
664
public Optional<DynamicConstantDesc<Character>> describeConstable() {
665
return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value));
666
}
667
668
/**
669
* Instances of this class represent particular subsets of the Unicode
670
* character set. The only family of subsets defined in the
671
* {@code Character} class is {@link Character.UnicodeBlock}.
672
* Other portions of the Java API may define other subsets for their
673
* own purposes.
674
*
675
* @since 1.2
676
*/
677
public static class Subset {
678
679
private String name;
680
681
/**
682
* Constructs a new {@code Subset} instance.
683
*
684
* @param name The name of this subset
685
* @throws NullPointerException if name is {@code null}
686
*/
687
protected Subset(String name) {
688
if (name == null) {
689
throw new NullPointerException("name");
690
}
691
this.name = name;
692
}
693
694
/**
695
* Compares two {@code Subset} objects for equality.
696
* This method returns {@code true} if and only if
697
* {@code this} and the argument refer to the same
698
* object; since this method is {@code final}, this
699
* guarantee holds for all subclasses.
700
*/
701
public final boolean equals(Object obj) {
702
return (this == obj);
703
}
704
705
/**
706
* Returns the standard hash code as defined by the
707
* {@link Object#hashCode} method. This method
708
* is {@code final} in order to ensure that the
709
* {@code equals} and {@code hashCode} methods will
710
* be consistent in all subclasses.
711
*/
712
public final int hashCode() {
713
return super.hashCode();
714
}
715
716
/**
717
* Returns the name of this subset.
718
*/
719
public final String toString() {
720
return name;
721
}
722
}
723
724
// See http://www.unicode.org/Public/UNIDATA/Blocks.txt
725
// for the latest specification of Unicode Blocks.
726
727
/**
728
* A family of character subsets representing the character blocks in the
729
* Unicode specification. Character blocks generally define characters
730
* used for a specific script or purpose. A character is contained by
731
* at most one Unicode block.
732
*
733
* @since 1.2
734
*/
735
public static final class UnicodeBlock extends Subset {
736
/**
737
* 684 - the expected number of entities
738
* 0.75 - the default load factor of HashMap
739
*/
740
private static final int NUM_ENTITIES = 684;
741
private static Map<String, UnicodeBlock> map =
742
new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f));
743
744
/**
745
* Creates a UnicodeBlock with the given identifier name.
746
* This name must be the same as the block identifier.
747
*/
748
private UnicodeBlock(String idName) {
    super(idName);
    // Register this block under its identifier name in the shared map.
    map.put(idName, this);
}
752
753
/**
754
* Creates a UnicodeBlock with the given identifier name and
755
* alias name.
756
*/
757
private UnicodeBlock(String idName, String alias) {
    // Delegate so the identifier name is registered first ...
    this(idName);
    // ... then make the same instance reachable through the alias as well.
    map.put(alias, this);
}
761
762
/**
763
* Creates a UnicodeBlock with the given identifier name and
764
* alias names.
765
*/
766
private UnicodeBlock(String idName, String... aliases) {
    // Delegate so the identifier name is registered first.
    this(idName);
    // Every alias maps to this same instance.
    for (String alias : aliases) {
        map.put(alias, this);
    }
}
771
772
/**
773
* Constant for the "Basic Latin" Unicode character block.
774
* @since 1.2
775
*/
776
public static final UnicodeBlock BASIC_LATIN =
777
new UnicodeBlock("BASIC_LATIN",
778
"BASIC LATIN",
779
"BASICLATIN");
780
781
/**
782
* Constant for the "Latin-1 Supplement" Unicode character block.
783
* @since 1.2
784
*/
785
public static final UnicodeBlock LATIN_1_SUPPLEMENT =
786
new UnicodeBlock("LATIN_1_SUPPLEMENT",
787
"LATIN-1 SUPPLEMENT",
788
"LATIN-1SUPPLEMENT");
789
790
/**
791
* Constant for the "Latin Extended-A" Unicode character block.
792
* @since 1.2
793
*/
794
public static final UnicodeBlock LATIN_EXTENDED_A =
795
new UnicodeBlock("LATIN_EXTENDED_A",
796
"LATIN EXTENDED-A",
797
"LATINEXTENDED-A");
798
799
/**
800
* Constant for the "Latin Extended-B" Unicode character block.
801
* @since 1.2
802
*/
803
public static final UnicodeBlock LATIN_EXTENDED_B =
804
new UnicodeBlock("LATIN_EXTENDED_B",
805
"LATIN EXTENDED-B",
806
"LATINEXTENDED-B");
807
808
/**
809
* Constant for the "IPA Extensions" Unicode character block.
810
* @since 1.2
811
*/
812
public static final UnicodeBlock IPA_EXTENSIONS =
813
new UnicodeBlock("IPA_EXTENSIONS",
814
"IPA EXTENSIONS",
815
"IPAEXTENSIONS");
816
817
/**
818
* Constant for the "Spacing Modifier Letters" Unicode character block.
819
* @since 1.2
820
*/
821
public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
822
new UnicodeBlock("SPACING_MODIFIER_LETTERS",
823
"SPACING MODIFIER LETTERS",
824
"SPACINGMODIFIERLETTERS");
825
826
/**
827
* Constant for the "Combining Diacritical Marks" Unicode character block.
828
* @since 1.2
829
*/
830
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
831
new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
832
"COMBINING DIACRITICAL MARKS",
833
"COMBININGDIACRITICALMARKS");
834
835
/**
836
* Constant for the "Greek and Coptic" Unicode character block.
837
* <p>
838
* This block was previously known as the "Greek" block.
839
*
840
* @since 1.2
841
*/
842
public static final UnicodeBlock GREEK =
843
new UnicodeBlock("GREEK",
844
"GREEK AND COPTIC",
845
"GREEKANDCOPTIC");
846
847
/**
848
* Constant for the "Cyrillic" Unicode character block.
849
* @since 1.2
850
*/
851
public static final UnicodeBlock CYRILLIC =
852
new UnicodeBlock("CYRILLIC");
853
854
/**
855
* Constant for the "Armenian" Unicode character block.
856
* @since 1.2
857
*/
858
public static final UnicodeBlock ARMENIAN =
859
new UnicodeBlock("ARMENIAN");
860
861
/**
862
* Constant for the "Hebrew" Unicode character block.
863
* @since 1.2
864
*/
865
public static final UnicodeBlock HEBREW =
866
new UnicodeBlock("HEBREW");
867
868
/**
869
* Constant for the "Arabic" Unicode character block.
870
* @since 1.2
871
*/
872
public static final UnicodeBlock ARABIC =
873
new UnicodeBlock("ARABIC");
874
875
/**
876
* Constant for the "Devanagari" Unicode character block.
877
* @since 1.2
878
*/
879
public static final UnicodeBlock DEVANAGARI =
880
new UnicodeBlock("DEVANAGARI");
881
882
/**
883
* Constant for the "Bengali" Unicode character block.
884
* @since 1.2
885
*/
886
public static final UnicodeBlock BENGALI =
887
new UnicodeBlock("BENGALI");
888
889
/**
890
* Constant for the "Gurmukhi" Unicode character block.
891
* @since 1.2
892
*/
893
public static final UnicodeBlock GURMUKHI =
894
new UnicodeBlock("GURMUKHI");
895
896
/**
897
* Constant for the "Gujarati" Unicode character block.
898
* @since 1.2
899
*/
900
public static final UnicodeBlock GUJARATI =
901
new UnicodeBlock("GUJARATI");
902
903
/**
904
* Constant for the "Oriya" Unicode character block.
905
* @since 1.2
906
*/
907
public static final UnicodeBlock ORIYA =
908
new UnicodeBlock("ORIYA");
909
910
/**
911
* Constant for the "Tamil" Unicode character block.
912
* @since 1.2
913
*/
914
public static final UnicodeBlock TAMIL =
915
new UnicodeBlock("TAMIL");
916
917
/**
918
* Constant for the "Telugu" Unicode character block.
919
* @since 1.2
920
*/
921
public static final UnicodeBlock TELUGU =
922
new UnicodeBlock("TELUGU");
923
924
/**
925
* Constant for the "Kannada" Unicode character block.
926
* @since 1.2
927
*/
928
public static final UnicodeBlock KANNADA =
929
new UnicodeBlock("KANNADA");
930
931
/**
932
* Constant for the "Malayalam" Unicode character block.
933
* @since 1.2
934
*/
935
public static final UnicodeBlock MALAYALAM =
936
new UnicodeBlock("MALAYALAM");
937
938
/**
939
* Constant for the "Thai" Unicode character block.
940
* @since 1.2
941
*/
942
public static final UnicodeBlock THAI =
943
new UnicodeBlock("THAI");
944
945
/**
946
* Constant for the "Lao" Unicode character block.
947
* @since 1.2
948
*/
949
public static final UnicodeBlock LAO =
950
new UnicodeBlock("LAO");
951
952
/**
953
* Constant for the "Tibetan" Unicode character block.
954
* @since 1.2
955
*/
956
public static final UnicodeBlock TIBETAN =
957
new UnicodeBlock("TIBETAN");
958
959
/**
960
* Constant for the "Georgian" Unicode character block.
961
* @since 1.2
962
*/
963
public static final UnicodeBlock GEORGIAN =
964
new UnicodeBlock("GEORGIAN");
965
966
/**
967
* Constant for the "Hangul Jamo" Unicode character block.
968
* @since 1.2
969
*/
970
public static final UnicodeBlock HANGUL_JAMO =
971
new UnicodeBlock("HANGUL_JAMO",
972
"HANGUL JAMO",
973
"HANGULJAMO");
974
975
/**
976
* Constant for the "Latin Extended Additional" Unicode character block.
977
* @since 1.2
978
*/
979
public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
980
new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
981
"LATIN EXTENDED ADDITIONAL",
982
"LATINEXTENDEDADDITIONAL");
983
984
/**
985
* Constant for the "Greek Extended" Unicode character block.
986
* @since 1.2
987
*/
988
public static final UnicodeBlock GREEK_EXTENDED =
989
new UnicodeBlock("GREEK_EXTENDED",
990
"GREEK EXTENDED",
991
"GREEKEXTENDED");
992
993
/**
994
* Constant for the "General Punctuation" Unicode character block.
995
* @since 1.2
996
*/
997
public static final UnicodeBlock GENERAL_PUNCTUATION =
998
new UnicodeBlock("GENERAL_PUNCTUATION",
999
"GENERAL PUNCTUATION",
1000
"GENERALPUNCTUATION");
1001
1002
/**
1003
* Constant for the "Superscripts and Subscripts" Unicode character
1004
* block.
1005
* @since 1.2
1006
*/
1007
public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
1008
new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
1009
"SUPERSCRIPTS AND SUBSCRIPTS",
1010
"SUPERSCRIPTSANDSUBSCRIPTS");
1011
1012
/**
1013
* Constant for the "Currency Symbols" Unicode character block.
1014
* @since 1.2
1015
*/
1016
public static final UnicodeBlock CURRENCY_SYMBOLS =
1017
new UnicodeBlock("CURRENCY_SYMBOLS",
1018
"CURRENCY SYMBOLS",
1019
"CURRENCYSYMBOLS");
1020
1021
/**
1022
* Constant for the "Combining Diacritical Marks for Symbols" Unicode
1023
* character block.
1024
* <p>
1025
* This block was previously known as "Combining Marks for Symbols".
1026
* @since 1.2
1027
*/
1028
public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
1029
new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
1030
"COMBINING DIACRITICAL MARKS FOR SYMBOLS",
1031
"COMBININGDIACRITICALMARKSFORSYMBOLS",
1032
"COMBINING MARKS FOR SYMBOLS",
1033
"COMBININGMARKSFORSYMBOLS");
1034
1035
/**
1036
* Constant for the "Letterlike Symbols" Unicode character block.
1037
* @since 1.2
1038
*/
1039
public static final UnicodeBlock LETTERLIKE_SYMBOLS =
1040
new UnicodeBlock("LETTERLIKE_SYMBOLS",
1041
"LETTERLIKE SYMBOLS",
1042
"LETTERLIKESYMBOLS");
1043
1044
/**
1045
* Constant for the "Number Forms" Unicode character block.
1046
* @since 1.2
1047
*/
1048
public static final UnicodeBlock NUMBER_FORMS =
1049
new UnicodeBlock("NUMBER_FORMS",
1050
"NUMBER FORMS",
1051
"NUMBERFORMS");
1052
1053
/**
1054
* Constant for the "Arrows" Unicode character block.
1055
* @since 1.2
1056
*/
1057
public static final UnicodeBlock ARROWS =
1058
new UnicodeBlock("ARROWS");
1059
1060
/**
1061
* Constant for the "Mathematical Operators" Unicode character block.
1062
* @since 1.2
1063
*/
1064
public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1065
new UnicodeBlock("MATHEMATICAL_OPERATORS",
1066
"MATHEMATICAL OPERATORS",
1067
"MATHEMATICALOPERATORS");
1068
1069
/**
1070
* Constant for the "Miscellaneous Technical" Unicode character block.
1071
* @since 1.2
1072
*/
1073
public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1074
new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1075
"MISCELLANEOUS TECHNICAL",
1076
"MISCELLANEOUSTECHNICAL");
1077
1078
/**
1079
* Constant for the "Control Pictures" Unicode character block.
1080
* @since 1.2
1081
*/
1082
public static final UnicodeBlock CONTROL_PICTURES =
1083
new UnicodeBlock("CONTROL_PICTURES",
1084
"CONTROL PICTURES",
1085
"CONTROLPICTURES");
1086
1087
/**
1088
* Constant for the "Optical Character Recognition" Unicode character block.
1089
* @since 1.2
1090
*/
1091
public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1092
new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1093
"OPTICAL CHARACTER RECOGNITION",
1094
"OPTICALCHARACTERRECOGNITION");
1095
1096
/**
1097
* Constant for the "Enclosed Alphanumerics" Unicode character block.
1098
* @since 1.2
1099
*/
1100
public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1101
new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1102
"ENCLOSED ALPHANUMERICS",
1103
"ENCLOSEDALPHANUMERICS");
1104
1105
/**
1106
* Constant for the "Box Drawing" Unicode character block.
1107
* @since 1.2
1108
*/
1109
public static final UnicodeBlock BOX_DRAWING =
1110
new UnicodeBlock("BOX_DRAWING",
1111
"BOX DRAWING",
1112
"BOXDRAWING");
1113
1114
/**
1115
* Constant for the "Block Elements" Unicode character block.
1116
* @since 1.2
1117
*/
1118
public static final UnicodeBlock BLOCK_ELEMENTS =
1119
new UnicodeBlock("BLOCK_ELEMENTS",
1120
"BLOCK ELEMENTS",
1121
"BLOCKELEMENTS");
1122
1123
/**
1124
* Constant for the "Geometric Shapes" Unicode character block.
1125
* @since 1.2
1126
*/
1127
public static final UnicodeBlock GEOMETRIC_SHAPES =
1128
new UnicodeBlock("GEOMETRIC_SHAPES",
1129
"GEOMETRIC SHAPES",
1130
"GEOMETRICSHAPES");
1131
1132
/**
1133
* Constant for the "Miscellaneous Symbols" Unicode character block.
1134
* @since 1.2
1135
*/
1136
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1137
new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1138
"MISCELLANEOUS SYMBOLS",
1139
"MISCELLANEOUSSYMBOLS");
1140
1141
/**
1142
* Constant for the "Dingbats" Unicode character block.
1143
* @since 1.2
1144
*/
1145
public static final UnicodeBlock DINGBATS =
1146
new UnicodeBlock("DINGBATS");
1147
1148
/**
1149
* Constant for the "CJK Symbols and Punctuation" Unicode character block.
1150
* @since 1.2
1151
*/
1152
public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1153
new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1154
"CJK SYMBOLS AND PUNCTUATION",
1155
"CJKSYMBOLSANDPUNCTUATION");
1156
1157
/**
1158
* Constant for the "Hiragana" Unicode character block.
1159
* @since 1.2
1160
*/
1161
public static final UnicodeBlock HIRAGANA =
1162
new UnicodeBlock("HIRAGANA");
1163
1164
/**
1165
* Constant for the "Katakana" Unicode character block.
1166
* @since 1.2
1167
*/
1168
public static final UnicodeBlock KATAKANA =
1169
new UnicodeBlock("KATAKANA");
1170
1171
/**
1172
* Constant for the "Bopomofo" Unicode character block.
1173
* @since 1.2
1174
*/
1175
public static final UnicodeBlock BOPOMOFO =
1176
new UnicodeBlock("BOPOMOFO");
1177
1178
/**
1179
* Constant for the "Hangul Compatibility Jamo" Unicode character block.
1180
* @since 1.2
1181
*/
1182
public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1183
new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1184
"HANGUL COMPATIBILITY JAMO",
1185
"HANGULCOMPATIBILITYJAMO");
1186
1187
/**
1188
* Constant for the "Kanbun" Unicode character block.
1189
* @since 1.2
1190
*/
1191
public static final UnicodeBlock KANBUN =
1192
new UnicodeBlock("KANBUN");
1193
1194
/**
1195
* Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1196
* @since 1.2
1197
*/
1198
public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1199
new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1200
"ENCLOSED CJK LETTERS AND MONTHS",
1201
"ENCLOSEDCJKLETTERSANDMONTHS");
1202
1203
/**
1204
* Constant for the "CJK Compatibility" Unicode character block.
1205
* @since 1.2
1206
*/
1207
public static final UnicodeBlock CJK_COMPATIBILITY =
1208
new UnicodeBlock("CJK_COMPATIBILITY",
1209
"CJK COMPATIBILITY",
1210
"CJKCOMPATIBILITY");
1211
1212
/**
1213
* Constant for the "CJK Unified Ideographs" Unicode character block.
1214
* @since 1.2
1215
*/
1216
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1217
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1218
"CJK UNIFIED IDEOGRAPHS",
1219
"CJKUNIFIEDIDEOGRAPHS");
1220
1221
/**
1222
* Constant for the "Hangul Syllables" Unicode character block.
1223
* @since 1.2
1224
*/
1225
public static final UnicodeBlock HANGUL_SYLLABLES =
1226
new UnicodeBlock("HANGUL_SYLLABLES",
1227
"HANGUL SYLLABLES",
1228
"HANGULSYLLABLES");
1229
1230
/**
1231
* Constant for the "Private Use Area" Unicode character block.
1232
* @since 1.2
1233
*/
1234
public static final UnicodeBlock PRIVATE_USE_AREA =
1235
new UnicodeBlock("PRIVATE_USE_AREA",
1236
"PRIVATE USE AREA",
1237
"PRIVATEUSEAREA");
1238
1239
/**
1240
* Constant for the "CJK Compatibility Ideographs" Unicode character
1241
* block.
1242
* @since 1.2
1243
*/
1244
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1245
new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1246
"CJK COMPATIBILITY IDEOGRAPHS",
1247
"CJKCOMPATIBILITYIDEOGRAPHS");
1248
1249
/**
1250
* Constant for the "Alphabetic Presentation Forms" Unicode character block.
1251
* @since 1.2
1252
*/
1253
public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1254
new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1255
"ALPHABETIC PRESENTATION FORMS",
1256
"ALPHABETICPRESENTATIONFORMS");
1257
1258
/**
1259
* Constant for the "Arabic Presentation Forms-A" Unicode character
1260
* block.
1261
* @since 1.2
1262
*/
1263
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1264
new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1265
"ARABIC PRESENTATION FORMS-A",
1266
"ARABICPRESENTATIONFORMS-A");
1267
1268
/**
1269
* Constant for the "Combining Half Marks" Unicode character block.
1270
* @since 1.2
1271
*/
1272
public static final UnicodeBlock COMBINING_HALF_MARKS =
1273
new UnicodeBlock("COMBINING_HALF_MARKS",
1274
"COMBINING HALF MARKS",
1275
"COMBININGHALFMARKS");
1276
1277
/**
1278
* Constant for the "CJK Compatibility Forms" Unicode character block.
1279
* @since 1.2
1280
*/
1281
public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1282
new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1283
"CJK COMPATIBILITY FORMS",
1284
"CJKCOMPATIBILITYFORMS");
1285
1286
/**
1287
* Constant for the "Small Form Variants" Unicode character block.
1288
* @since 1.2
1289
*/
1290
public static final UnicodeBlock SMALL_FORM_VARIANTS =
1291
new UnicodeBlock("SMALL_FORM_VARIANTS",
1292
"SMALL FORM VARIANTS",
1293
"SMALLFORMVARIANTS");
1294
1295
/**
1296
* Constant for the "Arabic Presentation Forms-B" Unicode character block.
1297
* @since 1.2
1298
*/
1299
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1300
new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1301
"ARABIC PRESENTATION FORMS-B",
1302
"ARABICPRESENTATIONFORMS-B");
1303
1304
/**
1305
* Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1306
* block.
1307
* @since 1.2
1308
*/
1309
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1310
new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1311
"HALFWIDTH AND FULLWIDTH FORMS",
1312
"HALFWIDTHANDFULLWIDTHFORMS");
1313
1314
/**
1315
* Constant for the "Specials" Unicode character block.
1316
* @since 1.2
1317
*/
1318
public static final UnicodeBlock SPECIALS =
1319
new UnicodeBlock("SPECIALS");
1320
1321
/**
1322
* Deprecated constant for the surrogates area.
*
* @deprecated
1323
* Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1324
* {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1325
* Those three constants match the block definitions of the Unicode Standard.
1326
* The {@link #of(char)} and {@link #of(int)} methods return the
1327
* standard constants.
1328
*/
1329
@Deprecated(since="1.5")
1330
public static final UnicodeBlock SURROGATES_AREA =
1331
new UnicodeBlock("SURROGATES_AREA");
1332
1333
/**
1334
* Constant for the "Syriac" Unicode character block.
1335
* @since 1.4
1336
*/
1337
public static final UnicodeBlock SYRIAC =
1338
new UnicodeBlock("SYRIAC");
1339
1340
/**
1341
* Constant for the "Thaana" Unicode character block.
1342
* @since 1.4
1343
*/
1344
public static final UnicodeBlock THAANA =
1345
new UnicodeBlock("THAANA");
1346
1347
/**
1348
* Constant for the "Sinhala" Unicode character block.
1349
* @since 1.4
1350
*/
1351
public static final UnicodeBlock SINHALA =
1352
new UnicodeBlock("SINHALA");
1353
1354
/**
1355
* Constant for the "Myanmar" Unicode character block.
1356
* @since 1.4
1357
*/
1358
public static final UnicodeBlock MYANMAR =
1359
new UnicodeBlock("MYANMAR");
1360
1361
/**
1362
* Constant for the "Ethiopic" Unicode character block.
1363
* @since 1.4
1364
*/
1365
public static final UnicodeBlock ETHIOPIC =
1366
new UnicodeBlock("ETHIOPIC");
1367
1368
/**
1369
* Constant for the "Cherokee" Unicode character block.
1370
* @since 1.4
1371
*/
1372
public static final UnicodeBlock CHEROKEE =
1373
new UnicodeBlock("CHEROKEE");
1374
1375
/**
1376
* Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1377
* @since 1.4
1378
*/
1379
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1380
new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1381
"UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1382
"UNIFIEDCANADIANABORIGINALSYLLABICS");
1383
1384
/**
1385
* Constant for the "Ogham" Unicode character block.
1386
* @since 1.4
1387
*/
1388
public static final UnicodeBlock OGHAM =
1389
new UnicodeBlock("OGHAM");
1390
1391
/**
1392
* Constant for the "Runic" Unicode character block.
1393
* @since 1.4
1394
*/
1395
public static final UnicodeBlock RUNIC =
1396
new UnicodeBlock("RUNIC");
1397
1398
/**
1399
* Constant for the "Khmer" Unicode character block.
1400
* @since 1.4
1401
*/
1402
public static final UnicodeBlock KHMER =
1403
new UnicodeBlock("KHMER");
1404
1405
/**
1406
* Constant for the "Mongolian" Unicode character block.
1407
* @since 1.4
1408
*/
1409
public static final UnicodeBlock MONGOLIAN =
1410
new UnicodeBlock("MONGOLIAN");
1411
1412
/**
1413
* Constant for the "Braille Patterns" Unicode character block.
1414
* @since 1.4
1415
*/
1416
public static final UnicodeBlock BRAILLE_PATTERNS =
1417
new UnicodeBlock("BRAILLE_PATTERNS",
1418
"BRAILLE PATTERNS",
1419
"BRAILLEPATTERNS");
1420
1421
/**
1422
* Constant for the "CJK Radicals Supplement" Unicode character block.
1423
* @since 1.4
1424
*/
1425
public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1426
new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1427
"CJK RADICALS SUPPLEMENT",
1428
"CJKRADICALSSUPPLEMENT");
1429
1430
/**
1431
* Constant for the "Kangxi Radicals" Unicode character block.
1432
* @since 1.4
1433
*/
1434
public static final UnicodeBlock KANGXI_RADICALS =
1435
new UnicodeBlock("KANGXI_RADICALS",
1436
"KANGXI RADICALS",
1437
"KANGXIRADICALS");
1438
1439
/**
1440
* Constant for the "Ideographic Description Characters" Unicode character block.
1441
* @since 1.4
1442
*/
1443
public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1444
new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1445
"IDEOGRAPHIC DESCRIPTION CHARACTERS",
1446
"IDEOGRAPHICDESCRIPTIONCHARACTERS");
1447
1448
/**
1449
* Constant for the "Bopomofo Extended" Unicode character block.
1450
* @since 1.4
1451
*/
1452
public static final UnicodeBlock BOPOMOFO_EXTENDED =
1453
new UnicodeBlock("BOPOMOFO_EXTENDED",
1454
"BOPOMOFO EXTENDED",
1455
"BOPOMOFOEXTENDED");
1456
1457
/**
1458
* Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1459
* @since 1.4
1460
*/
1461
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1462
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1463
"CJK UNIFIED IDEOGRAPHS EXTENSION A",
1464
"CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1465
1466
/**
1467
* Constant for the "Yi Syllables" Unicode character block.
1468
* @since 1.4
1469
*/
1470
public static final UnicodeBlock YI_SYLLABLES =
1471
new UnicodeBlock("YI_SYLLABLES",
1472
"YI SYLLABLES",
1473
"YISYLLABLES");
1474
1475
/**
1476
* Constant for the "Yi Radicals" Unicode character block.
1477
* @since 1.4
1478
*/
1479
public static final UnicodeBlock YI_RADICALS =
1480
new UnicodeBlock("YI_RADICALS",
1481
"YI RADICALS",
1482
"YIRADICALS");
1483
1484
/**
1485
* Constant for the "Cyrillic Supplement" Unicode character block.
* <p>
1486
* This block was previously known as the "Cyrillic Supplementary" block,
* which is the name this constant still carries.
1487
* @since 1.5
1488
*/
1489
public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1490
new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1491
"CYRILLIC SUPPLEMENTARY",
1492
"CYRILLICSUPPLEMENTARY",
1493
"CYRILLIC SUPPLEMENT",
1494
"CYRILLICSUPPLEMENT");
1495
1496
/**
1497
* Constant for the "Tagalog" Unicode character block.
1498
* @since 1.5
1499
*/
1500
public static final UnicodeBlock TAGALOG =
1501
new UnicodeBlock("TAGALOG");
1502
1503
/**
1504
* Constant for the "Hanunoo" Unicode character block.
1505
* @since 1.5
1506
*/
1507
public static final UnicodeBlock HANUNOO =
1508
new UnicodeBlock("HANUNOO");
1509
1510
/**
1511
* Constant for the "Buhid" Unicode character block.
1512
* @since 1.5
1513
*/
1514
public static final UnicodeBlock BUHID =
1515
new UnicodeBlock("BUHID");
1516
1517
/**
1518
* Constant for the "Tagbanwa" Unicode character block.
1519
* @since 1.5
1520
*/
1521
public static final UnicodeBlock TAGBANWA =
1522
new UnicodeBlock("TAGBANWA");
1523
1524
/**
1525
* Constant for the "Limbu" Unicode character block.
1526
* @since 1.5
1527
*/
1528
public static final UnicodeBlock LIMBU =
1529
new UnicodeBlock("LIMBU");
1530
1531
/**
1532
* Constant for the "Tai Le" Unicode character block.
1533
* @since 1.5
1534
*/
1535
public static final UnicodeBlock TAI_LE =
1536
new UnicodeBlock("TAI_LE",
1537
"TAI LE",
1538
"TAILE");
1539
1540
/**
1541
* Constant for the "Khmer Symbols" Unicode character block.
1542
* @since 1.5
1543
*/
1544
public static final UnicodeBlock KHMER_SYMBOLS =
1545
new UnicodeBlock("KHMER_SYMBOLS",
1546
"KHMER SYMBOLS",
1547
"KHMERSYMBOLS");
1548
1549
/**
1550
* Constant for the "Phonetic Extensions" Unicode character block.
1551
* @since 1.5
1552
*/
1553
public static final UnicodeBlock PHONETIC_EXTENSIONS =
1554
new UnicodeBlock("PHONETIC_EXTENSIONS",
1555
"PHONETIC EXTENSIONS",
1556
"PHONETICEXTENSIONS");
1557
1558
/**
1559
* Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1560
* @since 1.5
1561
*/
1562
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1563
new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1564
"MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1565
"MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1566
1567
/**
1568
* Constant for the "Supplemental Arrows-A" Unicode character block.
1569
* @since 1.5
1570
*/
1571
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1572
new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1573
"SUPPLEMENTAL ARROWS-A",
1574
"SUPPLEMENTALARROWS-A");
1575
1576
/**
1577
* Constant for the "Supplemental Arrows-B" Unicode character block.
1578
* @since 1.5
1579
*/
1580
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1581
new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1582
"SUPPLEMENTAL ARROWS-B",
1583
"SUPPLEMENTALARROWS-B");
1584
1585
/**
1586
* Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1587
* character block.
1588
* @since 1.5
1589
*/
1590
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1591
new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1592
"MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1593
"MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1594
1595
/**
1596
* Constant for the "Supplemental Mathematical Operators" Unicode
1597
* character block.
1598
* @since 1.5
1599
*/
1600
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1601
new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1602
"SUPPLEMENTAL MATHEMATICAL OPERATORS",
1603
"SUPPLEMENTALMATHEMATICALOPERATORS");
1604
1605
/**
1606
* Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1607
* block.
1608
* @since 1.5
1609
*/
1610
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1611
new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1612
"MISCELLANEOUS SYMBOLS AND ARROWS",
1613
"MISCELLANEOUSSYMBOLSANDARROWS");
1614
1615
/**
1616
* Constant for the "Katakana Phonetic Extensions" Unicode character
1617
* block.
1618
* @since 1.5
1619
*/
1620
public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1621
new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1622
"KATAKANA PHONETIC EXTENSIONS",
1623
"KATAKANAPHONETICEXTENSIONS");
1624
1625
/**
1626
* Constant for the "Yijing Hexagram Symbols" Unicode character block.
1627
* @since 1.5
1628
*/
1629
public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1630
new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1631
"YIJING HEXAGRAM SYMBOLS",
1632
"YIJINGHEXAGRAMSYMBOLS");
1633
1634
/**
1635
* Constant for the "Variation Selectors" Unicode character block.
1636
* @since 1.5
1637
*/
1638
public static final UnicodeBlock VARIATION_SELECTORS =
1639
new UnicodeBlock("VARIATION_SELECTORS",
1640
"VARIATION SELECTORS",
1641
"VARIATIONSELECTORS");
1642
1643
/**
1644
* Constant for the "Linear B Syllabary" Unicode character block.
1645
* @since 1.5
1646
*/
1647
public static final UnicodeBlock LINEAR_B_SYLLABARY =
1648
new UnicodeBlock("LINEAR_B_SYLLABARY",
1649
"LINEAR B SYLLABARY",
1650
"LINEARBSYLLABARY");
1651
1652
/**
1653
* Constant for the "Linear B Ideograms" Unicode character block.
1654
* @since 1.5
1655
*/
1656
public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1657
new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1658
"LINEAR B IDEOGRAMS",
1659
"LINEARBIDEOGRAMS");
1660
1661
/**
1662
* Constant for the "Aegean Numbers" Unicode character block.
1663
* @since 1.5
1664
*/
1665
public static final UnicodeBlock AEGEAN_NUMBERS =
1666
new UnicodeBlock("AEGEAN_NUMBERS",
1667
"AEGEAN NUMBERS",
1668
"AEGEANNUMBERS");
1669
1670
/**
1671
* Constant for the "Old Italic" Unicode character block.
1672
* @since 1.5
1673
*/
1674
public static final UnicodeBlock OLD_ITALIC =
1675
new UnicodeBlock("OLD_ITALIC",
1676
"OLD ITALIC",
1677
"OLDITALIC");
1678
1679
/**
1680
* Constant for the "Gothic" Unicode character block.
1681
* @since 1.5
1682
*/
1683
public static final UnicodeBlock GOTHIC =
1684
new UnicodeBlock("GOTHIC");
1685
1686
/**
1687
* Constant for the "Ugaritic" Unicode character block.
1688
* @since 1.5
1689
*/
1690
public static final UnicodeBlock UGARITIC =
1691
new UnicodeBlock("UGARITIC");
1692
1693
/**
1694
* Constant for the "Deseret" Unicode character block.
1695
* @since 1.5
1696
*/
1697
public static final UnicodeBlock DESERET =
1698
new UnicodeBlock("DESERET");
1699
1700
/**
1701
* Constant for the "Shavian" Unicode character block.
1702
* @since 1.5
1703
*/
1704
public static final UnicodeBlock SHAVIAN =
1705
new UnicodeBlock("SHAVIAN");
1706
1707
/**
1708
* Constant for the "Osmanya" Unicode character block.
1709
* @since 1.5
1710
*/
1711
public static final UnicodeBlock OSMANYA =
1712
new UnicodeBlock("OSMANYA");
1713
1714
/**
1715
* Constant for the "Cypriot Syllabary" Unicode character block.
1716
* @since 1.5
1717
*/
1718
public static final UnicodeBlock CYPRIOT_SYLLABARY =
1719
new UnicodeBlock("CYPRIOT_SYLLABARY",
1720
"CYPRIOT SYLLABARY",
1721
"CYPRIOTSYLLABARY");
1722
1723
/**
1724
* Constant for the "Byzantine Musical Symbols" Unicode character block.
1725
* @since 1.5
1726
*/
1727
public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1728
new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1729
"BYZANTINE MUSICAL SYMBOLS",
1730
"BYZANTINEMUSICALSYMBOLS");
1731
1732
/**
1733
* Constant for the "Musical Symbols" Unicode character block.
1734
* @since 1.5
1735
*/
1736
public static final UnicodeBlock MUSICAL_SYMBOLS =
1737
new UnicodeBlock("MUSICAL_SYMBOLS",
1738
"MUSICAL SYMBOLS",
1739
"MUSICALSYMBOLS");
1740
1741
/**
1742
* Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1743
* @since 1.5
1744
*/
1745
public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1746
new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1747
"TAI XUAN JING SYMBOLS",
1748
"TAIXUANJINGSYMBOLS");
1749
1750
/**
1751
* Constant for the "Mathematical Alphanumeric Symbols" Unicode
1752
* character block.
1753
* @since 1.5
1754
*/
1755
public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1756
new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1757
"MATHEMATICAL ALPHANUMERIC SYMBOLS",
1758
"MATHEMATICALALPHANUMERICSYMBOLS");
1759
1760
/**
1761
* Constant for the "CJK Unified Ideographs Extension B" Unicode
1762
* character block.
1763
* @since 1.5
1764
*/
1765
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1766
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1767
"CJK UNIFIED IDEOGRAPHS EXTENSION B",
1768
"CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1769
1770
/**
1771
* Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1772
* @since 1.5
1773
*/
1774
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1775
new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1776
"CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1777
"CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1778
1779
/**
1780
* Constant for the "Tags" Unicode character block.
1781
* @since 1.5
1782
*/
1783
public static final UnicodeBlock TAGS =
1784
new UnicodeBlock("TAGS");
1785
1786
/**
1787
* Constant for the "Variation Selectors Supplement" Unicode character
1788
* block.
1789
* @since 1.5
1790
*/
1791
public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1792
new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1793
"VARIATION SELECTORS SUPPLEMENT",
1794
"VARIATIONSELECTORSSUPPLEMENT");
1795
1796
/**
1797
* Constant for the "Supplementary Private Use Area-A" Unicode character
1798
* block.
1799
* @since 1.5
1800
*/
1801
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1802
new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1803
"SUPPLEMENTARY PRIVATE USE AREA-A",
1804
"SUPPLEMENTARYPRIVATEUSEAREA-A");
1805
1806
/**
1807
* Constant for the "Supplementary Private Use Area-B" Unicode character
1808
* block.
1809
* @since 1.5
1810
*/
1811
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1812
new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1813
"SUPPLEMENTARY PRIVATE USE AREA-B",
1814
"SUPPLEMENTARYPRIVATEUSEAREA-B");
1815
1816
/**
1817
* Constant for the "High Surrogates" Unicode character block.
* <p>
1818
* This block represents code point values in the high surrogate
1819
* range: U+D800 through U+DB7F.
1820
*
1821
* @since 1.5
1822
*/
1823
public static final UnicodeBlock HIGH_SURROGATES =
1824
new UnicodeBlock("HIGH_SURROGATES",
1825
"HIGH SURROGATES",
1826
"HIGHSURROGATES");
1827
1828
/**
1829
* Constant for the "High Private Use Surrogates" Unicode character
1830
* block.
* <p>
1831
* This block represents code point values in the private use high
1832
* surrogate range: U+DB80 through U+DBFF.
1833
*
1834
* @since 1.5
1835
*/
1836
public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1837
new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1838
"HIGH PRIVATE USE SURROGATES",
1839
"HIGHPRIVATEUSESURROGATES");
1840
1841
/**
1842
* Constant for the "Low Surrogates" Unicode character block.
* <p>
1843
* This block represents code point values in the low surrogate
1844
* range: U+DC00 through U+DFFF.
1845
*
1846
* @since 1.5
1847
*/
1848
public static final UnicodeBlock LOW_SURROGATES =
1849
new UnicodeBlock("LOW_SURROGATES",
1850
"LOW SURROGATES",
1851
"LOWSURROGATES");
1852
1853
/**
1854
* Constant for the "Arabic Supplement" Unicode character block.
1855
* @since 1.7
1856
*/
1857
public static final UnicodeBlock ARABIC_SUPPLEMENT =
1858
new UnicodeBlock("ARABIC_SUPPLEMENT",
1859
"ARABIC SUPPLEMENT",
1860
"ARABICSUPPLEMENT");
1861
1862
/**
1863
* Constant for the "NKo" Unicode character block.
1864
* @since 1.7
1865
*/
1866
public static final UnicodeBlock NKO =
1867
new UnicodeBlock("NKO");
1868
1869
/**
1870
* Constant for the "Samaritan" Unicode character block.
1871
* @since 1.7
1872
*/
1873
public static final UnicodeBlock SAMARITAN =
1874
new UnicodeBlock("SAMARITAN");
1875
1876
/**
1877
* Constant for the "Mandaic" Unicode character block.
1878
* @since 1.7
1879
*/
1880
public static final UnicodeBlock MANDAIC =
1881
new UnicodeBlock("MANDAIC");
1882
1883
/**
1884
* Constant for the "Ethiopic Supplement" Unicode character block.
1885
* @since 1.7
1886
*/
1887
public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1888
new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1889
"ETHIOPIC SUPPLEMENT",
1890
"ETHIOPICSUPPLEMENT");
1891
1892
/**
1893
* Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1894
* Unicode character block.
1895
* @since 1.7
1896
*/
1897
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1898
new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1899
"UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1900
"UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1901
1902
/**
1903
* Constant for the "New Tai Lue" Unicode character block.
1904
* @since 1.7
1905
*/
1906
public static final UnicodeBlock NEW_TAI_LUE =
1907
new UnicodeBlock("NEW_TAI_LUE",
1908
"NEW TAI LUE",
1909
"NEWTAILUE");
1910
1911
/**
1912
* Constant for the "Buginese" Unicode character block.
1913
* @since 1.7
1914
*/
1915
public static final UnicodeBlock BUGINESE =
1916
new UnicodeBlock("BUGINESE");
1917
1918
/**
1919
* Constant for the "Tai Tham" Unicode character block.
1920
* @since 1.7
1921
*/
1922
public static final UnicodeBlock TAI_THAM =
1923
new UnicodeBlock("TAI_THAM",
1924
"TAI THAM",
1925
"TAITHAM");
1926
1927
/**
1928
* Constant for the "Balinese" Unicode character block.
1929
* @since 1.7
1930
*/
1931
public static final UnicodeBlock BALINESE =
1932
new UnicodeBlock("BALINESE");
1933
1934
/**
1935
* Constant for the "Sundanese" Unicode character block.
1936
* @since 1.7
1937
*/
1938
public static final UnicodeBlock SUNDANESE =
1939
new UnicodeBlock("SUNDANESE");
1940
1941
/**
1942
* Constant for the "Batak" Unicode character block.
1943
* @since 1.7
1944
*/
1945
public static final UnicodeBlock BATAK =
1946
new UnicodeBlock("BATAK");
1947
1948
/**
1949
* Constant for the "Lepcha" Unicode character block.
1950
* @since 1.7
1951
*/
1952
public static final UnicodeBlock LEPCHA =
1953
new UnicodeBlock("LEPCHA");
1954
1955
/**
1956
* Constant for the "Ol Chiki" Unicode character block.
1957
* @since 1.7
1958
*/
1959
public static final UnicodeBlock OL_CHIKI =
1960
new UnicodeBlock("OL_CHIKI",
1961
"OL CHIKI",
1962
"OLCHIKI");
1963
1964
/**
1965
* Constant for the "Vedic Extensions" Unicode character block.
1966
* @since 1.7
1967
*/
1968
public static final UnicodeBlock VEDIC_EXTENSIONS =
1969
new UnicodeBlock("VEDIC_EXTENSIONS",
1970
"VEDIC EXTENSIONS",
1971
"VEDICEXTENSIONS");
1972
1973
/**
1974
* Constant for the "Phonetic Extensions Supplement" Unicode character
1975
* block.
1976
* @since 1.7
1977
*/
1978
public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1979
new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1980
"PHONETIC EXTENSIONS SUPPLEMENT",
1981
"PHONETICEXTENSIONSSUPPLEMENT");
1982
1983
/**
1984
* Constant for the "Combining Diacritical Marks Supplement" Unicode
1985
* character block.
1986
* @since 1.7
1987
*/
1988
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1989
new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1990
"COMBINING DIACRITICAL MARKS SUPPLEMENT",
1991
"COMBININGDIACRITICALMARKSSUPPLEMENT");
1992
1993
/**
1994
* Constant for the "Glagolitic" Unicode character block.
1995
* @since 1.7
1996
*/
1997
public static final UnicodeBlock GLAGOLITIC =
1998
new UnicodeBlock("GLAGOLITIC");
1999
2000
/**
2001
* Constant for the "Latin Extended-C" Unicode character block.
2002
* @since 1.7
2003
*/
2004
public static final UnicodeBlock LATIN_EXTENDED_C =
2005
new UnicodeBlock("LATIN_EXTENDED_C",
2006
"LATIN EXTENDED-C",
2007
"LATINEXTENDED-C");
2008
2009
/**
2010
* Constant for the "Coptic" Unicode character block.
2011
* @since 1.7
2012
*/
2013
public static final UnicodeBlock COPTIC =
2014
new UnicodeBlock("COPTIC");
2015
2016
/**
2017
* Constant for the "Georgian Supplement" Unicode character block.
2018
* @since 1.7
2019
*/
2020
public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2021
new UnicodeBlock("GEORGIAN_SUPPLEMENT",
2022
"GEORGIAN SUPPLEMENT",
2023
"GEORGIANSUPPLEMENT");
2024
2025
/**
2026
* Constant for the "Tifinagh" Unicode character block.
2027
* @since 1.7
2028
*/
2029
public static final UnicodeBlock TIFINAGH =
2030
new UnicodeBlock("TIFINAGH");
2031
2032
/**
2033
* Constant for the "Ethiopic Extended" Unicode character block.
2034
* @since 1.7
2035
*/
2036
public static final UnicodeBlock ETHIOPIC_EXTENDED =
2037
new UnicodeBlock("ETHIOPIC_EXTENDED",
2038
"ETHIOPIC EXTENDED",
2039
"ETHIOPICEXTENDED");
2040
2041
/**
2042
* Constant for the "Cyrillic Extended-A" Unicode character block.
2043
* @since 1.7
2044
*/
2045
public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2046
new UnicodeBlock("CYRILLIC_EXTENDED_A",
2047
"CYRILLIC EXTENDED-A",
2048
"CYRILLICEXTENDED-A");
2049
2050
/**
2051
* Constant for the "Supplemental Punctuation" Unicode character block.
2052
* @since 1.7
2053
*/
2054
public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2055
new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2056
"SUPPLEMENTAL PUNCTUATION",
2057
"SUPPLEMENTALPUNCTUATION");
2058
2059
/**
2060
* Constant for the "CJK Strokes" Unicode character block.
2061
* @since 1.7
2062
*/
2063
public static final UnicodeBlock CJK_STROKES =
2064
new UnicodeBlock("CJK_STROKES",
2065
"CJK STROKES",
2066
"CJKSTROKES");
2067
2068
/**
2069
* Constant for the "Lisu" Unicode character block.
2070
* @since 1.7
2071
*/
2072
public static final UnicodeBlock LISU =
2073
new UnicodeBlock("LISU");
2074
2075
/**
2076
* Constant for the "Vai" Unicode character block.
2077
* @since 1.7
2078
*/
2079
public static final UnicodeBlock VAI =
2080
new UnicodeBlock("VAI");
2081
2082
/**
2083
* Constant for the "Cyrillic Extended-B" Unicode character block.
2084
* @since 1.7
2085
*/
2086
public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2087
new UnicodeBlock("CYRILLIC_EXTENDED_B",
2088
"CYRILLIC EXTENDED-B",
2089
"CYRILLICEXTENDED-B");
2090
2091
/**
2092
* Constant for the "Bamum" Unicode character block.
2093
* @since 1.7
2094
*/
2095
public static final UnicodeBlock BAMUM =
2096
new UnicodeBlock("BAMUM");
2097
2098
/**
2099
* Constant for the "Modifier Tone Letters" Unicode character block.
2100
* @since 1.7
2101
*/
2102
public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2103
new UnicodeBlock("MODIFIER_TONE_LETTERS",
2104
"MODIFIER TONE LETTERS",
2105
"MODIFIERTONELETTERS");
2106
2107
/**
2108
* Constant for the "Latin Extended-D" Unicode character block.
2109
* @since 1.7
2110
*/
2111
public static final UnicodeBlock LATIN_EXTENDED_D =
2112
new UnicodeBlock("LATIN_EXTENDED_D",
2113
"LATIN EXTENDED-D",
2114
"LATINEXTENDED-D");
2115
2116
/**
2117
* Constant for the "Syloti Nagri" Unicode character block.
2118
* @since 1.7
2119
*/
2120
public static final UnicodeBlock SYLOTI_NAGRI =
2121
new UnicodeBlock("SYLOTI_NAGRI",
2122
"SYLOTI NAGRI",
2123
"SYLOTINAGRI");
2124
2125
/**
2126
* Constant for the "Common Indic Number Forms" Unicode character block.
2127
* @since 1.7
2128
*/
2129
public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2130
new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2131
"COMMON INDIC NUMBER FORMS",
2132
"COMMONINDICNUMBERFORMS");
2133
2134
/**
2135
* Constant for the "Phags-pa" Unicode character block.
2136
* @since 1.7
2137
*/
2138
public static final UnicodeBlock PHAGS_PA =
2139
new UnicodeBlock("PHAGS_PA",
2140
"PHAGS-PA");
2141
2142
/**
2143
* Constant for the "Saurashtra" Unicode character block.
2144
* @since 1.7
2145
*/
2146
public static final UnicodeBlock SAURASHTRA =
2147
new UnicodeBlock("SAURASHTRA");
2148
2149
/**
2150
* Constant for the "Devanagari Extended" Unicode character block.
2151
* @since 1.7
2152
*/
2153
public static final UnicodeBlock DEVANAGARI_EXTENDED =
2154
new UnicodeBlock("DEVANAGARI_EXTENDED",
2155
"DEVANAGARI EXTENDED",
2156
"DEVANAGARIEXTENDED");
2157
2158
/**
2159
* Constant for the "Kayah Li" Unicode character block.
2160
* @since 1.7
2161
*/
2162
public static final UnicodeBlock KAYAH_LI =
2163
new UnicodeBlock("KAYAH_LI",
2164
"KAYAH LI",
2165
"KAYAHLI");
2166
2167
/**
2168
* Constant for the "Rejang" Unicode character block.
2169
* @since 1.7
2170
*/
2171
public static final UnicodeBlock REJANG =
2172
new UnicodeBlock("REJANG");
2173
2174
/**
2175
* Constant for the "Hangul Jamo Extended-A" Unicode character block.
2176
* @since 1.7
2177
*/
2178
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2179
new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2180
"HANGUL JAMO EXTENDED-A",
2181
"HANGULJAMOEXTENDED-A");
2182
2183
/**
2184
* Constant for the "Javanese" Unicode character block.
2185
* @since 1.7
2186
*/
2187
public static final UnicodeBlock JAVANESE =
2188
new UnicodeBlock("JAVANESE");
2189
2190
/**
2191
* Constant for the "Cham" Unicode character block.
2192
* @since 1.7
2193
*/
2194
public static final UnicodeBlock CHAM =
2195
new UnicodeBlock("CHAM");
2196
2197
/**
2198
* Constant for the "Myanmar Extended-A" Unicode character block.
2199
* @since 1.7
2200
*/
2201
public static final UnicodeBlock MYANMAR_EXTENDED_A =
2202
new UnicodeBlock("MYANMAR_EXTENDED_A",
2203
"MYANMAR EXTENDED-A",
2204
"MYANMAREXTENDED-A");
2205
2206
/**
2207
* Constant for the "Tai Viet" Unicode character block.
2208
* @since 1.7
2209
*/
2210
public static final UnicodeBlock TAI_VIET =
2211
new UnicodeBlock("TAI_VIET",
2212
"TAI VIET",
2213
"TAIVIET");
2214
2215
/**
2216
* Constant for the "Ethiopic Extended-A" Unicode character block.
2217
* @since 1.7
2218
*/
2219
public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2220
new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2221
"ETHIOPIC EXTENDED-A",
2222
"ETHIOPICEXTENDED-A");
2223
2224
/**
2225
* Constant for the "Meetei Mayek" Unicode character block.
2226
* @since 1.7
2227
*/
2228
public static final UnicodeBlock MEETEI_MAYEK =
2229
new UnicodeBlock("MEETEI_MAYEK",
2230
"MEETEI MAYEK",
2231
"MEETEIMAYEK");
2232
2233
/**
2234
* Constant for the "Hangul Jamo Extended-B" Unicode character block.
2235
* @since 1.7
2236
*/
2237
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2238
new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2239
"HANGUL JAMO EXTENDED-B",
2240
"HANGULJAMOEXTENDED-B");
2241
2242
/**
2243
* Constant for the "Vertical Forms" Unicode character block.
2244
* @since 1.7
2245
*/
2246
public static final UnicodeBlock VERTICAL_FORMS =
2247
new UnicodeBlock("VERTICAL_FORMS",
2248
"VERTICAL FORMS",
2249
"VERTICALFORMS");
2250
2251
/**
2252
* Constant for the "Ancient Greek Numbers" Unicode character block.
2253
* @since 1.7
2254
*/
2255
public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2256
new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2257
"ANCIENT GREEK NUMBERS",
2258
"ANCIENTGREEKNUMBERS");
2259
2260
/**
2261
* Constant for the "Ancient Symbols" Unicode character block.
2262
* @since 1.7
2263
*/
2264
public static final UnicodeBlock ANCIENT_SYMBOLS =
2265
new UnicodeBlock("ANCIENT_SYMBOLS",
2266
"ANCIENT SYMBOLS",
2267
"ANCIENTSYMBOLS");
2268
2269
/**
2270
* Constant for the "Phaistos Disc" Unicode character block.
2271
* @since 1.7
2272
*/
2273
public static final UnicodeBlock PHAISTOS_DISC =
2274
new UnicodeBlock("PHAISTOS_DISC",
2275
"PHAISTOS DISC",
2276
"PHAISTOSDISC");
2277
2278
/**
2279
* Constant for the "Lycian" Unicode character block.
2280
* @since 1.7
2281
*/
2282
public static final UnicodeBlock LYCIAN =
2283
new UnicodeBlock("LYCIAN");
2284
2285
/**
2286
* Constant for the "Carian" Unicode character block.
2287
* @since 1.7
2288
*/
2289
public static final UnicodeBlock CARIAN =
2290
new UnicodeBlock("CARIAN");
2291
2292
/**
2293
* Constant for the "Old Persian" Unicode character block.
2294
* @since 1.7
2295
*/
2296
public static final UnicodeBlock OLD_PERSIAN =
2297
new UnicodeBlock("OLD_PERSIAN",
2298
"OLD PERSIAN",
2299
"OLDPERSIAN");
2300
2301
/**
2302
* Constant for the "Imperial Aramaic" Unicode character block.
2303
* @since 1.7
2304
*/
2305
public static final UnicodeBlock IMPERIAL_ARAMAIC =
2306
new UnicodeBlock("IMPERIAL_ARAMAIC",
2307
"IMPERIAL ARAMAIC",
2308
"IMPERIALARAMAIC");
2309
2310
/**
2311
* Constant for the "Phoenician" Unicode character block.
2312
* @since 1.7
2313
*/
2314
public static final UnicodeBlock PHOENICIAN =
2315
new UnicodeBlock("PHOENICIAN");
2316
2317
/**
2318
* Constant for the "Lydian" Unicode character block.
2319
* @since 1.7
2320
*/
2321
public static final UnicodeBlock LYDIAN =
2322
new UnicodeBlock("LYDIAN");
2323
2324
/**
2325
* Constant for the "Kharoshthi" Unicode character block.
2326
* @since 1.7
2327
*/
2328
public static final UnicodeBlock KHAROSHTHI =
2329
new UnicodeBlock("KHAROSHTHI");
2330
2331
/**
2332
* Constant for the "Old South Arabian" Unicode character block.
2333
* @since 1.7
2334
*/
2335
public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2336
new UnicodeBlock("OLD_SOUTH_ARABIAN",
2337
"OLD SOUTH ARABIAN",
2338
"OLDSOUTHARABIAN");
2339
2340
/**
2341
* Constant for the "Avestan" Unicode character block.
2342
* @since 1.7
2343
*/
2344
public static final UnicodeBlock AVESTAN =
2345
new UnicodeBlock("AVESTAN");
2346
2347
/**
2348
* Constant for the "Inscriptional Parthian" Unicode character block.
2349
* @since 1.7
2350
*/
2351
public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2352
new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2353
"INSCRIPTIONAL PARTHIAN",
2354
"INSCRIPTIONALPARTHIAN");
2355
2356
/**
2357
* Constant for the "Inscriptional Pahlavi" Unicode character block.
2358
* @since 1.7
2359
*/
2360
public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2361
new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2362
"INSCRIPTIONAL PAHLAVI",
2363
"INSCRIPTIONALPAHLAVI");
2364
2365
/**
2366
* Constant for the "Old Turkic" Unicode character block.
2367
* @since 1.7
2368
*/
2369
public static final UnicodeBlock OLD_TURKIC =
2370
new UnicodeBlock("OLD_TURKIC",
2371
"OLD TURKIC",
2372
"OLDTURKIC");
2373
2374
/**
2375
* Constant for the "Rumi Numeral Symbols" Unicode character block.
2376
* @since 1.7
2377
*/
2378
public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2379
new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2380
"RUMI NUMERAL SYMBOLS",
2381
"RUMINUMERALSYMBOLS");
2382
2383
/**
2384
* Constant for the "Brahmi" Unicode character block.
2385
* @since 1.7
2386
*/
2387
public static final UnicodeBlock BRAHMI =
2388
new UnicodeBlock("BRAHMI");
2389
2390
/**
2391
* Constant for the "Kaithi" Unicode character block.
2392
* @since 1.7
2393
*/
2394
public static final UnicodeBlock KAITHI =
2395
new UnicodeBlock("KAITHI");
2396
2397
/**
2398
* Constant for the "Cuneiform" Unicode character block.
2399
* @since 1.7
2400
*/
2401
public static final UnicodeBlock CUNEIFORM =
2402
new UnicodeBlock("CUNEIFORM");
2403
2404
/**
2405
* Constant for the "Cuneiform Numbers and Punctuation" Unicode
2406
* character block.
2407
* @since 1.7
2408
*/
2409
public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2410
new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2411
"CUNEIFORM NUMBERS AND PUNCTUATION",
2412
"CUNEIFORMNUMBERSANDPUNCTUATION");
2413
2414
/**
2415
* Constant for the "Egyptian Hieroglyphs" Unicode character block.
2416
* @since 1.7
2417
*/
2418
public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2419
new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2420
"EGYPTIAN HIEROGLYPHS",
2421
"EGYPTIANHIEROGLYPHS");
2422
2423
/**
2424
* Constant for the "Bamum Supplement" Unicode character block.
2425
* @since 1.7
2426
*/
2427
public static final UnicodeBlock BAMUM_SUPPLEMENT =
2428
new UnicodeBlock("BAMUM_SUPPLEMENT",
2429
"BAMUM SUPPLEMENT",
2430
"BAMUMSUPPLEMENT");
2431
2432
/**
2433
* Constant for the "Kana Supplement" Unicode character block.
2434
* @since 1.7
2435
*/
2436
public static final UnicodeBlock KANA_SUPPLEMENT =
2437
new UnicodeBlock("KANA_SUPPLEMENT",
2438
"KANA SUPPLEMENT",
2439
"KANASUPPLEMENT");
2440
2441
/**
2442
* Constant for the "Ancient Greek Musical Notation" Unicode character
2443
* block.
2444
* @since 1.7
2445
*/
2446
public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2447
new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2448
"ANCIENT GREEK MUSICAL NOTATION",
2449
"ANCIENTGREEKMUSICALNOTATION");
2450
2451
/**
2452
* Constant for the "Counting Rod Numerals" Unicode character block.
2453
* @since 1.7
2454
*/
2455
public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2456
new UnicodeBlock("COUNTING_ROD_NUMERALS",
2457
"COUNTING ROD NUMERALS",
2458
"COUNTINGRODNUMERALS");
2459
2460
/**
2461
* Constant for the "Mahjong Tiles" Unicode character block.
2462
* @since 1.7
2463
*/
2464
public static final UnicodeBlock MAHJONG_TILES =
2465
new UnicodeBlock("MAHJONG_TILES",
2466
"MAHJONG TILES",
2467
"MAHJONGTILES");
2468
2469
/**
2470
* Constant for the "Domino Tiles" Unicode character block.
2471
* @since 1.7
2472
*/
2473
public static final UnicodeBlock DOMINO_TILES =
2474
new UnicodeBlock("DOMINO_TILES",
2475
"DOMINO TILES",
2476
"DOMINOTILES");
2477
2478
/**
2479
* Constant for the "Playing Cards" Unicode character block.
2480
* @since 1.7
2481
*/
2482
public static final UnicodeBlock PLAYING_CARDS =
2483
new UnicodeBlock("PLAYING_CARDS",
2484
"PLAYING CARDS",
2485
"PLAYINGCARDS");
2486
2487
/**
2488
* Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2489
* block.
2490
* @since 1.7
2491
*/
2492
public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2493
new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2494
"ENCLOSED ALPHANUMERIC SUPPLEMENT",
2495
"ENCLOSEDALPHANUMERICSUPPLEMENT");
2496
2497
/**
2498
* Constant for the "Enclosed Ideographic Supplement" Unicode character
2499
* block.
2500
* @since 1.7
2501
*/
2502
public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2503
new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2504
"ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2505
"ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2506
2507
/**
2508
* Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2509
* character block.
2510
* @since 1.7
2511
*/
2512
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2513
new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2514
"MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2515
"MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2516
2517
/**
2518
* Constant for the "Emoticons" Unicode character block.
2519
* @since 1.7
2520
*/
2521
public static final UnicodeBlock EMOTICONS =
2522
new UnicodeBlock("EMOTICONS");
2523
2524
/**
2525
* Constant for the "Transport And Map Symbols" Unicode character block.
2526
* @since 1.7
2527
*/
2528
public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2529
new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2530
"TRANSPORT AND MAP SYMBOLS",
2531
"TRANSPORTANDMAPSYMBOLS");
2532
2533
/**
2534
* Constant for the "Alchemical Symbols" Unicode character block.
2535
* @since 1.7
2536
*/
2537
public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2538
new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2539
"ALCHEMICAL SYMBOLS",
2540
"ALCHEMICALSYMBOLS");
2541
2542
/**
2543
* Constant for the "CJK Unified Ideographs Extension C" Unicode
2544
* character block.
2545
* @since 1.7
2546
*/
2547
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2548
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2549
"CJK UNIFIED IDEOGRAPHS EXTENSION C",
2550
"CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2551
2552
/**
2553
* Constant for the "CJK Unified Ideographs Extension D" Unicode
2554
* character block.
2555
* @since 1.7
2556
*/
2557
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2558
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2559
"CJK UNIFIED IDEOGRAPHS EXTENSION D",
2560
"CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2561
2562
/**
2563
* Constant for the "Arabic Extended-A" Unicode character block.
2564
* @since 1.8
2565
*/
2566
public static final UnicodeBlock ARABIC_EXTENDED_A =
2567
new UnicodeBlock("ARABIC_EXTENDED_A",
2568
"ARABIC EXTENDED-A",
2569
"ARABICEXTENDED-A");
2570
2571
/**
2572
* Constant for the "Sundanese Supplement" Unicode character block.
2573
* @since 1.8
2574
*/
2575
public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2576
new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2577
"SUNDANESE SUPPLEMENT",
2578
"SUNDANESESUPPLEMENT");
2579
2580
/**
2581
* Constant for the "Meetei Mayek Extensions" Unicode character block.
2582
* @since 1.8
2583
*/
2584
public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2585
new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2586
"MEETEI MAYEK EXTENSIONS",
2587
"MEETEIMAYEKEXTENSIONS");
2588
2589
/**
2590
* Constant for the "Meroitic Hieroglyphs" Unicode character block.
2591
* @since 1.8
2592
*/
2593
public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2594
new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2595
"MEROITIC HIEROGLYPHS",
2596
"MEROITICHIEROGLYPHS");
2597
2598
/**
2599
* Constant for the "Meroitic Cursive" Unicode character block.
2600
* @since 1.8
2601
*/
2602
public static final UnicodeBlock MEROITIC_CURSIVE =
2603
new UnicodeBlock("MEROITIC_CURSIVE",
2604
"MEROITIC CURSIVE",
2605
"MEROITICCURSIVE");
2606
2607
/**
2608
* Constant for the "Sora Sompeng" Unicode character block.
2609
* @since 1.8
2610
*/
2611
public static final UnicodeBlock SORA_SOMPENG =
2612
new UnicodeBlock("SORA_SOMPENG",
2613
"SORA SOMPENG",
2614
"SORASOMPENG");
2615
2616
/**
2617
* Constant for the "Chakma" Unicode character block.
2618
* @since 1.8
2619
*/
2620
public static final UnicodeBlock CHAKMA =
2621
new UnicodeBlock("CHAKMA");
2622
2623
/**
2624
* Constant for the "Sharada" Unicode character block.
2625
* @since 1.8
2626
*/
2627
public static final UnicodeBlock SHARADA =
2628
new UnicodeBlock("SHARADA");
2629
2630
/**
2631
* Constant for the "Takri" Unicode character block.
2632
* @since 1.8
2633
*/
2634
public static final UnicodeBlock TAKRI =
2635
new UnicodeBlock("TAKRI");
2636
2637
/**
2638
* Constant for the "Miao" Unicode character block.
2639
* @since 1.8
2640
*/
2641
public static final UnicodeBlock MIAO =
2642
new UnicodeBlock("MIAO");
2643
2644
/**
2645
* Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2646
* character block.
2647
* @since 1.8
2648
*/
2649
public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2650
new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2651
"ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2652
"ARABICMATHEMATICALALPHABETICSYMBOLS");
2653
2654
/**
2655
* Constant for the "Combining Diacritical Marks Extended" Unicode
2656
* character block.
2657
* @since 9
2658
*/
2659
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2660
new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2661
"COMBINING DIACRITICAL MARKS EXTENDED",
2662
"COMBININGDIACRITICALMARKSEXTENDED");
2663
2664
/**
2665
* Constant for the "Myanmar Extended-B" Unicode character block.
2666
* @since 9
2667
*/
2668
public static final UnicodeBlock MYANMAR_EXTENDED_B =
2669
new UnicodeBlock("MYANMAR_EXTENDED_B",
2670
"MYANMAR EXTENDED-B",
2671
"MYANMAREXTENDED-B");
2672
2673
/**
2674
* Constant for the "Latin Extended-E" Unicode character block.
2675
* @since 9
2676
*/
2677
public static final UnicodeBlock LATIN_EXTENDED_E =
2678
new UnicodeBlock("LATIN_EXTENDED_E",
2679
"LATIN EXTENDED-E",
2680
"LATINEXTENDED-E");
2681
2682
/**
2683
* Constant for the "Coptic Epact Numbers" Unicode character block.
2684
* @since 9
2685
*/
2686
public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2687
new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2688
"COPTIC EPACT NUMBERS",
2689
"COPTICEPACTNUMBERS");
2690
2691
/**
2692
* Constant for the "Old Permic" Unicode character block.
2693
* @since 9
2694
*/
2695
public static final UnicodeBlock OLD_PERMIC =
2696
new UnicodeBlock("OLD_PERMIC",
2697
"OLD PERMIC",
2698
"OLDPERMIC");
2699
2700
/**
2701
* Constant for the "Elbasan" Unicode character block.
2702
* @since 9
2703
*/
2704
public static final UnicodeBlock ELBASAN =
2705
new UnicodeBlock("ELBASAN");
2706
2707
/**
2708
* Constant for the "Caucasian Albanian" Unicode character block.
2709
* @since 9
2710
*/
2711
public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2712
new UnicodeBlock("CAUCASIAN_ALBANIAN",
2713
"CAUCASIAN ALBANIAN",
2714
"CAUCASIANALBANIAN");
2715
2716
/**
2717
* Constant for the "Linear A" Unicode character block.
2718
* @since 9
2719
*/
2720
public static final UnicodeBlock LINEAR_A =
2721
new UnicodeBlock("LINEAR_A",
2722
"LINEAR A",
2723
"LINEARA");
2724
2725
/**
2726
* Constant for the "Palmyrene" Unicode character block.
2727
* @since 9
2728
*/
2729
public static final UnicodeBlock PALMYRENE =
2730
new UnicodeBlock("PALMYRENE");
2731
2732
/**
2733
* Constant for the "Nabataean" Unicode character block.
2734
* @since 9
2735
*/
2736
public static final UnicodeBlock NABATAEAN =
2737
new UnicodeBlock("NABATAEAN");
2738
2739
/**
2740
* Constant for the "Old North Arabian" Unicode character block.
2741
* @since 9
2742
*/
2743
public static final UnicodeBlock OLD_NORTH_ARABIAN =
2744
new UnicodeBlock("OLD_NORTH_ARABIAN",
2745
"OLD NORTH ARABIAN",
2746
"OLDNORTHARABIAN");
2747
2748
/**
2749
* Constant for the "Manichaean" Unicode character block.
2750
* @since 9
2751
*/
2752
public static final UnicodeBlock MANICHAEAN =
2753
new UnicodeBlock("MANICHAEAN");
2754
2755
/**
2756
* Constant for the "Psalter Pahlavi" Unicode character block.
2757
* @since 9
2758
*/
2759
public static final UnicodeBlock PSALTER_PAHLAVI =
2760
new UnicodeBlock("PSALTER_PAHLAVI",
2761
"PSALTER PAHLAVI",
2762
"PSALTERPAHLAVI");
2763
2764
/**
2765
* Constant for the "Mahajani" Unicode character block.
2766
* @since 9
2767
*/
2768
public static final UnicodeBlock MAHAJANI =
2769
new UnicodeBlock("MAHAJANI");
2770
2771
/**
2772
* Constant for the "Sinhala Archaic Numbers" Unicode character block.
2773
* @since 9
2774
*/
2775
public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2776
new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2777
"SINHALA ARCHAIC NUMBERS",
2778
"SINHALAARCHAICNUMBERS");
2779
2780
/**
2781
* Constant for the "Khojki" Unicode character block.
2782
* @since 9
2783
*/
2784
public static final UnicodeBlock KHOJKI =
2785
new UnicodeBlock("KHOJKI");
2786
2787
/**
2788
* Constant for the "Khudawadi" Unicode character block.
2789
* @since 9
2790
*/
2791
public static final UnicodeBlock KHUDAWADI =
2792
new UnicodeBlock("KHUDAWADI");
2793
2794
/**
2795
* Constant for the "Grantha" Unicode character block.
2796
* @since 9
2797
*/
2798
public static final UnicodeBlock GRANTHA =
2799
new UnicodeBlock("GRANTHA");
2800
2801
/**
2802
* Constant for the "Tirhuta" Unicode character block.
2803
* @since 9
2804
*/
2805
public static final UnicodeBlock TIRHUTA =
2806
new UnicodeBlock("TIRHUTA");
2807
2808
/**
2809
* Constant for the "Siddham" Unicode character block.
2810
* @since 9
2811
*/
2812
public static final UnicodeBlock SIDDHAM =
2813
new UnicodeBlock("SIDDHAM");
2814
2815
/**
2816
* Constant for the "Modi" Unicode character block.
2817
* @since 9
2818
*/
2819
public static final UnicodeBlock MODI =
2820
new UnicodeBlock("MODI");
2821
2822
/**
2823
* Constant for the "Warang Citi" Unicode character block.
2824
* @since 9
2825
*/
2826
public static final UnicodeBlock WARANG_CITI =
2827
new UnicodeBlock("WARANG_CITI",
2828
"WARANG CITI",
2829
"WARANGCITI");
2830
2831
/**
2832
* Constant for the "Pau Cin Hau" Unicode character block.
2833
* @since 9
2834
*/
2835
public static final UnicodeBlock PAU_CIN_HAU =
2836
new UnicodeBlock("PAU_CIN_HAU",
2837
"PAU CIN HAU",
2838
"PAUCINHAU");
2839
2840
/**
2841
* Constant for the "Mro" Unicode character block.
2842
* @since 9
2843
*/
2844
public static final UnicodeBlock MRO =
2845
new UnicodeBlock("MRO");
2846
2847
/**
2848
* Constant for the "Bassa Vah" Unicode character block.
2849
* @since 9
2850
*/
2851
public static final UnicodeBlock BASSA_VAH =
2852
new UnicodeBlock("BASSA_VAH",
2853
"BASSA VAH",
2854
"BASSAVAH");
2855
2856
/**
2857
* Constant for the "Pahawh Hmong" Unicode character block.
2858
* @since 9
2859
*/
2860
public static final UnicodeBlock PAHAWH_HMONG =
2861
new UnicodeBlock("PAHAWH_HMONG",
2862
"PAHAWH HMONG",
2863
"PAHAWHHMONG");
2864
2865
/**
2866
* Constant for the "Duployan" Unicode character block.
2867
* @since 9
2868
*/
2869
public static final UnicodeBlock DUPLOYAN =
2870
new UnicodeBlock("DUPLOYAN");
2871
2872
/**
2873
* Constant for the "Shorthand Format Controls" Unicode character block.
2874
* @since 9
2875
*/
2876
public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2877
new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2878
"SHORTHAND FORMAT CONTROLS",
2879
"SHORTHANDFORMATCONTROLS");
2880
2881
/**
2882
* Constant for the "Mende Kikakui" Unicode character block.
2883
* @since 9
2884
*/
2885
public static final UnicodeBlock MENDE_KIKAKUI =
2886
new UnicodeBlock("MENDE_KIKAKUI",
2887
"MENDE KIKAKUI",
2888
"MENDEKIKAKUI");
2889
2890
/**
2891
* Constant for the "Ornamental Dingbats" Unicode character block.
2892
* @since 9
2893
*/
2894
public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2895
new UnicodeBlock("ORNAMENTAL_DINGBATS",
2896
"ORNAMENTAL DINGBATS",
2897
"ORNAMENTALDINGBATS");
2898
2899
/**
2900
* Constant for the "Geometric Shapes Extended" Unicode character block.
2901
* @since 9
2902
*/
2903
public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2904
new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2905
"GEOMETRIC SHAPES EXTENDED",
2906
"GEOMETRICSHAPESEXTENDED");
2907
2908
/**
2909
* Constant for the "Supplemental Arrows-C" Unicode character block.
2910
* @since 9
2911
*/
2912
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2913
new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2914
"SUPPLEMENTAL ARROWS-C",
2915
"SUPPLEMENTALARROWS-C");
2916
2917
/**
2918
* Constant for the "Cherokee Supplement" Unicode character block.
2919
* @since 9
2920
*/
2921
public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2922
new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2923
"CHEROKEE SUPPLEMENT",
2924
"CHEROKEESUPPLEMENT");
2925
2926
/**
2927
* Constant for the "Hatran" Unicode character block.
2928
* @since 9
2929
*/
2930
public static final UnicodeBlock HATRAN =
2931
new UnicodeBlock("HATRAN");
2932
2933
/**
2934
* Constant for the "Old Hungarian" Unicode character block.
2935
* @since 9
2936
*/
2937
public static final UnicodeBlock OLD_HUNGARIAN =
2938
new UnicodeBlock("OLD_HUNGARIAN",
2939
"OLD HUNGARIAN",
2940
"OLDHUNGARIAN");
2941
2942
/**
2943
* Constant for the "Multani" Unicode character block.
2944
* @since 9
2945
*/
2946
public static final UnicodeBlock MULTANI =
2947
new UnicodeBlock("MULTANI");
2948
2949
/**
2950
* Constant for the "Ahom" Unicode character block.
2951
* @since 9
2952
*/
2953
public static final UnicodeBlock AHOM =
2954
new UnicodeBlock("AHOM");
2955
2956
/**
2957
* Constant for the "Early Dynastic Cuneiform" Unicode character block.
2958
* @since 9
2959
*/
2960
public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2961
new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2962
"EARLY DYNASTIC CUNEIFORM",
2963
"EARLYDYNASTICCUNEIFORM");
2964
2965
/**
2966
* Constant for the "Anatolian Hieroglyphs" Unicode character block.
2967
* @since 9
2968
*/
2969
public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2970
new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2971
"ANATOLIAN HIEROGLYPHS",
2972
"ANATOLIANHIEROGLYPHS");
2973
2974
/**
2975
* Constant for the "Sutton SignWriting" Unicode character block.
2976
* @since 9
2977
*/
2978
public static final UnicodeBlock SUTTON_SIGNWRITING =
2979
new UnicodeBlock("SUTTON_SIGNWRITING",
2980
"SUTTON SIGNWRITING",
2981
"SUTTONSIGNWRITING");
2982
2983
/**
2984
* Constant for the "Supplemental Symbols and Pictographs" Unicode
2985
* character block.
2986
* @since 9
2987
*/
2988
public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2989
new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2990
"SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
2991
"SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
2992
2993
/**
2994
* Constant for the "CJK Unified Ideographs Extension E" Unicode
2995
* character block.
2996
* @since 9
2997
*/
2998
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2999
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
3000
"CJK UNIFIED IDEOGRAPHS EXTENSION E",
3001
"CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
3002
3003
/**
3004
* Constant for the "Syriac Supplement" Unicode
3005
* character block.
3006
* @since 11
3007
*/
3008
public static final UnicodeBlock SYRIAC_SUPPLEMENT =
3009
new UnicodeBlock("SYRIAC_SUPPLEMENT",
3010
"SYRIAC SUPPLEMENT",
3011
"SYRIACSUPPLEMENT");
3012
3013
/**
3014
* Constant for the "Cyrillic Extended-C" Unicode
3015
* character block.
3016
* @since 11
3017
*/
3018
public static final UnicodeBlock CYRILLIC_EXTENDED_C =
3019
new UnicodeBlock("CYRILLIC_EXTENDED_C",
3020
"CYRILLIC EXTENDED-C",
3021
"CYRILLICEXTENDED-C");
3022
3023
/**
3024
* Constant for the "Osage" Unicode
3025
* character block.
3026
* @since 11
3027
*/
3028
public static final UnicodeBlock OSAGE =
3029
new UnicodeBlock("OSAGE");
3030
3031
/**
3032
* Constant for the "Newa" Unicode
3033
* character block.
3034
* @since 11
3035
*/
3036
public static final UnicodeBlock NEWA =
3037
new UnicodeBlock("NEWA");
3038
3039
/**
3040
* Constant for the "Mongolian Supplement" Unicode
3041
* character block.
3042
* @since 11
3043
*/
3044
public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
3045
new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
3046
"MONGOLIAN SUPPLEMENT",
3047
"MONGOLIANSUPPLEMENT");
3048
3049
/**
3050
* Constant for the "Marchen" Unicode
3051
* character block.
3052
* @since 11
3053
*/
3054
public static final UnicodeBlock MARCHEN =
3055
new UnicodeBlock("MARCHEN");
3056
3057
/**
3058
* Constant for the "Ideographic Symbols and Punctuation" Unicode
3059
* character block.
3060
* @since 11
3061
*/
3062
public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3063
new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3064
"IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3065
"IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3066
3067
/**
3068
* Constant for the "Tangut" Unicode
3069
* character block.
3070
* @since 11
3071
*/
3072
public static final UnicodeBlock TANGUT =
3073
new UnicodeBlock("TANGUT");
3074
3075
/**
3076
* Constant for the "Tangut Components" Unicode
3077
* character block.
3078
* @since 11
3079
*/
3080
public static final UnicodeBlock TANGUT_COMPONENTS =
3081
new UnicodeBlock("TANGUT_COMPONENTS",
3082
"TANGUT COMPONENTS",
3083
"TANGUTCOMPONENTS");
3084
3085
/**
3086
* Constant for the "Kana Extended-A" Unicode
3087
* character block.
3088
* @since 11
3089
*/
3090
public static final UnicodeBlock KANA_EXTENDED_A =
3091
new UnicodeBlock("KANA_EXTENDED_A",
3092
"KANA EXTENDED-A",
3093
"KANAEXTENDED-A");
3094
/**
3095
* Constant for the "Glagolitic Supplement" Unicode
3096
* character block.
3097
* @since 11
3098
*/
3099
public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3100
new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3101
"GLAGOLITIC SUPPLEMENT",
3102
"GLAGOLITICSUPPLEMENT");
3103
/**
3104
* Constant for the "Adlam" Unicode
3105
* character block.
3106
* @since 11
3107
*/
3108
public static final UnicodeBlock ADLAM =
3109
new UnicodeBlock("ADLAM");
3110
3111
/**
3112
* Constant for the "Masaram Gondi" Unicode
3113
* character block.
3114
* @since 11
3115
*/
3116
public static final UnicodeBlock MASARAM_GONDI =
3117
new UnicodeBlock("MASARAM_GONDI",
3118
"MASARAM GONDI",
3119
"MASARAMGONDI");
3120
3121
/**
3122
* Constant for the "Zanabazar Square" Unicode
3123
* character block.
3124
* @since 11
3125
*/
3126
public static final UnicodeBlock ZANABAZAR_SQUARE =
3127
new UnicodeBlock("ZANABAZAR_SQUARE",
3128
"ZANABAZAR SQUARE",
3129
"ZANABAZARSQUARE");
3130
3131
/**
3132
* Constant for the "Nushu" Unicode
3133
* character block.
3134
* @since 11
3135
*/
3136
public static final UnicodeBlock NUSHU =
3137
new UnicodeBlock("NUSHU");
3138
3139
/**
3140
* Constant for the "Soyombo" Unicode
3141
* character block.
3142
* @since 11
3143
*/
3144
public static final UnicodeBlock SOYOMBO =
3145
new UnicodeBlock("SOYOMBO");
3146
3147
/**
3148
* Constant for the "Bhaiksuki" Unicode
3149
* character block.
3150
* @since 11
3151
*/
3152
public static final UnicodeBlock BHAIKSUKI =
3153
new UnicodeBlock("BHAIKSUKI");
3154
3155
/**
3156
* Constant for the "CJK Unified Ideographs Extension F" Unicode
3157
* character block.
3158
* @since 11
3159
*/
3160
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3161
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3162
"CJK UNIFIED IDEOGRAPHS EXTENSION F",
3163
"CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3164
/**
3165
* Constant for the "Georgian Extended" Unicode
3166
* character block.
3167
* @since 12
3168
*/
3169
public static final UnicodeBlock GEORGIAN_EXTENDED =
3170
new UnicodeBlock("GEORGIAN_EXTENDED",
3171
"GEORGIAN EXTENDED",
3172
"GEORGIANEXTENDED");
3173
3174
/**
3175
* Constant for the "Hanifi Rohingya" Unicode
3176
* character block.
3177
* @since 12
3178
*/
3179
public static final UnicodeBlock HANIFI_ROHINGYA =
3180
new UnicodeBlock("HANIFI_ROHINGYA",
3181
"HANIFI ROHINGYA",
3182
"HANIFIROHINGYA");
3183
3184
/**
3185
* Constant for the "Old Sogdian" Unicode
3186
* character block.
3187
* @since 12
3188
*/
3189
public static final UnicodeBlock OLD_SOGDIAN =
3190
new UnicodeBlock("OLD_SOGDIAN",
3191
"OLD SOGDIAN",
3192
"OLDSOGDIAN");
3193
3194
/**
3195
* Constant for the "Sogdian" Unicode
3196
* character block.
3197
* @since 12
3198
*/
3199
public static final UnicodeBlock SOGDIAN =
3200
new UnicodeBlock("SOGDIAN");
3201
3202
/**
3203
* Constant for the "Dogra" Unicode
3204
* character block.
3205
* @since 12
3206
*/
3207
public static final UnicodeBlock DOGRA =
3208
new UnicodeBlock("DOGRA");
3209
3210
/**
3211
* Constant for the "Gunjala Gondi" Unicode
3212
* character block.
3213
* @since 12
3214
*/
3215
public static final UnicodeBlock GUNJALA_GONDI =
3216
new UnicodeBlock("GUNJALA_GONDI",
3217
"GUNJALA GONDI",
3218
"GUNJALAGONDI");
3219
3220
/**
3221
* Constant for the "Makasar" Unicode
3222
* character block.
3223
* @since 12
3224
*/
3225
public static final UnicodeBlock MAKASAR =
3226
new UnicodeBlock("MAKASAR");
3227
3228
/**
3229
* Constant for the "Medefaidrin" Unicode
3230
* character block.
3231
* @since 12
3232
*/
3233
public static final UnicodeBlock MEDEFAIDRIN =
3234
new UnicodeBlock("MEDEFAIDRIN");
3235
3236
/**
3237
* Constant for the "Mayan Numerals" Unicode
3238
* character block.
3239
* @since 12
3240
*/
3241
public static final UnicodeBlock MAYAN_NUMERALS =
3242
new UnicodeBlock("MAYAN_NUMERALS",
3243
"MAYAN NUMERALS",
3244
"MAYANNUMERALS");
3245
3246
/**
3247
* Constant for the "Indic Siyaq Numbers" Unicode
3248
* character block.
3249
* @since 12
3250
*/
3251
public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3252
new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3253
"INDIC SIYAQ NUMBERS",
3254
"INDICSIYAQNUMBERS");
3255
3256
/**
3257
* Constant for the "Chess Symbols" Unicode
3258
* character block.
3259
* @since 12
3260
*/
3261
public static final UnicodeBlock CHESS_SYMBOLS =
3262
new UnicodeBlock("CHESS_SYMBOLS",
3263
"CHESS SYMBOLS",
3264
"CHESSSYMBOLS");
3265
3266
/**
3267
* Constant for the "Elymaic" Unicode
3268
* character block.
3269
* @since 13
3270
*/
3271
public static final UnicodeBlock ELYMAIC =
3272
new UnicodeBlock("ELYMAIC");
3273
3274
/**
3275
* Constant for the "Nandinagari" Unicode
3276
* character block.
3277
* @since 13
3278
*/
3279
public static final UnicodeBlock NANDINAGARI =
3280
new UnicodeBlock("NANDINAGARI");
3281
3282
/**
3283
* Constant for the "Tamil Supplement" Unicode
3284
* character block.
3285
* @since 13
3286
*/
3287
public static final UnicodeBlock TAMIL_SUPPLEMENT =
3288
new UnicodeBlock("TAMIL_SUPPLEMENT",
3289
"TAMIL SUPPLEMENT",
3290
"TAMILSUPPLEMENT");
3291
3292
/**
3293
* Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3294
* character block.
3295
* @since 13
3296
*/
3297
public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3298
new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3299
"EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
3300
"EGYPTIANHIEROGLYPHFORMATCONTROLS");
3301
3302
/**
3303
* Constant for the "Small Kana Extension" Unicode
3304
* character block.
3305
* @since 13
3306
*/
3307
public static final UnicodeBlock SMALL_KANA_EXTENSION =
3308
new UnicodeBlock("SMALL_KANA_EXTENSION",
3309
"SMALL KANA EXTENSION",
3310
"SMALLKANAEXTENSION");
3311
3312
/**
3313
* Constant for the "Nyiakeng Puachue Hmong" Unicode
3314
* character block.
3315
* @since 13
3316
*/
3317
public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3318
new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3319
"NYIAKENG PUACHUE HMONG",
3320
"NYIAKENGPUACHUEHMONG");
3321
3322
/**
3323
* Constant for the "Wancho" Unicode
3324
* character block.
3325
* @since 13
3326
*/
3327
public static final UnicodeBlock WANCHO =
3328
new UnicodeBlock("WANCHO");
3329
3330
/**
3331
* Constant for the "Ottoman Siyaq Numbers" Unicode
3332
* character block.
3333
* @since 13
3334
*/
3335
public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3336
new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3337
"OTTOMAN SIYAQ NUMBERS",
3338
"OTTOMANSIYAQNUMBERS");
3339
3340
/**
3341
* Constant for the "Symbols and Pictographs Extended-A" Unicode
3342
* character block.
3343
* @since 13
3344
*/
3345
public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3346
new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3347
"SYMBOLS AND PICTOGRAPHS EXTENDED-A",
3348
"SYMBOLSANDPICTOGRAPHSEXTENDED-A");
3349
3350
/**
3351
* Constant for the "Yezidi" Unicode
3352
* character block.
3353
* @since 15
3354
*/
3355
public static final UnicodeBlock YEZIDI =
3356
new UnicodeBlock("YEZIDI");
3357
3358
/**
3359
* Constant for the "Chorasmian" Unicode
3360
* character block.
3361
* @since 15
3362
*/
3363
public static final UnicodeBlock CHORASMIAN =
3364
new UnicodeBlock("CHORASMIAN");
3365
3366
/**
3367
* Constant for the "Dives Akuru" Unicode
3368
* character block.
3369
* @since 15
3370
*/
3371
public static final UnicodeBlock DIVES_AKURU =
3372
new UnicodeBlock("DIVES_AKURU",
3373
"DIVES AKURU",
3374
"DIVESAKURU");
3375
3376
/**
3377
* Constant for the "Lisu Supplement" Unicode
3378
* character block.
3379
* @since 15
3380
*/
3381
public static final UnicodeBlock LISU_SUPPLEMENT =
3382
new UnicodeBlock("LISU_SUPPLEMENT",
3383
"LISU SUPPLEMENT",
3384
"LISUSUPPLEMENT");
3385
3386
/**
3387
* Constant for the "Khitan Small Script" Unicode
3388
* character block.
3389
* @since 15
3390
*/
3391
public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
3392
new UnicodeBlock("KHITAN_SMALL_SCRIPT",
3393
"KHITAN SMALL SCRIPT",
3394
"KHITANSMALLSCRIPT");
3395
3396
/**
3397
* Constant for the "Tangut Supplement" Unicode
3398
* character block.
3399
* @since 15
3400
*/
3401
public static final UnicodeBlock TANGUT_SUPPLEMENT =
3402
new UnicodeBlock("TANGUT_SUPPLEMENT",
3403
"TANGUT SUPPLEMENT",
3404
"TANGUTSUPPLEMENT");
3405
3406
/**
3407
* Constant for the "Symbols for Legacy Computing" Unicode
3408
* character block.
3409
* @since 15
3410
*/
3411
public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
3412
new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
3413
"SYMBOLS FOR LEGACY COMPUTING",
3414
"SYMBOLSFORLEGACYCOMPUTING");
3415
3416
/**
3417
* Constant for the "CJK Unified Ideographs Extension G" Unicode
3418
* character block.
3419
* @since 15
3420
*/
3421
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
3422
new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
3423
"CJK UNIFIED IDEOGRAPHS EXTENSION G",
3424
"CJKUNIFIEDIDEOGRAPHSEXTENSIONG");
3425
3426
private static final int[] blockStarts = {
3427
0x0000, // 0000..007F; Basic Latin
3428
0x0080, // 0080..00FF; Latin-1 Supplement
3429
0x0100, // 0100..017F; Latin Extended-A
3430
0x0180, // 0180..024F; Latin Extended-B
3431
0x0250, // 0250..02AF; IPA Extensions
3432
0x02B0, // 02B0..02FF; Spacing Modifier Letters
3433
0x0300, // 0300..036F; Combining Diacritical Marks
3434
0x0370, // 0370..03FF; Greek and Coptic
3435
0x0400, // 0400..04FF; Cyrillic
3436
0x0500, // 0500..052F; Cyrillic Supplement
3437
0x0530, // 0530..058F; Armenian
3438
0x0590, // 0590..05FF; Hebrew
3439
0x0600, // 0600..06FF; Arabic
3440
0x0700, // 0700..074F; Syriac
3441
0x0750, // 0750..077F; Arabic Supplement
3442
0x0780, // 0780..07BF; Thaana
3443
0x07C0, // 07C0..07FF; NKo
3444
0x0800, // 0800..083F; Samaritan
3445
0x0840, // 0840..085F; Mandaic
3446
0x0860, // 0860..086F; Syriac Supplement
3447
0x0870, // unassigned
3448
0x08A0, // 08A0..08FF; Arabic Extended-A
3449
0x0900, // 0900..097F; Devanagari
3450
0x0980, // 0980..09FF; Bengali
3451
0x0A00, // 0A00..0A7F; Gurmukhi
3452
0x0A80, // 0A80..0AFF; Gujarati
3453
0x0B00, // 0B00..0B7F; Oriya
3454
0x0B80, // 0B80..0BFF; Tamil
3455
0x0C00, // 0C00..0C7F; Telugu
3456
0x0C80, // 0C80..0CFF; Kannada
3457
0x0D00, // 0D00..0D7F; Malayalam
3458
0x0D80, // 0D80..0DFF; Sinhala
3459
0x0E00, // 0E00..0E7F; Thai
3460
0x0E80, // 0E80..0EFF; Lao
3461
0x0F00, // 0F00..0FFF; Tibetan
3462
0x1000, // 1000..109F; Myanmar
3463
0x10A0, // 10A0..10FF; Georgian
3464
0x1100, // 1100..11FF; Hangul Jamo
3465
0x1200, // 1200..137F; Ethiopic
3466
0x1380, // 1380..139F; Ethiopic Supplement
3467
0x13A0, // 13A0..13FF; Cherokee
3468
0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
3469
0x1680, // 1680..169F; Ogham
3470
0x16A0, // 16A0..16FF; Runic
3471
0x1700, // 1700..171F; Tagalog
3472
0x1720, // 1720..173F; Hanunoo
3473
0x1740, // 1740..175F; Buhid
3474
0x1760, // 1760..177F; Tagbanwa
3475
0x1780, // 1780..17FF; Khmer
3476
0x1800, // 1800..18AF; Mongolian
3477
0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3478
0x1900, // 1900..194F; Limbu
3479
0x1950, // 1950..197F; Tai Le
3480
0x1980, // 1980..19DF; New Tai Lue
3481
0x19E0, // 19E0..19FF; Khmer Symbols
3482
0x1A00, // 1A00..1A1F; Buginese
3483
0x1A20, // 1A20..1AAF; Tai Tham
3484
0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended
3485
0x1B00, // 1B00..1B7F; Balinese
3486
0x1B80, // 1B80..1BBF; Sundanese
3487
0x1BC0, // 1BC0..1BFF; Batak
3488
0x1C00, // 1C00..1C4F; Lepcha
3489
0x1C50, // 1C50..1C7F; Ol Chiki
3490
0x1C80, // 1C80..1C8F; Cyrillic Extended-C
3491
0x1C90, // 1C90..1CBF; Georgian Extended
3492
0x1CC0, // 1CC0..1CCF; Sundanese Supplement
3493
0x1CD0, // 1CD0..1CFF; Vedic Extensions
3494
0x1D00, // 1D00..1D7F; Phonetic Extensions
3495
0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
3496
0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
3497
0x1E00, // 1E00..1EFF; Latin Extended Additional
3498
0x1F00, // 1F00..1FFF; Greek Extended
3499
0x2000, // 2000..206F; General Punctuation
3500
0x2070, // 2070..209F; Superscripts and Subscripts
3501
0x20A0, // 20A0..20CF; Currency Symbols
3502
0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
3503
0x2100, // 2100..214F; Letterlike Symbols
3504
0x2150, // 2150..218F; Number Forms
3505
0x2190, // 2190..21FF; Arrows
3506
0x2200, // 2200..22FF; Mathematical Operators
3507
0x2300, // 2300..23FF; Miscellaneous Technical
3508
0x2400, // 2400..243F; Control Pictures
3509
0x2440, // 2440..245F; Optical Character Recognition
3510
0x2460, // 2460..24FF; Enclosed Alphanumerics
3511
0x2500, // 2500..257F; Box Drawing
3512
0x2580, // 2580..259F; Block Elements
3513
0x25A0, // 25A0..25FF; Geometric Shapes
3514
0x2600, // 2600..26FF; Miscellaneous Symbols
3515
0x2700, // 2700..27BF; Dingbats
3516
0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3517
0x27F0, // 27F0..27FF; Supplemental Arrows-A
3518
0x2800, // 2800..28FF; Braille Patterns
3519
0x2900, // 2900..297F; Supplemental Arrows-B
3520
0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B
3521
0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators
3522
0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows
3523
0x2C00, // 2C00..2C5F; Glagolitic
3524
0x2C60, // 2C60..2C7F; Latin Extended-C
3525
0x2C80, // 2C80..2CFF; Coptic
3526
0x2D00, // 2D00..2D2F; Georgian Supplement
3527
0x2D30, // 2D30..2D7F; Tifinagh
3528
0x2D80, // 2D80..2DDF; Ethiopic Extended
3529
0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A
3530
0x2E00, // 2E00..2E7F; Supplemental Punctuation
3531
0x2E80, // 2E80..2EFF; CJK Radicals Supplement
3532
0x2F00, // 2F00..2FDF; Kangxi Radicals
3533
0x2FE0, // unassigned
3534
0x2FF0, // 2FF0..2FFF; Ideographic Description Characters
3535
0x3000, // 3000..303F; CJK Symbols and Punctuation
3536
0x3040, // 3040..309F; Hiragana
3537
0x30A0, // 30A0..30FF; Katakana
3538
0x3100, // 3100..312F; Bopomofo
3539
0x3130, // 3130..318F; Hangul Compatibility Jamo
3540
0x3190, // 3190..319F; Kanbun
3541
0x31A0, // 31A0..31BF; Bopomofo Extended
3542
0x31C0, // 31C0..31EF; CJK Strokes
3543
0x31F0, // 31F0..31FF; Katakana Phonetic Extensions
3544
0x3200, // 3200..32FF; Enclosed CJK Letters and Months
3545
0x3300, // 3300..33FF; CJK Compatibility
3546
0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
3547
0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
3548
0x4E00, // 4E00..9FFF; CJK Unified Ideographs
3549
0xA000, // A000..A48F; Yi Syllables
3550
0xA490, // A490..A4CF; Yi Radicals
3551
0xA4D0, // A4D0..A4FF; Lisu
3552
0xA500, // A500..A63F; Vai
3553
0xA640, // A640..A69F; Cyrillic Extended-B
3554
0xA6A0, // A6A0..A6FF; Bamum
3555
0xA700, // A700..A71F; Modifier Tone Letters
3556
0xA720, // A720..A7FF; Latin Extended-D
3557
0xA800, // A800..A82F; Syloti Nagri
3558
0xA830, // A830..A83F; Common Indic Number Forms
3559
0xA840, // A840..A87F; Phags-pa
3560
0xA880, // A880..A8DF; Saurashtra
3561
0xA8E0, // A8E0..A8FF; Devanagari Extended
3562
0xA900, // A900..A92F; Kayah Li
3563
0xA930, // A930..A95F; Rejang
3564
0xA960, // A960..A97F; Hangul Jamo Extended-A
3565
0xA980, // A980..A9DF; Javanese
3566
0xA9E0, // A9E0..A9FF; Myanmar Extended-B
3567
0xAA00, // AA00..AA5F; Cham
3568
0xAA60, // AA60..AA7F; Myanmar Extended-A
3569
0xAA80, // AA80..AADF; Tai Viet
3570
0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions
3571
0xAB00, // AB00..AB2F; Ethiopic Extended-A
3572
0xAB30, // AB30..AB6F; Latin Extended-E
3573
0xAB70, // AB70..ABBF; Cherokee Supplement
3574
0xABC0, // ABC0..ABFF; Meetei Mayek
3575
0xAC00, // AC00..D7AF; Hangul Syllables
3576
0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
3577
0xD800, // D800..DB7F; High Surrogates
3578
0xDB80, // DB80..DBFF; High Private Use Surrogates
3579
0xDC00, // DC00..DFFF; Low Surrogates
3580
0xE000, // E000..F8FF; Private Use Area
3581
0xF900, // F900..FAFF; CJK Compatibility Ideographs
3582
0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
3583
0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
3584
0xFE00, // FE00..FE0F; Variation Selectors
3585
0xFE10, // FE10..FE1F; Vertical Forms
3586
0xFE20, // FE20..FE2F; Combining Half Marks
3587
0xFE30, // FE30..FE4F; CJK Compatibility Forms
3588
0xFE50, // FE50..FE6F; Small Form Variants
3589
0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
3590
0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
3591
0xFFF0, // FFF0..FFFF; Specials
3592
0x10000, // 10000..1007F; Linear B Syllabary
3593
0x10080, // 10080..100FF; Linear B Ideograms
3594
0x10100, // 10100..1013F; Aegean Numbers
3595
0x10140, // 10140..1018F; Ancient Greek Numbers
3596
0x10190, // 10190..101CF; Ancient Symbols
3597
0x101D0, // 101D0..101FF; Phaistos Disc
3598
0x10200, // unassigned
3599
0x10280, // 10280..1029F; Lycian
3600
0x102A0, // 102A0..102DF; Carian
3601
0x102E0, // 102E0..102FF; Coptic Epact Numbers
3602
0x10300, // 10300..1032F; Old Italic
3603
0x10330, // 10330..1034F; Gothic
3604
0x10350, // 10350..1037F; Old Permic
3605
0x10380, // 10380..1039F; Ugaritic
3606
0x103A0, // 103A0..103DF; Old Persian
3607
0x103E0, // unassigned
3608
0x10400, // 10400..1044F; Deseret
3609
0x10450, // 10450..1047F; Shavian
3610
0x10480, // 10480..104AF; Osmanya
3611
0x104B0, // 104B0..104FF; Osage
3612
0x10500, // 10500..1052F; Elbasan
3613
0x10530, // 10530..1056F; Caucasian Albanian
3614
0x10570, // unassigned
3615
0x10600, // 10600..1077F; Linear A
3616
0x10780, // unassigned
3617
0x10800, // 10800..1083F; Cypriot Syllabary
3618
0x10840, // 10840..1085F; Imperial Aramaic
3619
0x10860, // 10860..1087F; Palmyrene
3620
0x10880, // 10880..108AF; Nabataean
3621
0x108B0, // unassigned
3622
0x108E0, // 108E0..108FF; Hatran
3623
0x10900, // 10900..1091F; Phoenician
3624
0x10920, // 10920..1093F; Lydian
3625
0x10940, // unassigned
3626
0x10980, // 10980..1099F; Meroitic Hieroglyphs
3627
0x109A0, // 109A0..109FF; Meroitic Cursive
3628
0x10A00, // 10A00..10A5F; Kharoshthi
3629
0x10A60, // 10A60..10A7F; Old South Arabian
3630
0x10A80, // 10A80..10A9F; Old North Arabian
3631
0x10AA0, // unassigned
3632
0x10AC0, // 10AC0..10AFF; Manichaean
3633
0x10B00, // 10B00..10B3F; Avestan
3634
0x10B40, // 10B40..10B5F; Inscriptional Parthian
3635
0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
3636
0x10B80, // 10B80..10BAF; Psalter Pahlavi
3637
0x10BB0, // unassigned
3638
0x10C00, // 10C00..10C4F; Old Turkic
3639
0x10C50, // unassigned
3640
0x10C80, // 10C80..10CFF; Old Hungarian
3641
0x10D00, // 10D00..10D3F; Hanifi Rohingya
3642
0x10D40, // unassigned
3643
0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
3644
0x10E80, // 10E80..10EBF; Yezidi
3645
0x10EC0, // unassigned
3646
0x10F00, // 10F00..10F2F; Old Sogdian
3647
0x10F30, // 10F30..10F6F; Sogdian
3648
0x10F70, // unassigned
3649
0x10FB0, // 10FB0..10FDF; Chorasmian
3650
0x10FE0, // 10FE0..10FFF; Elymaic
3651
0x11000, // 11000..1107F; Brahmi
3652
0x11080, // 11080..110CF; Kaithi
3653
0x110D0, // 110D0..110FF; Sora Sompeng
3654
0x11100, // 11100..1114F; Chakma
3655
0x11150, // 11150..1117F; Mahajani
3656
0x11180, // 11180..111DF; Sharada
3657
0x111E0, // 111E0..111FF; Sinhala Archaic Numbers
3658
0x11200, // 11200..1124F; Khojki
3659
0x11250, // unassigned
3660
0x11280, // 11280..112AF; Multani
3661
0x112B0, // 112B0..112FF; Khudawadi
3662
0x11300, // 11300..1137F; Grantha
3663
0x11380, // unassigned
3664
0x11400, // 11400..1147F; Newa
3665
0x11480, // 11480..114DF; Tirhuta
3666
0x114E0, // unassigned
3667
0x11580, // 11580..115FF; Siddham
3668
0x11600, // 11600..1165F; Modi
3669
0x11660, // 11660..1167F; Mongolian Supplement
3670
0x11680, // 11680..116CF; Takri
3671
0x116D0, // unassigned
3672
0x11700, // 11700..1173F; Ahom
3673
0x11740, // unassigned
3674
0x11800, // 11800..1184F; Dogra
3675
0x11850, // unassigned
3676
0x118A0, // 118A0..118FF; Warang Citi
3677
0x11900, // 11900..1195F; Dives Akuru
3678
0x11960, // unassigned
3679
0x119A0, // 119A0..119FF; Nandinagari
3680
0x11A00, // 11A00..11A4F; Zanabazar Square
3681
0x11A50, // 11A50..11AAF; Soyombo
3682
0x11AB0, // unassigned
3683
0x11AC0, // 11AC0..11AFF; Pau Cin Hau
3684
0x11B00, // unassigned
3685
0x11C00, // 11C00..11C6F; Bhaiksuki
3686
0x11C70, // 11C70..11CBF; Marchen
3687
0x11CC0, // unassigned
3688
0x11D00, // 11D00..11D5F; Masaram Gondi
3689
0x11D60, // 11D60..11DAF; Gunjala Gondi
3690
0x11DB0, // unassigned
3691
0x11EE0, // 11EE0..11EFF; Makasar
3692
0x11F00, // unassigned
3693
0x11FB0, // 11FB0..11FBF; Lisu Supplement
3694
0x11FC0, // 11FC0..11FFF; Tamil Supplement
3695
0x12000, // 12000..123FF; Cuneiform
3696
0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
3697
0x12480, // 12480..1254F; Early Dynastic Cuneiform
3698
0x12550, // unassigned
3699
0x13000, // 13000..1342F; Egyptian Hieroglyphs
3700
0x13430, // 13430..1343F; Egyptian Hieroglyph Format Controls
3701
0x13440, // unassigned
3702
0x14400, // 14400..1467F; Anatolian Hieroglyphs
3703
0x14680, // unassigned
3704
0x16800, // 16800..16A3F; Bamum Supplement
3705
0x16A40, // 16A40..16A6F; Mro
3706
0x16A70, // unassigned
3707
0x16AD0, // 16AD0..16AFF; Bassa Vah
3708
0x16B00, // 16B00..16B8F; Pahawh Hmong
3709
0x16B90, // unassigned
3710
0x16E40, // 16E40..16E9F; Medefaidrin
3711
0x16EA0, // unassigned
3712
0x16F00, // 16F00..16F9F; Miao
3713
0x16FA0, // unassigned
3714
0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation
3715
0x17000, // 17000..187FF; Tangut
3716
0x18800, // 18800..18AFF; Tangut Components
3717
0x18B00, // 18B00..18CFF; Khitan Small Script
3718
0x18D00, // 18D00..18D8F; Tangut Supplement
3719
0x18D90, // unassigned
3720
0x1B000, // 1B000..1B0FF; Kana Supplement
3721
0x1B100, // 1B100..1B12F; Kana Extended-A
3722
0x1B130, // 1B130..1B16F; Small Kana Extension
3723
0x1B170, // 1B170..1B2FF; Nushu
3724
0x1B300, // unassigned
3725
0x1BC00, // 1BC00..1BC9F; Duployan
3726
0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls
3727
0x1BCB0, // unassigned
3728
0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
3729
0x1D100, // 1D100..1D1FF; Musical Symbols
3730
0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
3731
0x1D250, // unassigned
3732
0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals
3733
0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
3734
0x1D360, // 1D360..1D37F; Counting Rod Numerals
3735
0x1D380, // unassigned
3736
0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3737
0x1D800, // 1D800..1DAAF; Sutton SignWriting
3738
0x1DAB0, // unassigned
3739
0x1E000, // 1E000..1E02F; Glagolitic Supplement
3740
0x1E030, // unassigned
3741
0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong
3742
0x1E150, // unassigned
3743
0x1E2C0, // 1E2C0..1E2FF; Wancho
3744
0x1E300, // unassigned
3745
0x1E800, // 1E800..1E8DF; Mende Kikakui
3746
0x1E8E0, // unassigned
3747
0x1E900, // 1E900..1E95F; Adlam
3748
0x1E960, // unassigned
3749
0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers
3750
0x1ECC0, // unassigned
3751
0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers
3752
0x1ED50, // unassigned
3753
0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3754
0x1EF00, // unassigned
3755
0x1F000, // 1F000..1F02F; Mahjong Tiles
3756
0x1F030, // 1F030..1F09F; Domino Tiles
3757
0x1F0A0, // 1F0A0..1F0FF; Playing Cards
3758
0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3759
0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
3760
0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3761
0x1F600, // 1F600..1F64F; Emoticons
3762
0x1F650, // 1F650..1F67F; Ornamental Dingbats
3763
0x1F680, // 1F680..1F6FF; Transport and Map Symbols
3764
0x1F700, // 1F700..1F77F; Alchemical Symbols
3765
0x1F780, // 1F780..1F7FF; Geometric Shapes Extended
3766
0x1F800, // 1F800..1F8FF; Supplemental Arrows-C
3767
0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs
3768
0x1FA00, // 1FA00..1FA6F; Chess Symbols
3769
0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A
3770
0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing
3771
0x1FC00, // unassigned
3772
0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
3773
0x2A6E0, // unassigned
3774
0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
3775
0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
3776
0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
3777
0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3778
0x2EBF0, // unassigned
3779
0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3780
0x2FA20, // unassigned
3781
0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
3782
0x31350, // unassigned
3783
0xE0000, // E0000..E007F; Tags
3784
0xE0080, // unassigned
3785
0xE0100, // E0100..E01EF; Variation Selectors Supplement
3786
0xE01F0, // unassigned
3787
0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
3788
0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
3789
};
3790
3791
private static final UnicodeBlock[] blocks = {
3792
BASIC_LATIN,
3793
LATIN_1_SUPPLEMENT,
3794
LATIN_EXTENDED_A,
3795
LATIN_EXTENDED_B,
3796
IPA_EXTENSIONS,
3797
SPACING_MODIFIER_LETTERS,
3798
COMBINING_DIACRITICAL_MARKS,
3799
GREEK,
3800
CYRILLIC,
3801
CYRILLIC_SUPPLEMENTARY,
3802
ARMENIAN,
3803
HEBREW,
3804
ARABIC,
3805
SYRIAC,
3806
ARABIC_SUPPLEMENT,
3807
THAANA,
3808
NKO,
3809
SAMARITAN,
3810
MANDAIC,
3811
SYRIAC_SUPPLEMENT,
3812
null,
3813
ARABIC_EXTENDED_A,
3814
DEVANAGARI,
3815
BENGALI,
3816
GURMUKHI,
3817
GUJARATI,
3818
ORIYA,
3819
TAMIL,
3820
TELUGU,
3821
KANNADA,
3822
MALAYALAM,
3823
SINHALA,
3824
THAI,
3825
LAO,
3826
TIBETAN,
3827
MYANMAR,
3828
GEORGIAN,
3829
HANGUL_JAMO,
3830
ETHIOPIC,
3831
ETHIOPIC_SUPPLEMENT,
3832
CHEROKEE,
3833
UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
3834
OGHAM,
3835
RUNIC,
3836
TAGALOG,
3837
HANUNOO,
3838
BUHID,
3839
TAGBANWA,
3840
KHMER,
3841
MONGOLIAN,
3842
UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
3843
LIMBU,
3844
TAI_LE,
3845
NEW_TAI_LUE,
3846
KHMER_SYMBOLS,
3847
BUGINESE,
3848
TAI_THAM,
3849
COMBINING_DIACRITICAL_MARKS_EXTENDED,
3850
BALINESE,
3851
SUNDANESE,
3852
BATAK,
3853
LEPCHA,
3854
OL_CHIKI,
3855
CYRILLIC_EXTENDED_C,
3856
GEORGIAN_EXTENDED,
3857
SUNDANESE_SUPPLEMENT,
3858
VEDIC_EXTENSIONS,
3859
PHONETIC_EXTENSIONS,
3860
PHONETIC_EXTENSIONS_SUPPLEMENT,
3861
COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
3862
LATIN_EXTENDED_ADDITIONAL,
3863
GREEK_EXTENDED,
3864
GENERAL_PUNCTUATION,
3865
SUPERSCRIPTS_AND_SUBSCRIPTS,
3866
CURRENCY_SYMBOLS,
3867
COMBINING_MARKS_FOR_SYMBOLS,
3868
LETTERLIKE_SYMBOLS,
3869
NUMBER_FORMS,
3870
ARROWS,
3871
MATHEMATICAL_OPERATORS,
3872
MISCELLANEOUS_TECHNICAL,
3873
CONTROL_PICTURES,
3874
OPTICAL_CHARACTER_RECOGNITION,
3875
ENCLOSED_ALPHANUMERICS,
3876
BOX_DRAWING,
3877
BLOCK_ELEMENTS,
3878
GEOMETRIC_SHAPES,
3879
MISCELLANEOUS_SYMBOLS,
3880
DINGBATS,
3881
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
3882
SUPPLEMENTAL_ARROWS_A,
3883
BRAILLE_PATTERNS,
3884
SUPPLEMENTAL_ARROWS_B,
3885
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
3886
SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
3887
MISCELLANEOUS_SYMBOLS_AND_ARROWS,
3888
GLAGOLITIC,
3889
LATIN_EXTENDED_C,
3890
COPTIC,
3891
GEORGIAN_SUPPLEMENT,
3892
TIFINAGH,
3893
ETHIOPIC_EXTENDED,
3894
CYRILLIC_EXTENDED_A,
3895
SUPPLEMENTAL_PUNCTUATION,
3896
CJK_RADICALS_SUPPLEMENT,
3897
KANGXI_RADICALS,
3898
null,
3899
IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
3900
CJK_SYMBOLS_AND_PUNCTUATION,
3901
HIRAGANA,
3902
KATAKANA,
3903
BOPOMOFO,
3904
HANGUL_COMPATIBILITY_JAMO,
3905
KANBUN,
3906
BOPOMOFO_EXTENDED,
3907
CJK_STROKES,
3908
KATAKANA_PHONETIC_EXTENSIONS,
3909
ENCLOSED_CJK_LETTERS_AND_MONTHS,
3910
CJK_COMPATIBILITY,
3911
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
3912
YIJING_HEXAGRAM_SYMBOLS,
3913
CJK_UNIFIED_IDEOGRAPHS,
3914
YI_SYLLABLES,
3915
YI_RADICALS,
3916
LISU,
3917
VAI,
3918
CYRILLIC_EXTENDED_B,
3919
BAMUM,
3920
MODIFIER_TONE_LETTERS,
3921
LATIN_EXTENDED_D,
3922
SYLOTI_NAGRI,
3923
COMMON_INDIC_NUMBER_FORMS,
3924
PHAGS_PA,
3925
SAURASHTRA,
3926
DEVANAGARI_EXTENDED,
3927
KAYAH_LI,
3928
REJANG,
3929
HANGUL_JAMO_EXTENDED_A,
3930
JAVANESE,
3931
MYANMAR_EXTENDED_B,
3932
CHAM,
3933
MYANMAR_EXTENDED_A,
3934
TAI_VIET,
3935
MEETEI_MAYEK_EXTENSIONS,
3936
ETHIOPIC_EXTENDED_A,
3937
LATIN_EXTENDED_E,
3938
CHEROKEE_SUPPLEMENT,
3939
MEETEI_MAYEK,
3940
HANGUL_SYLLABLES,
3941
HANGUL_JAMO_EXTENDED_B,
3942
HIGH_SURROGATES,
3943
HIGH_PRIVATE_USE_SURROGATES,
3944
LOW_SURROGATES,
3945
PRIVATE_USE_AREA,
3946
CJK_COMPATIBILITY_IDEOGRAPHS,
3947
ALPHABETIC_PRESENTATION_FORMS,
3948
ARABIC_PRESENTATION_FORMS_A,
3949
VARIATION_SELECTORS,
3950
VERTICAL_FORMS,
3951
COMBINING_HALF_MARKS,
3952
CJK_COMPATIBILITY_FORMS,
3953
SMALL_FORM_VARIANTS,
3954
ARABIC_PRESENTATION_FORMS_B,
3955
HALFWIDTH_AND_FULLWIDTH_FORMS,
3956
SPECIALS,
3957
LINEAR_B_SYLLABARY,
3958
LINEAR_B_IDEOGRAMS,
3959
AEGEAN_NUMBERS,
3960
ANCIENT_GREEK_NUMBERS,
3961
ANCIENT_SYMBOLS,
3962
PHAISTOS_DISC,
3963
null,
3964
LYCIAN,
3965
CARIAN,
3966
COPTIC_EPACT_NUMBERS,
3967
OLD_ITALIC,
3968
GOTHIC,
3969
OLD_PERMIC,
3970
UGARITIC,
3971
OLD_PERSIAN,
3972
null,
3973
DESERET,
3974
SHAVIAN,
3975
OSMANYA,
3976
OSAGE,
3977
ELBASAN,
3978
CAUCASIAN_ALBANIAN,
3979
null,
3980
LINEAR_A,
3981
null,
3982
CYPRIOT_SYLLABARY,
3983
IMPERIAL_ARAMAIC,
3984
PALMYRENE,
3985
NABATAEAN,
3986
null,
3987
HATRAN,
3988
PHOENICIAN,
3989
LYDIAN,
3990
null,
3991
MEROITIC_HIEROGLYPHS,
3992
MEROITIC_CURSIVE,
3993
KHAROSHTHI,
3994
OLD_SOUTH_ARABIAN,
3995
OLD_NORTH_ARABIAN,
3996
null,
3997
MANICHAEAN,
3998
AVESTAN,
3999
INSCRIPTIONAL_PARTHIAN,
4000
INSCRIPTIONAL_PAHLAVI,
4001
PSALTER_PAHLAVI,
4002
null,
4003
OLD_TURKIC,
4004
null,
4005
OLD_HUNGARIAN,
4006
HANIFI_ROHINGYA,
4007
null,
4008
RUMI_NUMERAL_SYMBOLS,
4009
YEZIDI,
4010
null,
4011
OLD_SOGDIAN,
4012
SOGDIAN,
4013
null,
4014
CHORASMIAN,
4015
ELYMAIC,
4016
BRAHMI,
4017
KAITHI,
4018
SORA_SOMPENG,
4019
CHAKMA,
4020
MAHAJANI,
4021
SHARADA,
4022
SINHALA_ARCHAIC_NUMBERS,
4023
KHOJKI,
4024
null,
4025
MULTANI,
4026
KHUDAWADI,
4027
GRANTHA,
4028
null,
4029
NEWA,
4030
TIRHUTA,
4031
null,
4032
SIDDHAM,
4033
MODI,
4034
MONGOLIAN_SUPPLEMENT,
4035
TAKRI,
4036
null,
4037
AHOM,
4038
null,
4039
DOGRA,
4040
null,
4041
WARANG_CITI,
4042
DIVES_AKURU,
4043
null,
4044
NANDINAGARI,
4045
ZANABAZAR_SQUARE,
4046
SOYOMBO,
4047
null,
4048
PAU_CIN_HAU,
4049
null,
4050
BHAIKSUKI,
4051
MARCHEN,
4052
null,
4053
MASARAM_GONDI,
4054
GUNJALA_GONDI,
4055
null,
4056
MAKASAR,
4057
null,
4058
LISU_SUPPLEMENT,
4059
TAMIL_SUPPLEMENT,
4060
CUNEIFORM,
4061
CUNEIFORM_NUMBERS_AND_PUNCTUATION,
4062
EARLY_DYNASTIC_CUNEIFORM,
4063
null,
4064
EGYPTIAN_HIEROGLYPHS,
4065
EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
4066
null,
4067
ANATOLIAN_HIEROGLYPHS,
4068
null,
4069
BAMUM_SUPPLEMENT,
4070
MRO,
4071
null,
4072
BASSA_VAH,
4073
PAHAWH_HMONG,
4074
null,
4075
MEDEFAIDRIN,
4076
null,
4077
MIAO,
4078
null,
4079
IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
4080
TANGUT,
4081
TANGUT_COMPONENTS,
4082
KHITAN_SMALL_SCRIPT,
4083
TANGUT_SUPPLEMENT,
4084
null,
4085
KANA_SUPPLEMENT,
4086
KANA_EXTENDED_A,
4087
SMALL_KANA_EXTENSION,
4088
NUSHU,
4089
null,
4090
DUPLOYAN,
4091
SHORTHAND_FORMAT_CONTROLS,
4092
null,
4093
BYZANTINE_MUSICAL_SYMBOLS,
4094
MUSICAL_SYMBOLS,
4095
ANCIENT_GREEK_MUSICAL_NOTATION,
4096
null,
4097
MAYAN_NUMERALS,
4098
TAI_XUAN_JING_SYMBOLS,
4099
COUNTING_ROD_NUMERALS,
4100
null,
4101
MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
4102
SUTTON_SIGNWRITING,
4103
null,
4104
GLAGOLITIC_SUPPLEMENT,
4105
null,
4106
NYIAKENG_PUACHUE_HMONG,
4107
null,
4108
WANCHO,
4109
null,
4110
MENDE_KIKAKUI,
4111
null,
4112
ADLAM,
4113
null,
4114
INDIC_SIYAQ_NUMBERS,
4115
null,
4116
OTTOMAN_SIYAQ_NUMBERS,
4117
null,
4118
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
4119
null,
4120
MAHJONG_TILES,
4121
DOMINO_TILES,
4122
PLAYING_CARDS,
4123
ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
4124
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
4125
MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
4126
EMOTICONS,
4127
ORNAMENTAL_DINGBATS,
4128
TRANSPORT_AND_MAP_SYMBOLS,
4129
ALCHEMICAL_SYMBOLS,
4130
GEOMETRIC_SHAPES_EXTENDED,
4131
SUPPLEMENTAL_ARROWS_C,
4132
SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
4133
CHESS_SYMBOLS,
4134
SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
4135
SYMBOLS_FOR_LEGACY_COMPUTING,
4136
null,
4137
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
4138
null,
4139
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
4140
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
4141
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
4142
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
4143
null,
4144
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
4145
null,
4146
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
4147
null,
4148
TAGS,
4149
null,
4150
VARIATION_SELECTORS_SUPPLEMENT,
4151
null,
4152
SUPPLEMENTARY_PRIVATE_USE_AREA_A,
4153
SUPPLEMENTARY_PRIVATE_USE_AREA_B,
4154
};
4155
4156
4157
/**
4158
* Returns the object representing the Unicode block containing the
4159
* given character, or {@code null} if the character is not a
4160
* member of a defined block.
4161
*
4162
* <p><b>Note:</b> This method cannot handle
4163
* <a href="Character.html#supplementary"> supplementary
4164
* characters</a>. To support all Unicode characters, including
4165
* supplementary characters, use the {@link #of(int)} method.
4166
*
4167
* @param c The character in question
4168
* @return The {@code UnicodeBlock} instance representing the
4169
* Unicode block of which this character is a member, or
4170
* {@code null} if the character is not a member of any
4171
* Unicode block
4172
*/
4173
public static UnicodeBlock of(char c) {
4174
return of((int)c);
4175
}
4176
4177
/**
4178
* Returns the object representing the Unicode block
4179
* containing the given character (Unicode code point), or
4180
* {@code null} if the character is not a member of a
4181
* defined block.
4182
*
4183
* @param codePoint the character (Unicode code point) in question.
4184
* @return The {@code UnicodeBlock} instance representing the
4185
* Unicode block of which this character is a member, or
4186
* {@code null} if the character is not a member of any
4187
* Unicode block
4188
* @throws IllegalArgumentException if the specified
4189
* {@code codePoint} is an invalid Unicode code point.
4190
* @see Character#isValidCodePoint(int)
4191
* @since 1.5
4192
*/
4193
public static UnicodeBlock of(int codePoint) {
4194
if (!isValidCodePoint(codePoint)) {
4195
throw new IllegalArgumentException(
4196
String.format("Not a valid Unicode code point: 0x%X", codePoint));
4197
}
4198
4199
int top, bottom, current;
4200
bottom = 0;
4201
top = blockStarts.length;
4202
current = top/2;
4203
4204
// invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4205
while (top - bottom > 1) {
4206
if (codePoint >= blockStarts[current]) {
4207
bottom = current;
4208
} else {
4209
top = current;
4210
}
4211
current = (top + bottom) / 2;
4212
}
4213
return blocks[current];
4214
}
4215
4216
/**
4217
* Returns the UnicodeBlock with the given name. Block
4218
* names are determined by The Unicode Standard. The file
4219
* {@code Blocks-<version>.txt} defines blocks for a particular
4220
* version of the standard. The {@link Character} class specifies
4221
* the version of the standard that it supports.
4222
* <p>
4223
* This method accepts block names in the following forms:
4224
* <ol>
4225
* <li> Canonical block names as defined by the Unicode Standard.
4226
* For example, the standard defines a "Basic Latin" block. Therefore, this
4227
* method accepts "Basic Latin" as a valid block name. The documentation of
4228
* each UnicodeBlock provides the canonical name.
4229
* <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4230
* is a valid block name for the "Basic Latin" block.
4231
* <li>The text representation of each constant UnicodeBlock identifier.
4232
* For example, this method will return the {@link #BASIC_LATIN} block if
4233
* provided with the "BASIC_LATIN" name. This form replaces all spaces and
4234
* hyphens in the canonical name with underscores.
4235
* </ol>
4236
* Finally, character case is ignored for all of the valid block name forms.
4237
* For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4238
* The en_US locale's case mapping rules are used to provide case-insensitive
4239
* string comparisons for block name validation.
4240
* <p>
4241
* If the Unicode Standard changes block names, both the previous and
4242
* current names will be accepted.
4243
*
4244
* @param blockName A {@code UnicodeBlock} name.
4245
* @return The {@code UnicodeBlock} instance identified
4246
* by {@code blockName}
4247
* @throws IllegalArgumentException if {@code blockName} is an
4248
* invalid name
4249
* @throws NullPointerException if {@code blockName} is null
4250
* @since 1.5
4251
*/
4252
public static final UnicodeBlock forName(String blockName) {
4253
UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4254
if (block == null) {
4255
throw new IllegalArgumentException("Not a valid block name: "
4256
+ blockName);
4257
}
4258
return block;
4259
}
4260
}
4261
4262
4263
/**
 * A family of character subsets representing the character scripts
 * defined in the <a href="http://www.unicode.org/reports/tr24/">
 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
 * character is assigned to a single Unicode script, either a specific
 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
 * one of the following three special values,
 * {@link Character.UnicodeScript#INHERITED Inherited},
 * {@link Character.UnicodeScript#COMMON Common} or
 * {@link Character.UnicodeScript#UNKNOWN Unknown}.
 *
 * @since 1.7
 */
4276
public static enum UnicodeScript {
4277
/**
4278
* Unicode script "Common".
4279
*/
4280
COMMON,
4281
4282
/**
4283
* Unicode script "Latin".
4284
*/
4285
LATIN,
4286
4287
/**
4288
* Unicode script "Greek".
4289
*/
4290
GREEK,
4291
4292
/**
4293
* Unicode script "Cyrillic".
4294
*/
4295
CYRILLIC,
4296
4297
/**
4298
* Unicode script "Armenian".
4299
*/
4300
ARMENIAN,
4301
4302
/**
4303
* Unicode script "Hebrew".
4304
*/
4305
HEBREW,
4306
4307
/**
4308
* Unicode script "Arabic".
4309
*/
4310
ARABIC,
4311
4312
/**
4313
* Unicode script "Syriac".
4314
*/
4315
SYRIAC,
4316
4317
/**
4318
* Unicode script "Thaana".
4319
*/
4320
THAANA,
4321
4322
/**
4323
* Unicode script "Devanagari".
4324
*/
4325
DEVANAGARI,
4326
4327
/**
4328
* Unicode script "Bengali".
4329
*/
4330
BENGALI,
4331
4332
/**
4333
* Unicode script "Gurmukhi".
4334
*/
4335
GURMUKHI,
4336
4337
/**
4338
* Unicode script "Gujarati".
4339
*/
4340
GUJARATI,
4341
4342
/**
4343
* Unicode script "Oriya".
4344
*/
4345
ORIYA,
4346
4347
/**
4348
* Unicode script "Tamil".
4349
*/
4350
TAMIL,
4351
4352
/**
4353
* Unicode script "Telugu".
4354
*/
4355
TELUGU,
4356
4357
/**
4358
* Unicode script "Kannada".
4359
*/
4360
KANNADA,
4361
4362
/**
4363
* Unicode script "Malayalam".
4364
*/
4365
MALAYALAM,
4366
4367
/**
4368
* Unicode script "Sinhala".
4369
*/
4370
SINHALA,
4371
4372
/**
4373
* Unicode script "Thai".
4374
*/
4375
THAI,
4376
4377
/**
4378
* Unicode script "Lao".
4379
*/
4380
LAO,
4381
4382
/**
4383
* Unicode script "Tibetan".
4384
*/
4385
TIBETAN,
4386
4387
/**
4388
* Unicode script "Myanmar".
4389
*/
4390
MYANMAR,
4391
4392
/**
4393
* Unicode script "Georgian".
4394
*/
4395
GEORGIAN,
4396
4397
/**
4398
* Unicode script "Hangul".
4399
*/
4400
HANGUL,
4401
4402
/**
4403
* Unicode script "Ethiopic".
4404
*/
4405
ETHIOPIC,
4406
4407
/**
4408
* Unicode script "Cherokee".
4409
*/
4410
CHEROKEE,
4411
4412
/**
4413
* Unicode script "Canadian_Aboriginal".
4414
*/
4415
CANADIAN_ABORIGINAL,
4416
4417
/**
4418
* Unicode script "Ogham".
4419
*/
4420
OGHAM,
4421
4422
/**
4423
* Unicode script "Runic".
4424
*/
4425
RUNIC,
4426
4427
/**
4428
* Unicode script "Khmer".
4429
*/
4430
KHMER,
4431
4432
/**
4433
* Unicode script "Mongolian".
4434
*/
4435
MONGOLIAN,
4436
4437
/**
4438
* Unicode script "Hiragana".
4439
*/
4440
HIRAGANA,
4441
4442
/**
4443
* Unicode script "Katakana".
4444
*/
4445
KATAKANA,
4446
4447
/**
4448
* Unicode script "Bopomofo".
4449
*/
4450
BOPOMOFO,
4451
4452
/**
4453
* Unicode script "Han".
4454
*/
4455
HAN,
4456
4457
/**
4458
* Unicode script "Yi".
4459
*/
4460
YI,
4461
4462
/**
4463
* Unicode script "Old_Italic".
4464
*/
4465
OLD_ITALIC,
4466
4467
/**
4468
* Unicode script "Gothic".
4469
*/
4470
GOTHIC,
4471
4472
/**
4473
* Unicode script "Deseret".
4474
*/
4475
DESERET,
4476
4477
/**
4478
* Unicode script "Inherited".
4479
*/
4480
INHERITED,
4481
4482
/**
4483
* Unicode script "Tagalog".
4484
*/
4485
TAGALOG,
4486
4487
/**
4488
* Unicode script "Hanunoo".
4489
*/
4490
HANUNOO,
4491
4492
/**
4493
* Unicode script "Buhid".
4494
*/
4495
BUHID,
4496
4497
/**
4498
* Unicode script "Tagbanwa".
4499
*/
4500
TAGBANWA,
4501
4502
/**
4503
* Unicode script "Limbu".
4504
*/
4505
LIMBU,
4506
4507
/**
4508
* Unicode script "Tai_Le".
4509
*/
4510
TAI_LE,
4511
4512
/**
4513
* Unicode script "Linear_B".
4514
*/
4515
LINEAR_B,
4516
4517
/**
4518
* Unicode script "Ugaritic".
4519
*/
4520
UGARITIC,
4521
4522
/**
4523
* Unicode script "Shavian".
4524
*/
4525
SHAVIAN,
4526
4527
/**
4528
* Unicode script "Osmanya".
4529
*/
4530
OSMANYA,
4531
4532
/**
4533
* Unicode script "Cypriot".
4534
*/
4535
CYPRIOT,
4536
4537
/**
4538
* Unicode script "Braille".
4539
*/
4540
BRAILLE,
4541
4542
/**
4543
* Unicode script "Buginese".
4544
*/
4545
BUGINESE,
4546
4547
/**
4548
* Unicode script "Coptic".
4549
*/
4550
COPTIC,
4551
4552
/**
4553
* Unicode script "New_Tai_Lue".
4554
*/
4555
NEW_TAI_LUE,
4556
4557
/**
4558
* Unicode script "Glagolitic".
4559
*/
4560
GLAGOLITIC,
4561
4562
/**
4563
* Unicode script "Tifinagh".
4564
*/
4565
TIFINAGH,
4566
4567
/**
4568
* Unicode script "Syloti_Nagri".
4569
*/
4570
SYLOTI_NAGRI,
4571
4572
/**
4573
* Unicode script "Old_Persian".
4574
*/
4575
OLD_PERSIAN,
4576
4577
/**
4578
* Unicode script "Kharoshthi".
4579
*/
4580
KHAROSHTHI,
4581
4582
/**
4583
* Unicode script "Balinese".
4584
*/
4585
BALINESE,
4586
4587
/**
4588
* Unicode script "Cuneiform".
4589
*/
4590
CUNEIFORM,
4591
4592
/**
4593
* Unicode script "Phoenician".
4594
*/
4595
PHOENICIAN,
4596
4597
/**
4598
* Unicode script "Phags_Pa".
4599
*/
4600
PHAGS_PA,
4601
4602
/**
4603
* Unicode script "Nko".
4604
*/
4605
NKO,
4606
4607
/**
4608
* Unicode script "Sundanese".
4609
*/
4610
SUNDANESE,
4611
4612
/**
4613
* Unicode script "Batak".
4614
*/
4615
BATAK,
4616
4617
/**
4618
* Unicode script "Lepcha".
4619
*/
4620
LEPCHA,
4621
4622
/**
4623
* Unicode script "Ol_Chiki".
4624
*/
4625
OL_CHIKI,
4626
4627
/**
4628
* Unicode script "Vai".
4629
*/
4630
VAI,
4631
4632
/**
4633
* Unicode script "Saurashtra".
4634
*/
4635
SAURASHTRA,
4636
4637
/**
4638
* Unicode script "Kayah_Li".
4639
*/
4640
KAYAH_LI,
4641
4642
/**
4643
* Unicode script "Rejang".
4644
*/
4645
REJANG,
4646
4647
/**
4648
* Unicode script "Lycian".
4649
*/
4650
LYCIAN,
4651
4652
/**
4653
* Unicode script "Carian".
4654
*/
4655
CARIAN,
4656
4657
/**
4658
* Unicode script "Lydian".
4659
*/
4660
LYDIAN,
4661
4662
/**
4663
* Unicode script "Cham".
4664
*/
4665
CHAM,
4666
4667
/**
4668
* Unicode script "Tai_Tham".
4669
*/
4670
TAI_THAM,
4671
4672
/**
4673
* Unicode script "Tai_Viet".
4674
*/
4675
TAI_VIET,
4676
4677
/**
4678
* Unicode script "Avestan".
4679
*/
4680
AVESTAN,
4681
4682
/**
4683
* Unicode script "Egyptian_Hieroglyphs".
4684
*/
4685
EGYPTIAN_HIEROGLYPHS,
4686
4687
/**
4688
* Unicode script "Samaritan".
4689
*/
4690
SAMARITAN,
4691
4692
/**
4693
* Unicode script "Mandaic".
4694
*/
4695
MANDAIC,
4696
4697
/**
4698
* Unicode script "Lisu".
4699
*/
4700
LISU,
4701
4702
/**
4703
* Unicode script "Bamum".
4704
*/
4705
BAMUM,
4706
4707
/**
4708
* Unicode script "Javanese".
4709
*/
4710
JAVANESE,
4711
4712
/**
4713
* Unicode script "Meetei_Mayek".
4714
*/
4715
MEETEI_MAYEK,
4716
4717
/**
4718
* Unicode script "Imperial_Aramaic".
4719
*/
4720
IMPERIAL_ARAMAIC,
4721
4722
/**
4723
* Unicode script "Old_South_Arabian".
4724
*/
4725
OLD_SOUTH_ARABIAN,
4726
4727
/**
4728
* Unicode script "Inscriptional_Parthian".
4729
*/
4730
INSCRIPTIONAL_PARTHIAN,
4731
4732
/**
4733
* Unicode script "Inscriptional_Pahlavi".
4734
*/
4735
INSCRIPTIONAL_PAHLAVI,
4736
4737
/**
4738
* Unicode script "Old_Turkic".
4739
*/
4740
OLD_TURKIC,
4741
4742
/**
4743
* Unicode script "Brahmi".
4744
*/
4745
BRAHMI,
4746
4747
/**
4748
* Unicode script "Kaithi".
4749
*/
4750
KAITHI,
4751
4752
/**
4753
* Unicode script "Meroitic Hieroglyphs".
4754
* @since 1.8
4755
*/
4756
MEROITIC_HIEROGLYPHS,
4757
4758
/**
4759
* Unicode script "Meroitic Cursive".
4760
* @since 1.8
4761
*/
4762
MEROITIC_CURSIVE,
4763
4764
/**
4765
* Unicode script "Sora Sompeng".
4766
* @since 1.8
4767
*/
4768
SORA_SOMPENG,
4769
4770
/**
4771
* Unicode script "Chakma".
4772
* @since 1.8
4773
*/
4774
CHAKMA,
4775
4776
/**
4777
* Unicode script "Sharada".
4778
* @since 1.8
4779
*/
4780
SHARADA,
4781
4782
/**
4783
* Unicode script "Takri".
4784
* @since 1.8
4785
*/
4786
TAKRI,
4787
4788
/**
4789
* Unicode script "Miao".
4790
* @since 1.8
4791
*/
4792
MIAO,
4793
4794
/**
4795
* Unicode script "Caucasian Albanian".
4796
* @since 9
4797
*/
4798
CAUCASIAN_ALBANIAN,
4799
4800
/**
4801
* Unicode script "Bassa Vah".
4802
* @since 9
4803
*/
4804
BASSA_VAH,
4805
4806
/**
4807
* Unicode script "Duployan".
4808
* @since 9
4809
*/
4810
DUPLOYAN,
4811
4812
/**
4813
* Unicode script "Elbasan".
4814
* @since 9
4815
*/
4816
ELBASAN,
4817
4818
/**
4819
* Unicode script "Grantha".
4820
* @since 9
4821
*/
4822
GRANTHA,
4823
4824
/**
4825
* Unicode script "Pahawh Hmong".
4826
* @since 9
4827
*/
4828
PAHAWH_HMONG,
4829
4830
/**
4831
* Unicode script "Khojki".
4832
* @since 9
4833
*/
4834
KHOJKI,
4835
4836
/**
4837
* Unicode script "Linear A".
4838
* @since 9
4839
*/
4840
LINEAR_A,
4841
4842
/**
4843
* Unicode script "Mahajani".
4844
* @since 9
4845
*/
4846
MAHAJANI,
4847
4848
/**
4849
* Unicode script "Manichaean".
4850
* @since 9
4851
*/
4852
MANICHAEAN,
4853
4854
/**
4855
* Unicode script "Mende Kikakui".
4856
* @since 9
4857
*/
4858
MENDE_KIKAKUI,
4859
4860
/**
4861
* Unicode script "Modi".
4862
* @since 9
4863
*/
4864
MODI,
4865
4866
/**
4867
* Unicode script "Mro".
4868
* @since 9
4869
*/
4870
MRO,
4871
4872
/**
4873
* Unicode script "Old North Arabian".
4874
* @since 9
4875
*/
4876
OLD_NORTH_ARABIAN,
4877
4878
/**
4879
* Unicode script "Nabataean".
4880
* @since 9
4881
*/
4882
NABATAEAN,
4883
4884
/**
4885
* Unicode script "Palmyrene".
4886
* @since 9
4887
*/
4888
PALMYRENE,
4889
4890
/**
4891
* Unicode script "Pau Cin Hau".
4892
* @since 9
4893
*/
4894
PAU_CIN_HAU,
4895
4896
/**
4897
* Unicode script "Old Permic".
4898
* @since 9
4899
*/
4900
OLD_PERMIC,
4901
4902
/**
4903
* Unicode script "Psalter Pahlavi".
4904
* @since 9
4905
*/
4906
PSALTER_PAHLAVI,
4907
4908
/**
4909
* Unicode script "Siddham".
4910
* @since 9
4911
*/
4912
SIDDHAM,
4913
4914
/**
4915
* Unicode script "Khudawadi".
4916
* @since 9
4917
*/
4918
KHUDAWADI,
4919
4920
/**
4921
* Unicode script "Tirhuta".
4922
* @since 9
4923
*/
4924
TIRHUTA,
4925
4926
/**
4927
* Unicode script "Warang Citi".
4928
* @since 9
4929
*/
4930
WARANG_CITI,
4931
4932
/**
4933
* Unicode script "Ahom".
4934
* @since 9
4935
*/
4936
AHOM,
4937
4938
/**
4939
* Unicode script "Anatolian Hieroglyphs".
4940
* @since 9
4941
*/
4942
ANATOLIAN_HIEROGLYPHS,
4943
4944
/**
4945
* Unicode script "Hatran".
4946
* @since 9
4947
*/
4948
HATRAN,
4949
4950
/**
4951
* Unicode script "Multani".
4952
* @since 9
4953
*/
4954
MULTANI,
4955
4956
/**
4957
* Unicode script "Old Hungarian".
4958
* @since 9
4959
*/
4960
OLD_HUNGARIAN,
4961
4962
/**
4963
* Unicode script "SignWriting".
4964
* @since 9
4965
*/
4966
SIGNWRITING,
4967
4968
/**
4969
* Unicode script "Adlam".
4970
* @since 11
4971
*/
4972
ADLAM,
4973
4974
/**
4975
* Unicode script "Bhaiksuki".
4976
* @since 11
4977
*/
4978
BHAIKSUKI,
4979
4980
/**
4981
* Unicode script "Marchen".
4982
* @since 11
4983
*/
4984
MARCHEN,
4985
4986
/**
4987
* Unicode script "Newa".
4988
* @since 11
4989
*/
4990
NEWA,
4991
4992
/**
4993
* Unicode script "Osage".
4994
* @since 11
4995
*/
4996
OSAGE,
4997
4998
/**
4999
* Unicode script "Tangut".
5000
* @since 11
5001
*/
5002
TANGUT,
5003
5004
/**
5005
* Unicode script "Masaram Gondi".
5006
* @since 11
5007
*/
5008
MASARAM_GONDI,
5009
5010
/**
5011
* Unicode script "Nushu".
5012
* @since 11
5013
*/
5014
NUSHU,
5015
5016
/**
5017
* Unicode script "Soyombo".
5018
* @since 11
5019
*/
5020
SOYOMBO,
5021
5022
/**
5023
* Unicode script "Zanabazar Square".
5024
* @since 11
5025
*/
5026
ZANABAZAR_SQUARE,
5027
5028
/**
5029
* Unicode script "Hanifi Rohingya".
5030
* @since 12
5031
*/
5032
HANIFI_ROHINGYA,
5033
5034
/**
5035
* Unicode script "Old Sogdian".
5036
* @since 12
5037
*/
5038
OLD_SOGDIAN,
5039
5040
/**
5041
* Unicode script "Sogdian".
5042
* @since 12
5043
*/
5044
SOGDIAN,
5045
5046
/**
5047
* Unicode script "Dogra".
5048
* @since 12
5049
*/
5050
DOGRA,
5051
5052
/**
5053
* Unicode script "Gunjala Gondi".
5054
* @since 12
5055
*/
5056
GUNJALA_GONDI,
5057
5058
/**
5059
* Unicode script "Makasar".
5060
* @since 12
5061
*/
5062
MAKASAR,
5063
5064
/**
5065
* Unicode script "Medefaidrin".
5066
* @since 12
5067
*/
5068
MEDEFAIDRIN,
5069
5070
/**
5071
* Unicode script "Elymaic".
5072
* @since 13
5073
*/
5074
ELYMAIC,
5075
5076
/**
5077
* Unicode script "Nandinagari".
5078
* @since 13
5079
*/
5080
NANDINAGARI,
5081
5082
/**
5083
* Unicode script "Nyiakeng Puachue Hmong".
5084
* @since 13
5085
*/
5086
NYIAKENG_PUACHUE_HMONG,
5087
5088
/**
5089
* Unicode script "Wancho".
5090
* @since 13
5091
*/
5092
WANCHO,
5093
5094
/**
5095
* Unicode script "Yezidi".
5096
* @since 15
5097
*/
5098
YEZIDI,
5099
5100
/**
5101
* Unicode script "Chorasmian".
5102
* @since 15
5103
*/
5104
CHORASMIAN,
5105
5106
/**
5107
* Unicode script "Dives Akuru".
5108
* @since 15
5109
*/
5110
DIVES_AKURU,
5111
5112
/**
5113
* Unicode script "Khitan Small Script".
5114
* @since 15
5115
*/
5116
KHITAN_SMALL_SCRIPT,
5117
5118
/**
5119
* Unicode script "Unknown".
5120
*/
5121
UNKNOWN;
5122
5123
private static final int[] scriptStarts = {
5124
0x0000, // 0000..0040; COMMON
5125
0x0041, // 0041..005A; LATIN
5126
0x005B, // 005B..0060; COMMON
5127
0x0061, // 0061..007A; LATIN
5128
0x007B, // 007B..00A9; COMMON
5129
0x00AA, // 00AA ; LATIN
5130
0x00AB, // 00AB..00B9; COMMON
5131
0x00BA, // 00BA ; LATIN
5132
0x00BB, // 00BB..00BF; COMMON
5133
0x00C0, // 00C0..00D6; LATIN
5134
0x00D7, // 00D7 ; COMMON
5135
0x00D8, // 00D8..00F6; LATIN
5136
0x00F7, // 00F7 ; COMMON
5137
0x00F8, // 00F8..02B8; LATIN
5138
0x02B9, // 02B9..02DF; COMMON
5139
0x02E0, // 02E0..02E4; LATIN
5140
0x02E5, // 02E5..02E9; COMMON
5141
0x02EA, // 02EA..02EB; BOPOMOFO
5142
0x02EC, // 02EC..02FF; COMMON
5143
0x0300, // 0300..036F; INHERITED
5144
0x0370, // 0370..0373; GREEK
5145
0x0374, // 0374 ; COMMON
5146
0x0375, // 0375..0377; GREEK
5147
0x0378, // 0378..0379; UNKNOWN
5148
0x037A, // 037A..037D; GREEK
5149
0x037E, // 037E ; COMMON
5150
0x037F, // 037F ; GREEK
5151
0x0380, // 0380..0383; UNKNOWN
5152
0x0384, // 0384 ; GREEK
5153
0x0385, // 0385 ; COMMON
5154
0x0386, // 0386 ; GREEK
5155
0x0387, // 0387 ; COMMON
5156
0x0388, // 0388..038A; GREEK
5157
0x038B, // 038B ; UNKNOWN
5158
0x038C, // 038C ; GREEK
5159
0x038D, // 038D ; UNKNOWN
5160
0x038E, // 038E..03A1; GREEK
5161
0x03A2, // 03A2 ; UNKNOWN
5162
0x03A3, // 03A3..03E1; GREEK
5163
0x03E2, // 03E2..03EF; COPTIC
5164
0x03F0, // 03F0..03FF; GREEK
5165
0x0400, // 0400..0484; CYRILLIC
5166
0x0485, // 0485..0486; INHERITED
5167
0x0487, // 0487..052F; CYRILLIC
5168
0x0530, // 0530 ; UNKNOWN
5169
0x0531, // 0531..0556; ARMENIAN
5170
0x0557, // 0557..0558; UNKNOWN
5171
0x0559, // 0559..058A; ARMENIAN
5172
0x058B, // 058B..058C; UNKNOWN
5173
0x058D, // 058D..058F; ARMENIAN
5174
0x0590, // 0590 ; UNKNOWN
5175
0x0591, // 0591..05C7; HEBREW
5176
0x05C8, // 05C8..05CF; UNKNOWN
5177
0x05D0, // 05D0..05EA; HEBREW
5178
0x05EB, // 05EB..05EE; UNKNOWN
5179
0x05EF, // 05EF..05F4; HEBREW
5180
0x05F5, // 05F5..05FF; UNKNOWN
5181
0x0600, // 0600..0604; ARABIC
5182
0x0605, // 0605 ; COMMON
5183
0x0606, // 0606..060B; ARABIC
5184
0x060C, // 060C ; COMMON
5185
0x060D, // 060D..061A; ARABIC
5186
0x061B, // 061B ; COMMON
5187
0x061C, // 061C ; ARABIC
5188
0x061D, // 061D ; UNKNOWN
5189
0x061E, // 061E ; ARABIC
5190
0x061F, // 061F ; COMMON
5191
0x0620, // 0620..063F; ARABIC
5192
0x0640, // 0640 ; COMMON
5193
0x0641, // 0641..064A; ARABIC
5194
0x064B, // 064B..0655; INHERITED
5195
0x0656, // 0656..066F; ARABIC
5196
0x0670, // 0670 ; INHERITED
5197
0x0671, // 0671..06DC; ARABIC
5198
0x06DD, // 06DD ; COMMON
5199
0x06DE, // 06DE..06FF; ARABIC
5200
0x0700, // 0700..070D; SYRIAC
5201
0x070E, // 070E ; UNKNOWN
5202
0x070F, // 070F..074A; SYRIAC
5203
0x074B, // 074B..074C; UNKNOWN
5204
0x074D, // 074D..074F; SYRIAC
5205
0x0750, // 0750..077F; ARABIC
5206
0x0780, // 0780..07B1; THAANA
5207
0x07B2, // 07B2..07BF; UNKNOWN
5208
0x07C0, // 07C0..07FA; NKO
5209
0x07FB, // 07FB..07FC; UNKNOWN
5210
0x07FD, // 07FD..07FF; NKO
5211
0x0800, // 0800..082D; SAMARITAN
5212
0x082E, // 082E..082F; UNKNOWN
5213
0x0830, // 0830..083E; SAMARITAN
5214
0x083F, // 083F ; UNKNOWN
5215
0x0840, // 0840..085B; MANDAIC
5216
0x085C, // 085C..085D; UNKNOWN
5217
0x085E, // 085E ; MANDAIC
5218
0x085F, // 085F ; UNKNOWN
5219
0x0860, // 0860..086A; SYRIAC
5220
0x086B, // 086B..089F; UNKNOWN
5221
0x08A0, // 08A0..08B4; ARABIC
5222
0x08B5, // 08B5 ; UNKNOWN
5223
0x08B6, // 08B6..08C7; ARABIC
5224
0x08C8, // 08C8..08D2; UNKNOWN
5225
0x08D3, // 08D3..08E1; ARABIC
5226
0x08E2, // 08E2 ; COMMON
5227
0x08E3, // 08E3..08FF; ARABIC
5228
0x0900, // 0900..0950; DEVANAGARI
5229
0x0951, // 0951..0954; INHERITED
5230
0x0955, // 0955..0963; DEVANAGARI
5231
0x0964, // 0964..0965; COMMON
5232
0x0966, // 0966..097F; DEVANAGARI
5233
0x0980, // 0980..0983; BENGALI
5234
0x0984, // 0984 ; UNKNOWN
5235
0x0985, // 0985..098C; BENGALI
5236
0x098D, // 098D..098E; UNKNOWN
5237
0x098F, // 098F..0990; BENGALI
5238
0x0991, // 0991..0992; UNKNOWN
5239
0x0993, // 0993..09A8; BENGALI
5240
0x09A9, // 09A9 ; UNKNOWN
5241
0x09AA, // 09AA..09B0; BENGALI
5242
0x09B1, // 09B1 ; UNKNOWN
5243
0x09B2, // 09B2 ; BENGALI
5244
0x09B3, // 09B3..09B5; UNKNOWN
5245
0x09B6, // 09B6..09B9; BENGALI
5246
0x09BA, // 09BA..09BB; UNKNOWN
5247
0x09BC, // 09BC..09C4; BENGALI
5248
0x09C5, // 09C5..09C6; UNKNOWN
5249
0x09C7, // 09C7..09C8; BENGALI
5250
0x09C9, // 09C9..09CA; UNKNOWN
5251
0x09CB, // 09CB..09CE; BENGALI
5252
0x09CF, // 09CF..09D6; UNKNOWN
5253
0x09D7, // 09D7 ; BENGALI
5254
0x09D8, // 09D8..09DB; UNKNOWN
5255
0x09DC, // 09DC..09DD; BENGALI
5256
0x09DE, // 09DE ; UNKNOWN
5257
0x09DF, // 09DF..09E3; BENGALI
5258
0x09E4, // 09E4..09E5; UNKNOWN
5259
0x09E6, // 09E6..09FE; BENGALI
5260
0x09FF, // 09FF..0A00; UNKNOWN
5261
0x0A01, // 0A01..0A03; GURMUKHI
5262
0x0A04, // 0A04 ; UNKNOWN
5263
0x0A05, // 0A05..0A0A; GURMUKHI
5264
0x0A0B, // 0A0B..0A0E; UNKNOWN
5265
0x0A0F, // 0A0F..0A10; GURMUKHI
5266
0x0A11, // 0A11..0A12; UNKNOWN
5267
0x0A13, // 0A13..0A28; GURMUKHI
5268
0x0A29, // 0A29 ; UNKNOWN
5269
0x0A2A, // 0A2A..0A30; GURMUKHI
5270
0x0A31, // 0A31 ; UNKNOWN
5271
0x0A32, // 0A32..0A33; GURMUKHI
5272
0x0A34, // 0A34 ; UNKNOWN
5273
0x0A35, // 0A35..0A36; GURMUKHI
5274
0x0A37, // 0A37 ; UNKNOWN
5275
0x0A38, // 0A38..0A39; GURMUKHI
5276
0x0A3A, // 0A3A..0A3B; UNKNOWN
5277
0x0A3C, // 0A3C ; GURMUKHI
5278
0x0A3D, // 0A3D ; UNKNOWN
5279
0x0A3E, // 0A3E..0A42; GURMUKHI
5280
0x0A43, // 0A43..0A46; UNKNOWN
5281
0x0A47, // 0A47..0A48; GURMUKHI
5282
0x0A49, // 0A49..0A4A; UNKNOWN
5283
0x0A4B, // 0A4B..0A4D; GURMUKHI
5284
0x0A4E, // 0A4E..0A50; UNKNOWN
5285
0x0A51, // 0A51 ; GURMUKHI
5286
0x0A52, // 0A52..0A58; UNKNOWN
5287
0x0A59, // 0A59..0A5C; GURMUKHI
5288
0x0A5D, // 0A5D ; UNKNOWN
5289
0x0A5E, // 0A5E ; GURMUKHI
5290
0x0A5F, // 0A5F..0A65; UNKNOWN
5291
0x0A66, // 0A66..0A76; GURMUKHI
5292
0x0A77, // 0A77..0A80; UNKNOWN
5293
0x0A81, // 0A81..0A83; GUJARATI
5294
0x0A84, // 0A84 ; UNKNOWN
5295
0x0A85, // 0A85..0A8D; GUJARATI
5296
0x0A8E, // 0A8E ; UNKNOWN
5297
0x0A8F, // 0A8F..0A91; GUJARATI
5298
0x0A92, // 0A92 ; UNKNOWN
5299
0x0A93, // 0A93..0AA8; GUJARATI
5300
0x0AA9, // 0AA9 ; UNKNOWN
5301
0x0AAA, // 0AAA..0AB0; GUJARATI
5302
0x0AB1, // 0AB1 ; UNKNOWN
5303
0x0AB2, // 0AB2..0AB3; GUJARATI
5304
0x0AB4, // 0AB4 ; UNKNOWN
5305
0x0AB5, // 0AB5..0AB9; GUJARATI
5306
0x0ABA, // 0ABA..0ABB; UNKNOWN
5307
0x0ABC, // 0ABC..0AC5; GUJARATI
5308
0x0AC6, // 0AC6 ; UNKNOWN
5309
0x0AC7, // 0AC7..0AC9; GUJARATI
5310
0x0ACA, // 0ACA ; UNKNOWN
5311
0x0ACB, // 0ACB..0ACD; GUJARATI
5312
0x0ACE, // 0ACE..0ACF; UNKNOWN
5313
0x0AD0, // 0AD0 ; GUJARATI
5314
0x0AD1, // 0AD1..0ADF; UNKNOWN
5315
0x0AE0, // 0AE0..0AE3; GUJARATI
5316
0x0AE4, // 0AE4..0AE5; UNKNOWN
5317
0x0AE6, // 0AE6..0AF1; GUJARATI
5318
0x0AF2, // 0AF2..0AF8; UNKNOWN
5319
0x0AF9, // 0AF9..0AFF; GUJARATI
5320
0x0B00, // 0B00 ; UNKNOWN
5321
0x0B01, // 0B01..0B03; ORIYA
5322
0x0B04, // 0B04 ; UNKNOWN
5323
0x0B05, // 0B05..0B0C; ORIYA
5324
0x0B0D, // 0B0D..0B0E; UNKNOWN
5325
0x0B0F, // 0B0F..0B10; ORIYA
5326
0x0B11, // 0B11..0B12; UNKNOWN
5327
0x0B13, // 0B13..0B28; ORIYA
5328
0x0B29, // 0B29 ; UNKNOWN
5329
0x0B2A, // 0B2A..0B30; ORIYA
5330
0x0B31, // 0B31 ; UNKNOWN
5331
0x0B32, // 0B32..0B33; ORIYA
5332
0x0B34, // 0B34 ; UNKNOWN
5333
0x0B35, // 0B35..0B39; ORIYA
5334
0x0B3A, // 0B3A..0B3B; UNKNOWN
5335
0x0B3C, // 0B3C..0B44; ORIYA
5336
0x0B45, // 0B45..0B46; UNKNOWN
5337
0x0B47, // 0B47..0B48; ORIYA
5338
0x0B49, // 0B49..0B4A; UNKNOWN
5339
0x0B4B, // 0B4B..0B4D; ORIYA
5340
0x0B4E, // 0B4E..0B54; UNKNOWN
5341
0x0B55, // 0B55..0B57; ORIYA
5342
0x0B58, // 0B58..0B5B; UNKNOWN
5343
0x0B5C, // 0B5C..0B5D; ORIYA
5344
0x0B5E, // 0B5E ; UNKNOWN
5345
0x0B5F, // 0B5F..0B63; ORIYA
5346
0x0B64, // 0B64..0B65; UNKNOWN
5347
0x0B66, // 0B66..0B77; ORIYA
5348
0x0B78, // 0B78..0B81; UNKNOWN
5349
0x0B82, // 0B82..0B83; TAMIL
5350
0x0B84, // 0B84 ; UNKNOWN
5351
0x0B85, // 0B85..0B8A; TAMIL
5352
0x0B8B, // 0B8B..0B8D; UNKNOWN
5353
0x0B8E, // 0B8E..0B90; TAMIL
5354
0x0B91, // 0B91 ; UNKNOWN
5355
0x0B92, // 0B92..0B95; TAMIL
5356
0x0B96, // 0B96..0B98; UNKNOWN
5357
0x0B99, // 0B99..0B9A; TAMIL
5358
0x0B9B, // 0B9B ; UNKNOWN
5359
0x0B9C, // 0B9C ; TAMIL
5360
0x0B9D, // 0B9D ; UNKNOWN
5361
0x0B9E, // 0B9E..0B9F; TAMIL
5362
0x0BA0, // 0BA0..0BA2; UNKNOWN
5363
0x0BA3, // 0BA3..0BA4; TAMIL
5364
0x0BA5, // 0BA5..0BA7; UNKNOWN
5365
0x0BA8, // 0BA8..0BAA; TAMIL
5366
0x0BAB, // 0BAB..0BAD; UNKNOWN
5367
0x0BAE, // 0BAE..0BB9; TAMIL
5368
0x0BBA, // 0BBA..0BBD; UNKNOWN
5369
0x0BBE, // 0BBE..0BC2; TAMIL
5370
0x0BC3, // 0BC3..0BC5; UNKNOWN
5371
0x0BC6, // 0BC6..0BC8; TAMIL
5372
0x0BC9, // 0BC9 ; UNKNOWN
5373
0x0BCA, // 0BCA..0BCD; TAMIL
5374
0x0BCE, // 0BCE..0BCF; UNKNOWN
5375
0x0BD0, // 0BD0 ; TAMIL
5376
0x0BD1, // 0BD1..0BD6; UNKNOWN
5377
0x0BD7, // 0BD7 ; TAMIL
5378
0x0BD8, // 0BD8..0BE5; UNKNOWN
5379
0x0BE6, // 0BE6..0BFA; TAMIL
5380
0x0BFB, // 0BFB..0BFF; UNKNOWN
5381
0x0C00, // 0C00..0C0C; TELUGU
5382
0x0C0D, // 0C0D ; UNKNOWN
5383
0x0C0E, // 0C0E..0C10; TELUGU
5384
0x0C11, // 0C11 ; UNKNOWN
5385
0x0C12, // 0C12..0C28; TELUGU
5386
0x0C29, // 0C29 ; UNKNOWN
5387
0x0C2A, // 0C2A..0C39; TELUGU
5388
0x0C3A, // 0C3A..0C3C; UNKNOWN
5389
0x0C3D, // 0C3D..0C44; TELUGU
5390
0x0C45, // 0C45 ; UNKNOWN
5391
0x0C46, // 0C46..0C48; TELUGU
5392
0x0C49, // 0C49 ; UNKNOWN
5393
0x0C4A, // 0C4A..0C4D; TELUGU
5394
0x0C4E, // 0C4E..0C54; UNKNOWN
5395
0x0C55, // 0C55..0C56; TELUGU
5396
0x0C57, // 0C57 ; UNKNOWN
5397
0x0C58, // 0C58..0C5A; TELUGU
5398
0x0C5B, // 0C5B..0C5F; UNKNOWN
5399
0x0C60, // 0C60..0C63; TELUGU
5400
0x0C64, // 0C64..0C65; UNKNOWN
5401
0x0C66, // 0C66..0C6F; TELUGU
5402
0x0C70, // 0C70..0C76; UNKNOWN
5403
0x0C77, // 0C77..0C7F; TELUGU
5404
0x0C80, // 0C80..0C8C; KANNADA
5405
0x0C8D, // 0C8D ; UNKNOWN
5406
0x0C8E, // 0C8E..0C90; KANNADA
5407
0x0C91, // 0C91 ; UNKNOWN
5408
0x0C92, // 0C92..0CA8; KANNADA
5409
0x0CA9, // 0CA9 ; UNKNOWN
5410
0x0CAA, // 0CAA..0CB3; KANNADA
5411
0x0CB4, // 0CB4 ; UNKNOWN
5412
0x0CB5, // 0CB5..0CB9; KANNADA
5413
0x0CBA, // 0CBA..0CBB; UNKNOWN
5414
0x0CBC, // 0CBC..0CC4; KANNADA
5415
0x0CC5, // 0CC5 ; UNKNOWN
5416
0x0CC6, // 0CC6..0CC8; KANNADA
5417
0x0CC9, // 0CC9 ; UNKNOWN
5418
0x0CCA, // 0CCA..0CCD; KANNADA
5419
0x0CCE, // 0CCE..0CD4; UNKNOWN
5420
0x0CD5, // 0CD5..0CD6; KANNADA
5421
0x0CD7, // 0CD7..0CDD; UNKNOWN
5422
0x0CDE, // 0CDE ; KANNADA
5423
0x0CDF, // 0CDF ; UNKNOWN
5424
0x0CE0, // 0CE0..0CE3; KANNADA
5425
0x0CE4, // 0CE4..0CE5; UNKNOWN
5426
0x0CE6, // 0CE6..0CEF; KANNADA
5427
0x0CF0, // 0CF0 ; UNKNOWN
5428
0x0CF1, // 0CF1..0CF2; KANNADA
5429
0x0CF3, // 0CF3..0CFF; UNKNOWN
5430
0x0D00, // 0D00..0D0C; MALAYALAM
5431
0x0D0D, // 0D0D ; UNKNOWN
5432
0x0D0E, // 0D0E..0D10; MALAYALAM
5433
0x0D11, // 0D11 ; UNKNOWN
5434
0x0D12, // 0D12..0D44; MALAYALAM
5435
0x0D45, // 0D45 ; UNKNOWN
5436
0x0D46, // 0D46..0D48; MALAYALAM
5437
0x0D49, // 0D49 ; UNKNOWN
5438
0x0D4A, // 0D4A..0D4F; MALAYALAM
5439
0x0D50, // 0D50..0D53; UNKNOWN
5440
0x0D54, // 0D54..0D63; MALAYALAM
5441
0x0D64, // 0D64..0D65; UNKNOWN
5442
0x0D66, // 0D66..0D7F; MALAYALAM
5443
0x0D80, // 0D80 ; UNKNOWN
5444
0x0D81, // 0D81..0D83; SINHALA
5445
0x0D84, // 0D84 ; UNKNOWN
5446
0x0D85, // 0D85..0D96; SINHALA
5447
0x0D97, // 0D97..0D99; UNKNOWN
5448
0x0D9A, // 0D9A..0DB1; SINHALA
5449
0x0DB2, // 0DB2 ; UNKNOWN
5450
0x0DB3, // 0DB3..0DBB; SINHALA
5451
0x0DBC, // 0DBC ; UNKNOWN
5452
0x0DBD, // 0DBD ; SINHALA
5453
0x0DBE, // 0DBE..0DBF; UNKNOWN
5454
0x0DC0, // 0DC0..0DC6; SINHALA
5455
0x0DC7, // 0DC7..0DC9; UNKNOWN
5456
0x0DCA, // 0DCA ; SINHALA
5457
0x0DCB, // 0DCB..0DCE; UNKNOWN
5458
0x0DCF, // 0DCF..0DD4; SINHALA
5459
0x0DD5, // 0DD5 ; UNKNOWN
5460
0x0DD6, // 0DD6 ; SINHALA
5461
0x0DD7, // 0DD7 ; UNKNOWN
5462
0x0DD8, // 0DD8..0DDF; SINHALA
5463
0x0DE0, // 0DE0..0DE5; UNKNOWN
5464
0x0DE6, // 0DE6..0DEF; SINHALA
5465
0x0DF0, // 0DF0..0DF1; UNKNOWN
5466
0x0DF2, // 0DF2..0DF4; SINHALA
5467
0x0DF5, // 0DF5..0E00; UNKNOWN
5468
0x0E01, // 0E01..0E3A; THAI
5469
0x0E3B, // 0E3B..0E3E; UNKNOWN
5470
0x0E3F, // 0E3F ; COMMON
5471
0x0E40, // 0E40..0E5B; THAI
5472
0x0E5C, // 0E5C..0E80; UNKNOWN
5473
0x0E81, // 0E81..0E82; LAO
5474
0x0E83, // 0E83 ; UNKNOWN
5475
0x0E84, // 0E84 ; LAO
5476
0x0E85, // 0E85 ; UNKNOWN
5477
0x0E86, // 0E86..0E8A; LAO
5478
0x0E8B, // 0E8B ; UNKNOWN
5479
0x0E8C, // 0E8C..0EA3; LAO
5480
0x0EA4, // 0EA4 ; UNKNOWN
5481
0x0EA5, // 0EA5 ; LAO
5482
0x0EA6, // 0EA6 ; UNKNOWN
5483
0x0EA7, // 0EA7..0EBD; LAO
5484
0x0EBE, // 0EBE..0EBF; UNKNOWN
5485
0x0EC0, // 0EC0..0EC4; LAO
5486
0x0EC5, // 0EC5 ; UNKNOWN
5487
0x0EC6, // 0EC6 ; LAO
5488
0x0EC7, // 0EC7 ; UNKNOWN
5489
0x0EC8, // 0EC8..0ECD; LAO
5490
0x0ECE, // 0ECE..0ECF; UNKNOWN
5491
0x0ED0, // 0ED0..0ED9; LAO
5492
0x0EDA, // 0EDA..0EDB; UNKNOWN
5493
0x0EDC, // 0EDC..0EDF; LAO
5494
0x0EE0, // 0EE0..0EFF; UNKNOWN
5495
0x0F00, // 0F00..0F47; TIBETAN
5496
0x0F48, // 0F48 ; UNKNOWN
5497
0x0F49, // 0F49..0F6C; TIBETAN
5498
0x0F6D, // 0F6D..0F70; UNKNOWN
5499
0x0F71, // 0F71..0F97; TIBETAN
5500
0x0F98, // 0F98 ; UNKNOWN
5501
0x0F99, // 0F99..0FBC; TIBETAN
5502
0x0FBD, // 0FBD ; UNKNOWN
5503
0x0FBE, // 0FBE..0FCC; TIBETAN
5504
0x0FCD, // 0FCD ; UNKNOWN
5505
0x0FCE, // 0FCE..0FD4; TIBETAN
5506
0x0FD5, // 0FD5..0FD8; COMMON
5507
0x0FD9, // 0FD9..0FDA; TIBETAN
5508
0x0FDB, // 0FDB..0FFF; UNKNOWN
5509
0x1000, // 1000..109F; MYANMAR
5510
0x10A0, // 10A0..10C5; GEORGIAN
5511
0x10C6, // 10C6 ; UNKNOWN
5512
0x10C7, // 10C7 ; GEORGIAN
5513
0x10C8, // 10C8..10CC; UNKNOWN
5514
0x10CD, // 10CD ; GEORGIAN
5515
0x10CE, // 10CE..10CF; UNKNOWN
5516
0x10D0, // 10D0..10FA; GEORGIAN
5517
0x10FB, // 10FB ; COMMON
5518
0x10FC, // 10FC..10FF; GEORGIAN
5519
0x1100, // 1100..11FF; HANGUL
5520
0x1200, // 1200..1248; ETHIOPIC
5521
0x1249, // 1249 ; UNKNOWN
5522
0x124A, // 124A..124D; ETHIOPIC
5523
0x124E, // 124E..124F; UNKNOWN
5524
0x1250, // 1250..1256; ETHIOPIC
5525
0x1257, // 1257 ; UNKNOWN
5526
0x1258, // 1258 ; ETHIOPIC
5527
0x1259, // 1259 ; UNKNOWN
5528
0x125A, // 125A..125D; ETHIOPIC
5529
0x125E, // 125E..125F; UNKNOWN
5530
0x1260, // 1260..1288; ETHIOPIC
5531
0x1289, // 1289 ; UNKNOWN
5532
0x128A, // 128A..128D; ETHIOPIC
5533
0x128E, // 128E..128F; UNKNOWN
5534
0x1290, // 1290..12B0; ETHIOPIC
5535
0x12B1, // 12B1 ; UNKNOWN
5536
0x12B2, // 12B2..12B5; ETHIOPIC
5537
0x12B6, // 12B6..12B7; UNKNOWN
5538
0x12B8, // 12B8..12BE; ETHIOPIC
5539
0x12BF, // 12BF ; UNKNOWN
5540
0x12C0, // 12C0 ; ETHIOPIC
5541
0x12C1, // 12C1 ; UNKNOWN
5542
0x12C2, // 12C2..12C5; ETHIOPIC
5543
0x12C6, // 12C6..12C7; UNKNOWN
5544
0x12C8, // 12C8..12D6; ETHIOPIC
5545
0x12D7, // 12D7 ; UNKNOWN
5546
0x12D8, // 12D8..1310; ETHIOPIC
5547
0x1311, // 1311 ; UNKNOWN
5548
0x1312, // 1312..1315; ETHIOPIC
5549
0x1316, // 1316..1317; UNKNOWN
5550
0x1318, // 1318..135A; ETHIOPIC
5551
0x135B, // 135B..135C; UNKNOWN
5552
0x135D, // 135D..137C; ETHIOPIC
5553
0x137D, // 137D..137F; UNKNOWN
5554
0x1380, // 1380..1399; ETHIOPIC
5555
0x139A, // 139A..139F; UNKNOWN
5556
0x13A0, // 13A0..13F5; CHEROKEE
5557
0x13F6, // 13F6..13F7; UNKNOWN
5558
0x13F8, // 13F8..13FD; CHEROKEE
5559
0x13FE, // 13FE..13FF; UNKNOWN
5560
0x1400, // 1400..167F; CANADIAN_ABORIGINAL
5561
0x1680, // 1680..169C; OGHAM
5562
0x169D, // 169D..169F; UNKNOWN
5563
0x16A0, // 16A0..16EA; RUNIC
5564
0x16EB, // 16EB..16ED; COMMON
5565
0x16EE, // 16EE..16F8; RUNIC
5566
0x16F9, // 16F9..16FF; UNKNOWN
5567
0x1700, // 1700..170C; TAGALOG
5568
0x170D, // 170D ; UNKNOWN
5569
0x170E, // 170E..1714; TAGALOG
5570
0x1715, // 1715..171F; UNKNOWN
5571
0x1720, // 1720..1734; HANUNOO
5572
0x1735, // 1735..1736; COMMON
5573
0x1737, // 1737..173F; UNKNOWN
5574
0x1740, // 1740..1753; BUHID
5575
0x1754, // 1754..175F; UNKNOWN
5576
0x1760, // 1760..176C; TAGBANWA
5577
0x176D, // 176D ; UNKNOWN
5578
0x176E, // 176E..1770; TAGBANWA
5579
0x1771, // 1771 ; UNKNOWN
5580
0x1772, // 1772..1773; TAGBANWA
5581
0x1774, // 1774..177F; UNKNOWN
5582
0x1780, // 1780..17DD; KHMER
5583
0x17DE, // 17DE..17DF; UNKNOWN
5584
0x17E0, // 17E0..17E9; KHMER
5585
0x17EA, // 17EA..17EF; UNKNOWN
5586
0x17F0, // 17F0..17F9; KHMER
5587
0x17FA, // 17FA..17FF; UNKNOWN
5588
0x1800, // 1800..1801; MONGOLIAN
5589
0x1802, // 1802..1803; COMMON
5590
0x1804, // 1804 ; MONGOLIAN
5591
0x1805, // 1805 ; COMMON
5592
0x1806, // 1806..180E; MONGOLIAN
5593
0x180F, // 180F ; UNKNOWN
5594
0x1810, // 1810..1819; MONGOLIAN
5595
0x181A, // 181A..181F; UNKNOWN
5596
0x1820, // 1820..1878; MONGOLIAN
5597
0x1879, // 1879..187F; UNKNOWN
5598
0x1880, // 1880..18AA; MONGOLIAN
5599
0x18AB, // 18AB..18AF; UNKNOWN
5600
0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL
5601
0x18F6, // 18F6..18FF; UNKNOWN
5602
0x1900, // 1900..191E; LIMBU
5603
0x191F, // 191F ; UNKNOWN
5604
0x1920, // 1920..192B; LIMBU
5605
0x192C, // 192C..192F; UNKNOWN
5606
0x1930, // 1930..193B; LIMBU
5607
0x193C, // 193C..193F; UNKNOWN
5608
0x1940, // 1940 ; LIMBU
5609
0x1941, // 1941..1943; UNKNOWN
5610
0x1944, // 1944..194F; LIMBU
5611
0x1950, // 1950..196D; TAI_LE
5612
0x196E, // 196E..196F; UNKNOWN
5613
0x1970, // 1970..1974; TAI_LE
5614
0x1975, // 1975..197F; UNKNOWN
5615
0x1980, // 1980..19AB; NEW_TAI_LUE
5616
0x19AC, // 19AC..19AF; UNKNOWN
5617
0x19B0, // 19B0..19C9; NEW_TAI_LUE
5618
0x19CA, // 19CA..19CF; UNKNOWN
5619
0x19D0, // 19D0..19DA; NEW_TAI_LUE
5620
0x19DB, // 19DB..19DD; UNKNOWN
5621
0x19DE, // 19DE..19DF; NEW_TAI_LUE
5622
0x19E0, // 19E0..19FF; KHMER
5623
0x1A00, // 1A00..1A1B; BUGINESE
5624
0x1A1C, // 1A1C..1A1D; UNKNOWN
5625
0x1A1E, // 1A1E..1A1F; BUGINESE
5626
0x1A20, // 1A20..1A5E; TAI_THAM
5627
0x1A5F, // 1A5F ; UNKNOWN
5628
0x1A60, // 1A60..1A7C; TAI_THAM
5629
0x1A7D, // 1A7D..1A7E; UNKNOWN
5630
0x1A7F, // 1A7F..1A89; TAI_THAM
5631
0x1A8A, // 1A8A..1A8F; UNKNOWN
5632
0x1A90, // 1A90..1A99; TAI_THAM
5633
0x1A9A, // 1A9A..1A9F; UNKNOWN
5634
0x1AA0, // 1AA0..1AAD; TAI_THAM
5635
0x1AAE, // 1AAE..1AAF; UNKNOWN
5636
0x1AB0, // 1AB0..1AC0; INHERITED
5637
0x1AC1, // 1AC1..1AFF; UNKNOWN
5638
0x1B00, // 1B00..1B4B; BALINESE
5639
0x1B4C, // 1B4C..1B4F; UNKNOWN
5640
0x1B50, // 1B50..1B7C; BALINESE
5641
0x1B7D, // 1B7D..1B7F; UNKNOWN
5642
0x1B80, // 1B80..1BBF; SUNDANESE
5643
0x1BC0, // 1BC0..1BF3; BATAK
5644
0x1BF4, // 1BF4..1BFB; UNKNOWN
5645
0x1BFC, // 1BFC..1BFF; BATAK
5646
0x1C00, // 1C00..1C37; LEPCHA
5647
0x1C38, // 1C38..1C3A; UNKNOWN
5648
0x1C3B, // 1C3B..1C49; LEPCHA
5649
0x1C4A, // 1C4A..1C4C; UNKNOWN
5650
0x1C4D, // 1C4D..1C4F; LEPCHA
5651
0x1C50, // 1C50..1C7F; OL_CHIKI
5652
0x1C80, // 1C80..1C88; CYRILLIC
5653
0x1C89, // 1C89..1C8F; UNKNOWN
5654
0x1C90, // 1C90..1CBA; GEORGIAN
5655
0x1CBB, // 1CBB..1CBC; UNKNOWN
5656
0x1CBD, // 1CBD..1CBF; GEORGIAN
5657
0x1CC0, // 1CC0..1CC7; SUNDANESE
5658
0x1CC8, // 1CC8..1CCF; UNKNOWN
5659
0x1CD0, // 1CD0..1CD2; INHERITED
5660
0x1CD3, // 1CD3 ; COMMON
5661
0x1CD4, // 1CD4..1CE0; INHERITED
5662
0x1CE1, // 1CE1 ; COMMON
5663
0x1CE2, // 1CE2..1CE8; INHERITED
5664
0x1CE9, // 1CE9..1CEC; COMMON
5665
0x1CED, // 1CED ; INHERITED
5666
0x1CEE, // 1CEE..1CF3; COMMON
5667
0x1CF4, // 1CF4 ; INHERITED
5668
0x1CF5, // 1CF5..1CF7; COMMON
5669
0x1CF8, // 1CF8..1CF9; INHERITED
5670
0x1CFA, // 1CFA ; COMMON
5671
0x1CFB, // 1CFB..1CFF; UNKNOWN
5672
0x1D00, // 1D00..1D25; LATIN
5673
0x1D26, // 1D26..1D2A; GREEK
5674
0x1D2B, // 1D2B ; CYRILLIC
5675
0x1D2C, // 1D2C..1D5C; LATIN
5676
0x1D5D, // 1D5D..1D61; GREEK
5677
0x1D62, // 1D62..1D65; LATIN
5678
0x1D66, // 1D66..1D6A; GREEK
5679
0x1D6B, // 1D6B..1D77; LATIN
5680
0x1D78, // 1D78 ; CYRILLIC
5681
0x1D79, // 1D79..1DBE; LATIN
5682
0x1DBF, // 1DBF ; GREEK
5683
0x1DC0, // 1DC0..1DF9; INHERITED
5684
0x1DFA, // 1DFA ; UNKNOWN
5685
0x1DFB, // 1DFB..1DFF; INHERITED
5686
0x1E00, // 1E00..1EFF; LATIN
5687
0x1F00, // 1F00..1F15; GREEK
5688
0x1F16, // 1F16..1F17; UNKNOWN
5689
0x1F18, // 1F18..1F1D; GREEK
5690
0x1F1E, // 1F1E..1F1F; UNKNOWN
5691
0x1F20, // 1F20..1F45; GREEK
5692
0x1F46, // 1F46..1F47; UNKNOWN
5693
0x1F48, // 1F48..1F4D; GREEK
5694
0x1F4E, // 1F4E..1F4F; UNKNOWN
5695
0x1F50, // 1F50..1F57; GREEK
5696
0x1F58, // 1F58 ; UNKNOWN
5697
0x1F59, // 1F59 ; GREEK
5698
0x1F5A, // 1F5A ; UNKNOWN
5699
0x1F5B, // 1F5B ; GREEK
5700
0x1F5C, // 1F5C ; UNKNOWN
5701
0x1F5D, // 1F5D ; GREEK
5702
0x1F5E, // 1F5E ; UNKNOWN
5703
0x1F5F, // 1F5F..1F7D; GREEK
5704
0x1F7E, // 1F7E..1F7F; UNKNOWN
5705
0x1F80, // 1F80..1FB4; GREEK
5706
0x1FB5, // 1FB5 ; UNKNOWN
5707
0x1FB6, // 1FB6..1FC4; GREEK
5708
0x1FC5, // 1FC5 ; UNKNOWN
5709
0x1FC6, // 1FC6..1FD3; GREEK
5710
0x1FD4, // 1FD4..1FD5; UNKNOWN
5711
0x1FD6, // 1FD6..1FDB; GREEK
5712
0x1FDC, // 1FDC ; UNKNOWN
5713
0x1FDD, // 1FDD..1FEF; GREEK
5714
0x1FF0, // 1FF0..1FF1; UNKNOWN
5715
0x1FF2, // 1FF2..1FF4; GREEK
5716
0x1FF5, // 1FF5 ; UNKNOWN
5717
0x1FF6, // 1FF6..1FFE; GREEK
5718
0x1FFF, // 1FFF ; UNKNOWN
5719
0x2000, // 2000..200B; COMMON
5720
0x200C, // 200C..200D; INHERITED
5721
0x200E, // 200E..2064; COMMON
5722
0x2065, // 2065 ; UNKNOWN
5723
0x2066, // 2066..2070; COMMON
5724
0x2071, // 2071 ; LATIN
5725
0x2072, // 2072..2073; UNKNOWN
5726
0x2074, // 2074..207E; COMMON
5727
0x207F, // 207F ; LATIN
5728
0x2080, // 2080..208E; COMMON
5729
0x208F, // 208F ; UNKNOWN
5730
0x2090, // 2090..209C; LATIN
5731
0x209D, // 209D..209F; UNKNOWN
5732
0x20A0, // 20A0..20BF; COMMON
5733
0x20C0, // 20C0..20CF; UNKNOWN
5734
0x20D0, // 20D0..20F0; INHERITED
5735
0x20F1, // 20F1..20FF; UNKNOWN
5736
0x2100, // 2100..2125; COMMON
5737
0x2126, // 2126 ; GREEK
5738
0x2127, // 2127..2129; COMMON
5739
0x212A, // 212A..212B; LATIN
5740
0x212C, // 212C..2131; COMMON
5741
0x2132, // 2132 ; LATIN
5742
0x2133, // 2133..214D; COMMON
5743
0x214E, // 214E ; LATIN
5744
0x214F, // 214F..215F; COMMON
5745
0x2160, // 2160..2188; LATIN
5746
0x2189, // 2189..218B; COMMON
5747
0x218C, // 218C..218F; UNKNOWN
5748
0x2190, // 2190..2426; COMMON
5749
0x2427, // 2427..243F; UNKNOWN
5750
0x2440, // 2440..244A; COMMON
5751
0x244B, // 244B..245F; UNKNOWN
5752
0x2460, // 2460..27FF; COMMON
5753
0x2800, // 2800..28FF; BRAILLE
5754
0x2900, // 2900..2B73; COMMON
5755
0x2B74, // 2B74..2B75; UNKNOWN
5756
0x2B76, // 2B76..2B95; COMMON
5757
0x2B96, // 2B96 ; UNKNOWN
5758
0x2B97, // 2B97..2BFF; COMMON
5759
0x2C00, // 2C00..2C2E; GLAGOLITIC
5760
0x2C2F, // 2C2F ; UNKNOWN
5761
0x2C30, // 2C30..2C5E; GLAGOLITIC
5762
0x2C5F, // 2C5F ; UNKNOWN
5763
0x2C60, // 2C60..2C7F; LATIN
5764
0x2C80, // 2C80..2CF3; COPTIC
5765
0x2CF4, // 2CF4..2CF8; UNKNOWN
5766
0x2CF9, // 2CF9..2CFF; COPTIC
5767
0x2D00, // 2D00..2D25; GEORGIAN
5768
0x2D26, // 2D26 ; UNKNOWN
5769
0x2D27, // 2D27 ; GEORGIAN
5770
0x2D28, // 2D28..2D2C; UNKNOWN
5771
0x2D2D, // 2D2D ; GEORGIAN
5772
0x2D2E, // 2D2E..2D2F; UNKNOWN
5773
0x2D30, // 2D30..2D67; TIFINAGH
5774
0x2D68, // 2D68..2D6E; UNKNOWN
5775
0x2D6F, // 2D6F..2D70; TIFINAGH
5776
0x2D71, // 2D71..2D7E; UNKNOWN
5777
0x2D7F, // 2D7F ; TIFINAGH
5778
0x2D80, // 2D80..2D96; ETHIOPIC
5779
0x2D97, // 2D97..2D9F; UNKNOWN
5780
0x2DA0, // 2DA0..2DA6; ETHIOPIC
5781
0x2DA7, // 2DA7 ; UNKNOWN
5782
0x2DA8, // 2DA8..2DAE; ETHIOPIC
5783
0x2DAF, // 2DAF ; UNKNOWN
5784
0x2DB0, // 2DB0..2DB6; ETHIOPIC
5785
0x2DB7, // 2DB7 ; UNKNOWN
5786
0x2DB8, // 2DB8..2DBE; ETHIOPIC
5787
0x2DBF, // 2DBF ; UNKNOWN
5788
0x2DC0, // 2DC0..2DC6; ETHIOPIC
5789
0x2DC7, // 2DC7 ; UNKNOWN
5790
0x2DC8, // 2DC8..2DCE; ETHIOPIC
5791
0x2DCF, // 2DCF ; UNKNOWN
5792
0x2DD0, // 2DD0..2DD6; ETHIOPIC
5793
0x2DD7, // 2DD7 ; UNKNOWN
5794
0x2DD8, // 2DD8..2DDE; ETHIOPIC
5795
0x2DDF, // 2DDF ; UNKNOWN
5796
0x2DE0, // 2DE0..2DFF; CYRILLIC
5797
0x2E00, // 2E00..2E52; COMMON
5798
0x2E53, // 2E53..2E7F; UNKNOWN
5799
0x2E80, // 2E80..2E99; HAN
5800
0x2E9A, // 2E9A ; UNKNOWN
5801
0x2E9B, // 2E9B..2EF3; HAN
5802
0x2EF4, // 2EF4..2EFF; UNKNOWN
5803
0x2F00, // 2F00..2FD5; HAN
5804
0x2FD6, // 2FD6..2FEF; UNKNOWN
5805
0x2FF0, // 2FF0..2FFB; COMMON
5806
0x2FFC, // 2FFC..2FFF; UNKNOWN
5807
0x3000, // 3000..3004; COMMON
5808
0x3005, // 3005 ; HAN
5809
0x3006, // 3006 ; COMMON
5810
0x3007, // 3007 ; HAN
5811
0x3008, // 3008..3020; COMMON
5812
0x3021, // 3021..3029; HAN
5813
0x302A, // 302A..302D; INHERITED
5814
0x302E, // 302E..302F; HANGUL
5815
0x3030, // 3030..3037; COMMON
5816
0x3038, // 3038..303B; HAN
5817
0x303C, // 303C..303F; COMMON
5818
0x3040, // 3040 ; UNKNOWN
5819
0x3041, // 3041..3096; HIRAGANA
5820
0x3097, // 3097..3098; UNKNOWN
5821
0x3099, // 3099..309A; INHERITED
5822
0x309B, // 309B..309C; COMMON
5823
0x309D, // 309D..309F; HIRAGANA
5824
0x30A0, // 30A0 ; COMMON
5825
0x30A1, // 30A1..30FA; KATAKANA
5826
0x30FB, // 30FB..30FC; COMMON
5827
0x30FD, // 30FD..30FF; KATAKANA
5828
0x3100, // 3100..3104; UNKNOWN
5829
0x3105, // 3105..312F; BOPOMOFO
5830
0x3130, // 3130 ; UNKNOWN
5831
0x3131, // 3131..318E; HANGUL
5832
0x318F, // 318F ; UNKNOWN
5833
0x3190, // 3190..319F; COMMON
5834
0x31A0, // 31A0..31BF; BOPOMOFO
5835
0x31C0, // 31C0..31E3; COMMON
5836
0x31E4, // 31E4..31EF; UNKNOWN
5837
0x31F0, // 31F0..31FF; KATAKANA
5838
0x3200, // 3200..321E; HANGUL
5839
0x321F, // 321F ; UNKNOWN
5840
0x3220, // 3220..325F; COMMON
5841
0x3260, // 3260..327E; HANGUL
5842
0x327F, // 327F..32CF; COMMON
5843
0x32D0, // 32D0..32FE; KATAKANA
5844
0x32FF, // 32FF ; COMMON
5845
0x3300, // 3300..3357; KATAKANA
5846
0x3358, // 3358..33FF; COMMON
5847
0x3400, // 3400..4DBF; HAN
5848
0x4DC0, // 4DC0..4DFF; COMMON
5849
0x4E00, // 4E00..9FFC; HAN
5850
0x9FFD, // 9FFD..9FFF; UNKNOWN
5851
0xA000, // A000..A48C; YI
5852
0xA48D, // A48D..A48F; UNKNOWN
5853
0xA490, // A490..A4C6; YI
5854
0xA4C7, // A4C7..A4CF; UNKNOWN
5855
0xA4D0, // A4D0..A4FF; LISU
5856
0xA500, // A500..A62B; VAI
5857
0xA62C, // A62C..A63F; UNKNOWN
5858
0xA640, // A640..A69F; CYRILLIC
5859
0xA6A0, // A6A0..A6F7; BAMUM
5860
0xA6F8, // A6F8..A6FF; UNKNOWN
5861
0xA700, // A700..A721; COMMON
5862
0xA722, // A722..A787; LATIN
5863
0xA788, // A788..A78A; COMMON
5864
0xA78B, // A78B..A7BF; LATIN
5865
0xA7C0, // A7C0..A7C1; UNKNOWN
5866
0xA7C2, // A7C2..A7CA; LATIN
5867
0xA7CB, // A7CB..A7F4; UNKNOWN
5868
0xA7F5, // A7F5..A7FF; LATIN
5869
0xA800, // A800..A82C; SYLOTI_NAGRI
5870
0xA82D, // A82D..A82F; UNKNOWN
5871
0xA830, // A830..A839; COMMON
5872
0xA83A, // A83A..A83F; UNKNOWN
5873
0xA840, // A840..A877; PHAGS_PA
5874
0xA878, // A878..A87F; UNKNOWN
5875
0xA880, // A880..A8C5; SAURASHTRA
5876
0xA8C6, // A8C6..A8CD; UNKNOWN
5877
0xA8CE, // A8CE..A8D9; SAURASHTRA
5878
0xA8DA, // A8DA..A8DF; UNKNOWN
5879
0xA8E0, // A8E0..A8FF; DEVANAGARI
5880
0xA900, // A900..A92D; KAYAH_LI
5881
0xA92E, // A92E ; COMMON
5882
0xA92F, // A92F ; KAYAH_LI
5883
0xA930, // A930..A953; REJANG
5884
0xA954, // A954..A95E; UNKNOWN
5885
0xA95F, // A95F ; REJANG
5886
0xA960, // A960..A97C; HANGUL
5887
0xA97D, // A97D..A97F; UNKNOWN
5888
0xA980, // A980..A9CD; JAVANESE
5889
0xA9CE, // A9CE ; UNKNOWN
5890
0xA9CF, // A9CF ; COMMON
5891
0xA9D0, // A9D0..A9D9; JAVANESE
5892
0xA9DA, // A9DA..A9DD; UNKNOWN
5893
0xA9DE, // A9DE..A9DF; JAVANESE
5894
0xA9E0, // A9E0..A9FE; MYANMAR
5895
0xA9FF, // A9FF ; UNKNOWN
5896
0xAA00, // AA00..AA36; CHAM
5897
0xAA37, // AA37..AA3F; UNKNOWN
5898
0xAA40, // AA40..AA4D; CHAM
5899
0xAA4E, // AA4E..AA4F; UNKNOWN
5900
0xAA50, // AA50..AA59; CHAM
5901
0xAA5A, // AA5A..AA5B; UNKNOWN
5902
0xAA5C, // AA5C..AA5F; CHAM
5903
0xAA60, // AA60..AA7F; MYANMAR
5904
0xAA80, // AA80..AAC2; TAI_VIET
5905
0xAAC3, // AAC3..AADA; UNKNOWN
5906
0xAADB, // AADB..AADF; TAI_VIET
5907
0xAAE0, // AAE0..AAF6; MEETEI_MAYEK
5908
0xAAF7, // AAF7..AB00; UNKNOWN
5909
0xAB01, // AB01..AB06; ETHIOPIC
5910
0xAB07, // AB07..AB08; UNKNOWN
5911
0xAB09, // AB09..AB0E; ETHIOPIC
5912
0xAB0F, // AB0F..AB10; UNKNOWN
5913
0xAB11, // AB11..AB16; ETHIOPIC
5914
0xAB17, // AB17..AB1F; UNKNOWN
5915
0xAB20, // AB20..AB26; ETHIOPIC
5916
0xAB27, // AB27 ; UNKNOWN
5917
0xAB28, // AB28..AB2E; ETHIOPIC
5918
0xAB2F, // AB2F ; UNKNOWN
5919
0xAB30, // AB30..AB5A; LATIN
5920
0xAB5B, // AB5B ; COMMON
5921
0xAB5C, // AB5C..AB64; LATIN
5922
0xAB65, // AB65 ; GREEK
5923
0xAB66, // AB66..AB69; LATIN
5924
0xAB6A, // AB6A..AB6B; COMMON
5925
0xAB6C, // AB6C..AB6F; UNKNOWN
5926
0xAB70, // AB70..ABBF; CHEROKEE
5927
0xABC0, // ABC0..ABED; MEETEI_MAYEK
5928
0xABEE, // ABEE..ABEF; UNKNOWN
5929
0xABF0, // ABF0..ABF9; MEETEI_MAYEK
5930
0xABFA, // ABFA..ABFF; UNKNOWN
5931
0xAC00, // AC00..D7A3; HANGUL
5932
0xD7A4, // D7A4..D7AF; UNKNOWN
5933
0xD7B0, // D7B0..D7C6; HANGUL
5934
0xD7C7, // D7C7..D7CA; UNKNOWN
5935
0xD7CB, // D7CB..D7FB; HANGUL
5936
0xD7FC, // D7FC..F8FF; UNKNOWN
5937
0xF900, // F900..FA6D; HAN
5938
0xFA6E, // FA6E..FA6F; UNKNOWN
5939
0xFA70, // FA70..FAD9; HAN
5940
0xFADA, // FADA..FAFF; UNKNOWN
5941
0xFB00, // FB00..FB06; LATIN
5942
0xFB07, // FB07..FB12; UNKNOWN
5943
0xFB13, // FB13..FB17; ARMENIAN
5944
0xFB18, // FB18..FB1C; UNKNOWN
5945
0xFB1D, // FB1D..FB36; HEBREW
5946
0xFB37, // FB37 ; UNKNOWN
5947
0xFB38, // FB38..FB3C; HEBREW
5948
0xFB3D, // FB3D ; UNKNOWN
5949
0xFB3E, // FB3E ; HEBREW
5950
0xFB3F, // FB3F ; UNKNOWN
5951
0xFB40, // FB40..FB41; HEBREW
5952
0xFB42, // FB42 ; UNKNOWN
5953
0xFB43, // FB43..FB44; HEBREW
5954
0xFB45, // FB45 ; UNKNOWN
5955
0xFB46, // FB46..FB4F; HEBREW
5956
0xFB50, // FB50..FBC1; ARABIC
5957
0xFBC2, // FBC2..FBD2; UNKNOWN
5958
0xFBD3, // FBD3..FD3D; ARABIC
5959
0xFD3E, // FD3E..FD3F; COMMON
5960
0xFD40, // FD40..FD4F; UNKNOWN
5961
0xFD50, // FD50..FD8F; ARABIC
5962
0xFD90, // FD90..FD91; UNKNOWN
5963
0xFD92, // FD92..FDC7; ARABIC
5964
0xFDC8, // FDC8..FDEF; UNKNOWN
5965
0xFDF0, // FDF0..FDFD; ARABIC
5966
0xFDFE, // FDFE..FDFF; UNKNOWN
5967
0xFE00, // FE00..FE0F; INHERITED
5968
0xFE10, // FE10..FE19; COMMON
5969
0xFE1A, // FE1A..FE1F; UNKNOWN
5970
0xFE20, // FE20..FE2D; INHERITED
5971
0xFE2E, // FE2E..FE2F; CYRILLIC
5972
0xFE30, // FE30..FE52; COMMON
5973
0xFE53, // FE53 ; UNKNOWN
5974
0xFE54, // FE54..FE66; COMMON
5975
0xFE67, // FE67 ; UNKNOWN
5976
0xFE68, // FE68..FE6B; COMMON
5977
0xFE6C, // FE6C..FE6F; UNKNOWN
5978
0xFE70, // FE70..FE74; ARABIC
5979
0xFE75, // FE75 ; UNKNOWN
5980
0xFE76, // FE76..FEFC; ARABIC
5981
0xFEFD, // FEFD..FEFE; UNKNOWN
5982
0xFEFF, // FEFF ; COMMON
5983
0xFF00, // FF00 ; UNKNOWN
5984
0xFF01, // FF01..FF20; COMMON
5985
0xFF21, // FF21..FF3A; LATIN
5986
0xFF3B, // FF3B..FF40; COMMON
5987
0xFF41, // FF41..FF5A; LATIN
5988
0xFF5B, // FF5B..FF65; COMMON
5989
0xFF66, // FF66..FF6F; KATAKANA
5990
0xFF70, // FF70 ; COMMON
5991
0xFF71, // FF71..FF9D; KATAKANA
5992
0xFF9E, // FF9E..FF9F; COMMON
5993
0xFFA0, // FFA0..FFBE; HANGUL
5994
0xFFBF, // FFBF..FFC1; UNKNOWN
5995
0xFFC2, // FFC2..FFC7; HANGUL
5996
0xFFC8, // FFC8..FFC9; UNKNOWN
5997
0xFFCA, // FFCA..FFCF; HANGUL
5998
0xFFD0, // FFD0..FFD1; UNKNOWN
5999
0xFFD2, // FFD2..FFD7; HANGUL
6000
0xFFD8, // FFD8..FFD9; UNKNOWN
6001
0xFFDA, // FFDA..FFDC; HANGUL
6002
0xFFDD, // FFDD..FFDF; UNKNOWN
6003
0xFFE0, // FFE0..FFE6; COMMON
6004
0xFFE7, // FFE7 ; UNKNOWN
6005
0xFFE8, // FFE8..FFEE; COMMON
6006
0xFFEF, // FFEF..FFF8; UNKNOWN
6007
0xFFF9, // FFF9..FFFD; COMMON
6008
0xFFFE, // FFFE..FFFF; UNKNOWN
6009
0x10000, // 10000..1000B; LINEAR_B
6010
0x1000C, // 1000C ; UNKNOWN
6011
0x1000D, // 1000D..10026; LINEAR_B
6012
0x10027, // 10027 ; UNKNOWN
6013
0x10028, // 10028..1003A; LINEAR_B
6014
0x1003B, // 1003B ; UNKNOWN
6015
0x1003C, // 1003C..1003D; LINEAR_B
6016
0x1003E, // 1003E ; UNKNOWN
6017
0x1003F, // 1003F..1004D; LINEAR_B
6018
0x1004E, // 1004E..1004F; UNKNOWN
6019
0x10050, // 10050..1005D; LINEAR_B
6020
0x1005E, // 1005E..1007F; UNKNOWN
6021
0x10080, // 10080..100FA; LINEAR_B
6022
0x100FB, // 100FB..100FF; UNKNOWN
6023
0x10100, // 10100..10102; COMMON
6024
0x10103, // 10103..10106; UNKNOWN
6025
0x10107, // 10107..10133; COMMON
6026
0x10134, // 10134..10136; UNKNOWN
6027
0x10137, // 10137..1013F; COMMON
6028
0x10140, // 10140..1018E; GREEK
6029
0x1018F, // 1018F ; UNKNOWN
6030
0x10190, // 10190..1019C; COMMON
6031
0x1019D, // 1019D..1019F; UNKNOWN
6032
0x101A0, // 101A0 ; GREEK
6033
0x101A1, // 101A1..101CF; UNKNOWN
6034
0x101D0, // 101D0..101FC; COMMON
6035
0x101FD, // 101FD ; INHERITED
6036
0x101FE, // 101FE..1027F; UNKNOWN
6037
0x10280, // 10280..1029C; LYCIAN
6038
0x1029D, // 1029D..1029F; UNKNOWN
6039
0x102A0, // 102A0..102D0; CARIAN
6040
0x102D1, // 102D1..102DF; UNKNOWN
6041
0x102E0, // 102E0 ; INHERITED
6042
0x102E1, // 102E1..102FB; COMMON
6043
0x102FC, // 102FC..102FF; UNKNOWN
6044
0x10300, // 10300..10323; OLD_ITALIC
6045
0x10324, // 10324..1032C; UNKNOWN
6046
0x1032D, // 1032D..1032F; OLD_ITALIC
6047
0x10330, // 10330..1034A; GOTHIC
6048
0x1034B, // 1034B..1034F; UNKNOWN
6049
0x10350, // 10350..1037A; OLD_PERMIC
6050
0x1037B, // 1037B..1037F; UNKNOWN
6051
0x10380, // 10380..1039D; UGARITIC
6052
0x1039E, // 1039E ; UNKNOWN
6053
0x1039F, // 1039F ; UGARITIC
6054
0x103A0, // 103A0..103C3; OLD_PERSIAN
6055
0x103C4, // 103C4..103C7; UNKNOWN
6056
0x103C8, // 103C8..103D5; OLD_PERSIAN
6057
0x103D6, // 103D6..103FF; UNKNOWN
6058
0x10400, // 10400..1044F; DESERET
6059
0x10450, // 10450..1047F; SHAVIAN
6060
0x10480, // 10480..1049D; OSMANYA
6061
0x1049E, // 1049E..1049F; UNKNOWN
6062
0x104A0, // 104A0..104A9; OSMANYA
6063
0x104AA, // 104AA..104AF; UNKNOWN
6064
0x104B0, // 104B0..104D3; OSAGE
6065
0x104D4, // 104D4..104D7; UNKNOWN
6066
0x104D8, // 104D8..104FB; OSAGE
6067
0x104FC, // 104FC..104FF; UNKNOWN
6068
0x10500, // 10500..10527; ELBASAN
6069
0x10528, // 10528..1052F; UNKNOWN
6070
0x10530, // 10530..10563; CAUCASIAN_ALBANIAN
6071
0x10564, // 10564..1056E; UNKNOWN
6072
0x1056F, // 1056F ; CAUCASIAN_ALBANIAN
6073
0x10570, // 10570..105FF; UNKNOWN
6074
0x10600, // 10600..10736; LINEAR_A
6075
0x10737, // 10737..1073F; UNKNOWN
6076
0x10740, // 10740..10755; LINEAR_A
6077
0x10756, // 10756..1075F; UNKNOWN
6078
0x10760, // 10760..10767; LINEAR_A
6079
0x10768, // 10768..107FF; UNKNOWN
6080
0x10800, // 10800..10805; CYPRIOT
6081
0x10806, // 10806..10807; UNKNOWN
6082
0x10808, // 10808 ; CYPRIOT
6083
0x10809, // 10809 ; UNKNOWN
6084
0x1080A, // 1080A..10835; CYPRIOT
6085
0x10836, // 10836 ; UNKNOWN
6086
0x10837, // 10837..10838; CYPRIOT
6087
0x10839, // 10839..1083B; UNKNOWN
6088
0x1083C, // 1083C ; CYPRIOT
6089
0x1083D, // 1083D..1083E; UNKNOWN
6090
0x1083F, // 1083F ; CYPRIOT
6091
0x10840, // 10840..10855; IMPERIAL_ARAMAIC
6092
0x10856, // 10856 ; UNKNOWN
6093
0x10857, // 10857..1085F; IMPERIAL_ARAMAIC
6094
0x10860, // 10860..1087F; PALMYRENE
6095
0x10880, // 10880..1089E; NABATAEAN
6096
0x1089F, // 1089F..108A6; UNKNOWN
6097
0x108A7, // 108A7..108AF; NABATAEAN
6098
0x108B0, // 108B0..108DF; UNKNOWN
6099
0x108E0, // 108E0..108F2; HATRAN
6100
0x108F3, // 108F3 ; UNKNOWN
6101
0x108F4, // 108F4..108F5; HATRAN
6102
0x108F6, // 108F6..108FA; UNKNOWN
6103
0x108FB, // 108FB..108FF; HATRAN
6104
0x10900, // 10900..1091B; PHOENICIAN
6105
0x1091C, // 1091C..1091E; UNKNOWN
6106
0x1091F, // 1091F ; PHOENICIAN
6107
0x10920, // 10920..10939; LYDIAN
6108
0x1093A, // 1093A..1093E; UNKNOWN
6109
0x1093F, // 1093F ; LYDIAN
6110
0x10940, // 10940..1097F; UNKNOWN
6111
0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS
6112
0x109A0, // 109A0..109B7; MEROITIC_CURSIVE
6113
0x109B8, // 109B8..109BB; UNKNOWN
6114
0x109BC, // 109BC..109CF; MEROITIC_CURSIVE
6115
0x109D0, // 109D0..109D1; UNKNOWN
6116
0x109D2, // 109D2..109FF; MEROITIC_CURSIVE
6117
0x10A00, // 10A00..10A03; KHAROSHTHI
6118
0x10A04, // 10A04 ; UNKNOWN
6119
0x10A05, // 10A05..10A06; KHAROSHTHI
6120
0x10A07, // 10A07..10A0B; UNKNOWN
6121
0x10A0C, // 10A0C..10A13; KHAROSHTHI
6122
0x10A14, // 10A14 ; UNKNOWN
6123
0x10A15, // 10A15..10A17; KHAROSHTHI
6124
0x10A18, // 10A18 ; UNKNOWN
6125
0x10A19, // 10A19..10A35; KHAROSHTHI
6126
0x10A36, // 10A36..10A37; UNKNOWN
6127
0x10A38, // 10A38..10A3A; KHAROSHTHI
6128
0x10A3B, // 10A3B..10A3E; UNKNOWN
6129
0x10A3F, // 10A3F..10A48; KHAROSHTHI
6130
0x10A49, // 10A49..10A4F; UNKNOWN
6131
0x10A50, // 10A50..10A58; KHAROSHTHI
6132
0x10A59, // 10A59..10A5F; UNKNOWN
6133
0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN
6134
0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN
6135
0x10AA0, // 10AA0..10ABF; UNKNOWN
6136
0x10AC0, // 10AC0..10AE6; MANICHAEAN
6137
0x10AE7, // 10AE7..10AEA; UNKNOWN
6138
0x10AEB, // 10AEB..10AF6; MANICHAEAN
6139
0x10AF7, // 10AF7..10AFF; UNKNOWN
6140
0x10B00, // 10B00..10B35; AVESTAN
6141
0x10B36, // 10B36..10B38; UNKNOWN
6142
0x10B39, // 10B39..10B3F; AVESTAN
6143
0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
6144
0x10B56, // 10B56..10B57; UNKNOWN
6145
0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
6146
0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
6147
0x10B73, // 10B73..10B77; UNKNOWN
6148
0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
6149
0x10B80, // 10B80..10B91; PSALTER_PAHLAVI
6150
0x10B92, // 10B92..10B98; UNKNOWN
6151
0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI
6152
0x10B9D, // 10B9D..10BA8; UNKNOWN
6153
0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI
6154
0x10BB0, // 10BB0..10BFF; UNKNOWN
6155
0x10C00, // 10C00..10C48; OLD_TURKIC
6156
0x10C49, // 10C49..10C7F; UNKNOWN
6157
0x10C80, // 10C80..10CB2; OLD_HUNGARIAN
6158
0x10CB3, // 10CB3..10CBF; UNKNOWN
6159
0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN
6160
0x10CF3, // 10CF3..10CF9; UNKNOWN
6161
0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN
6162
0x10D00, // 10D00..10D27; HANIFI_ROHINGYA
6163
0x10D28, // 10D28..10D2F; UNKNOWN
6164
0x10D30, // 10D30..10D39; HANIFI_ROHINGYA
6165
0x10D3A, // 10D3A..10E5F; UNKNOWN
6166
0x10E60, // 10E60..10E7E; ARABIC
6167
0x10E7F, // 10E7F ; UNKNOWN
6168
0x10E80, // 10E80..10EA9; YEZIDI
6169
0x10EAA, // 10EAA ; UNKNOWN
6170
0x10EAB, // 10EAB..10EAD; YEZIDI
6171
0x10EAE, // 10EAE..10EAF; UNKNOWN
6172
0x10EB0, // 10EB0..10EB1; YEZIDI
6173
0x10EB2, // 10EB2..10EFF; UNKNOWN
6174
0x10F00, // 10F00..10F27; OLD_SOGDIAN
6175
0x10F28, // 10F28..10F2F; UNKNOWN
6176
0x10F30, // 10F30..10F59; SOGDIAN
6177
0x10F5A, // 10F5A..10FAF; UNKNOWN
6178
0x10FB0, // 10FB0..10FCB; CHORASMIAN
6179
0x10FCC, // 10FCC..10FDF; UNKNOWN
6180
0x10FE0, // 10FE0..10FF6; ELYMAIC
6181
0x10FF7, // 10FF7..10FFF; UNKNOWN
6182
0x11000, // 11000..1104D; BRAHMI
6183
0x1104E, // 1104E..11051; UNKNOWN
6184
0x11052, // 11052..1106F; BRAHMI
6185
0x11070, // 11070..1107E; UNKNOWN
6186
0x1107F, // 1107F ; BRAHMI
6187
0x11080, // 11080..110C1; KAITHI
6188
0x110C2, // 110C2..110CC; UNKNOWN
6189
0x110CD, // 110CD ; KAITHI
6190
0x110CE, // 110CE..110CF; UNKNOWN
6191
0x110D0, // 110D0..110E8; SORA_SOMPENG
6192
0x110E9, // 110E9..110EF; UNKNOWN
6193
0x110F0, // 110F0..110F9; SORA_SOMPENG
6194
0x110FA, // 110FA..110FF; UNKNOWN
6195
0x11100, // 11100..11134; CHAKMA
6196
0x11135, // 11135 ; UNKNOWN
6197
0x11136, // 11136..11147; CHAKMA
6198
0x11148, // 11148..1114F; UNKNOWN
6199
0x11150, // 11150..11176; MAHAJANI
6200
0x11177, // 11177..1117F; UNKNOWN
6201
0x11180, // 11180..111DF; SHARADA
6202
0x111E0, // 111E0 ; UNKNOWN
6203
0x111E1, // 111E1..111F4; SINHALA
6204
0x111F5, // 111F5..111FF; UNKNOWN
6205
0x11200, // 11200..11211; KHOJKI
6206
0x11212, // 11212 ; UNKNOWN
6207
0x11213, // 11213..1123E; KHOJKI
6208
0x1123F, // 1123F..1127F; UNKNOWN
6209
0x11280, // 11280..11286; MULTANI
6210
0x11287, // 11287 ; UNKNOWN
6211
0x11288, // 11288 ; MULTANI
6212
0x11289, // 11289 ; UNKNOWN
6213
0x1128A, // 1128A..1128D; MULTANI
6214
0x1128E, // 1128E ; UNKNOWN
6215
0x1128F, // 1128F..1129D; MULTANI
6216
0x1129E, // 1129E ; UNKNOWN
6217
0x1129F, // 1129F..112A9; MULTANI
6218
0x112AA, // 112AA..112AF; UNKNOWN
6219
0x112B0, // 112B0..112EA; KHUDAWADI
6220
0x112EB, // 112EB..112EF; UNKNOWN
6221
0x112F0, // 112F0..112F9; KHUDAWADI
6222
0x112FA, // 112FA..112FF; UNKNOWN
6223
0x11300, // 11300..11303; GRANTHA
6224
0x11304, // 11304 ; UNKNOWN
6225
0x11305, // 11305..1130C; GRANTHA
6226
0x1130D, // 1130D..1130E; UNKNOWN
6227
0x1130F, // 1130F..11310; GRANTHA
6228
0x11311, // 11311..11312; UNKNOWN
6229
0x11313, // 11313..11328; GRANTHA
6230
0x11329, // 11329 ; UNKNOWN
6231
0x1132A, // 1132A..11330; GRANTHA
6232
0x11331, // 11331 ; UNKNOWN
6233
0x11332, // 11332..11333; GRANTHA
6234
0x11334, // 11334 ; UNKNOWN
6235
0x11335, // 11335..11339; GRANTHA
6236
0x1133A, // 1133A ; UNKNOWN
6237
0x1133B, // 1133B ; INHERITED
6238
0x1133C, // 1133C..11344; GRANTHA
6239
0x11345, // 11345..11346; UNKNOWN
6240
0x11347, // 11347..11348; GRANTHA
6241
0x11349, // 11349..1134A; UNKNOWN
6242
0x1134B, // 1134B..1134D; GRANTHA
6243
0x1134E, // 1134E..1134F; UNKNOWN
6244
0x11350, // 11350 ; GRANTHA
6245
0x11351, // 11351..11356; UNKNOWN
6246
0x11357, // 11357 ; GRANTHA
6247
0x11358, // 11358..1135C; UNKNOWN
6248
0x1135D, // 1135D..11363; GRANTHA
6249
0x11364, // 11364..11365; UNKNOWN
6250
0x11366, // 11366..1136C; GRANTHA
6251
0x1136D, // 1136D..1136F; UNKNOWN
6252
0x11370, // 11370..11374; GRANTHA
6253
0x11375, // 11375..113FF; UNKNOWN
6254
0x11400, // 11400..1145B; NEWA
6255
0x1145C, // 1145C ; UNKNOWN
6256
0x1145D, // 1145D..11461; NEWA
6257
0x11462, // 11462..1147F; UNKNOWN
6258
0x11480, // 11480..114C7; TIRHUTA
6259
0x114C8, // 114C8..114CF; UNKNOWN
6260
0x114D0, // 114D0..114D9; TIRHUTA
6261
0x114DA, // 114DA..1157F; UNKNOWN
6262
0x11580, // 11580..115B5; SIDDHAM
6263
0x115B6, // 115B6..115B7; UNKNOWN
6264
0x115B8, // 115B8..115DD; SIDDHAM
6265
0x115DE, // 115DE..115FF; UNKNOWN
6266
0x11600, // 11600..11644; MODI
6267
0x11645, // 11645..1164F; UNKNOWN
6268
0x11650, // 11650..11659; MODI
6269
0x1165A, // 1165A..1165F; UNKNOWN
6270
0x11660, // 11660..1166C; MONGOLIAN
6271
0x1166D, // 1166D..1167F; UNKNOWN
6272
0x11680, // 11680..116B8; TAKRI
6273
0x116B9, // 116B9..116BF; UNKNOWN
6274
0x116C0, // 116C0..116C9; TAKRI
6275
0x116CA, // 116CA..116FF; UNKNOWN
6276
0x11700, // 11700..1171A; AHOM
6277
0x1171B, // 1171B..1171C; UNKNOWN
6278
0x1171D, // 1171D..1172B; AHOM
6279
0x1172C, // 1172C..1172F; UNKNOWN
6280
0x11730, // 11730..1173F; AHOM
6281
0x11740, // 11740..117FF; UNKNOWN
6282
0x11800, // 11800..1183B; DOGRA
6283
0x1183C, // 1183C..1189F; UNKNOWN
6284
0x118A0, // 118A0..118F2; WARANG_CITI
6285
0x118F3, // 118F3..118FE; UNKNOWN
6286
0x118FF, // 118FF ; WARANG_CITI
6287
0x11900, // 11900..11906; DIVES_AKURU
6288
0x11907, // 11907..11908; UNKNOWN
6289
0x11909, // 11909 ; DIVES_AKURU
6290
0x1190A, // 1190A..1190B; UNKNOWN
6291
0x1190C, // 1190C..11913; DIVES_AKURU
6292
0x11914, // 11914 ; UNKNOWN
6293
0x11915, // 11915..11916; DIVES_AKURU
6294
0x11917, // 11917 ; UNKNOWN
6295
0x11918, // 11918..11935; DIVES_AKURU
6296
0x11936, // 11936 ; UNKNOWN
6297
0x11937, // 11937..11938; DIVES_AKURU
6298
0x11939, // 11939..1193A; UNKNOWN
6299
0x1193B, // 1193B..11946; DIVES_AKURU
6300
0x11947, // 11947..1194F; UNKNOWN
6301
0x11950, // 11950..11959; DIVES_AKURU
6302
0x1195A, // 1195A..1199F; UNKNOWN
6303
0x119A0, // 119A0..119A7; NANDINAGARI
6304
0x119A8, // 119A8..119A9; UNKNOWN
6305
0x119AA, // 119AA..119D7; NANDINAGARI
6306
0x119D8, // 119D8..119D9; UNKNOWN
6307
0x119DA, // 119DA..119E4; NANDINAGARI
6308
0x119E5, // 119E5..119FF; UNKNOWN
6309
0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE
6310
0x11A48, // 11A48..11A4F; UNKNOWN
6311
0x11A50, // 11A50..11AA2; SOYOMBO
6312
0x11AA3, // 11AA3..11ABF; UNKNOWN
6313
0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU
6314
0x11AF9, // 11AF9..11BFF; UNKNOWN
6315
0x11C00, // 11C00..11C08; BHAIKSUKI
6316
0x11C09, // 11C09 ; UNKNOWN
6317
0x11C0A, // 11C0A..11C36; BHAIKSUKI
6318
0x11C37, // 11C37 ; UNKNOWN
6319
0x11C38, // 11C38..11C45; BHAIKSUKI
6320
0x11C46, // 11C46..11C4F; UNKNOWN
6321
0x11C50, // 11C50..11C6C; BHAIKSUKI
6322
0x11C6D, // 11C6D..11C6F; UNKNOWN
6323
0x11C70, // 11C70..11C8F; MARCHEN
6324
0x11C90, // 11C90..11C91; UNKNOWN
6325
0x11C92, // 11C92..11CA7; MARCHEN
6326
0x11CA8, // 11CA8 ; UNKNOWN
6327
0x11CA9, // 11CA9..11CB6; MARCHEN
6328
0x11CB7, // 11CB7..11CFF; UNKNOWN
6329
0x11D00, // 11D00..11D06; MASARAM_GONDI
6330
0x11D07, // 11D07 ; UNKNOWN
6331
0x11D08, // 11D08..11D09; MASARAM_GONDI
6332
0x11D0A, // 11D0A ; UNKNOWN
6333
0x11D0B, // 11D0B..11D36; MASARAM_GONDI
6334
0x11D37, // 11D37..11D39; UNKNOWN
6335
0x11D3A, // 11D3A ; MASARAM_GONDI
6336
0x11D3B, // 11D3B ; UNKNOWN
6337
0x11D3C, // 11D3C..11D3D; MASARAM_GONDI
6338
0x11D3E, // 11D3E ; UNKNOWN
6339
0x11D3F, // 11D3F..11D47; MASARAM_GONDI
6340
0x11D48, // 11D48..11D4F; UNKNOWN
6341
0x11D50, // 11D50..11D59; MASARAM_GONDI
6342
0x11D5A, // 11D5A..11D5F; UNKNOWN
6343
0x11D60, // 11D60..11D65; GUNJALA_GONDI
6344
0x11D66, // 11D66 ; UNKNOWN
6345
0x11D67, // 11D67..11D68; GUNJALA_GONDI
6346
0x11D69, // 11D69 ; UNKNOWN
6347
0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI
6348
0x11D8F, // 11D8F ; UNKNOWN
6349
0x11D90, // 11D90..11D91; GUNJALA_GONDI
6350
0x11D92, // 11D92 ; UNKNOWN
6351
0x11D93, // 11D93..11D98; GUNJALA_GONDI
6352
0x11D99, // 11D99..11D9F; UNKNOWN
6353
0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI
6354
0x11DAA, // 11DAA..11EDF; UNKNOWN
6355
0x11EE0, // 11EE0..11EF8; MAKASAR
6356
0x11EF9, // 11EF9..11FAF; UNKNOWN
6357
0x11FB0, // 11FB0 ; LISU
6358
0x11FB1, // 11FB1..11FBF; UNKNOWN
6359
0x11FC0, // 11FC0..11FF1; TAMIL
6360
0x11FF2, // 11FF2..11FFE; UNKNOWN
6361
0x11FFF, // 11FFF ; TAMIL
6362
0x12000, // 12000..12399; CUNEIFORM
6363
0x1239A, // 1239A..123FF; UNKNOWN
6364
0x12400, // 12400..1246E; CUNEIFORM
6365
0x1246F, // 1246F ; UNKNOWN
6366
0x12470, // 12470..12474; CUNEIFORM
6367
0x12475, // 12475..1247F; UNKNOWN
6368
0x12480, // 12480..12543; CUNEIFORM
6369
0x12544, // 12544..12FFF; UNKNOWN
6370
0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS
6371
0x1342F, // 1342F ; UNKNOWN
6372
0x13430, // 13430..13438; EGYPTIAN_HIEROGLYPHS
6373
0x13439, // 13439..143FF; UNKNOWN
6374
0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS
6375
0x14647, // 14647..167FF; UNKNOWN
6376
0x16800, // 16800..16A38; BAMUM
6377
0x16A39, // 16A39..16A3F; UNKNOWN
6378
0x16A40, // 16A40..16A5E; MRO
6379
0x16A5F, // 16A5F ; UNKNOWN
6380
0x16A60, // 16A60..16A69; MRO
6381
0x16A6A, // 16A6A..16A6D; UNKNOWN
6382
0x16A6E, // 16A6E..16A6F; MRO
6383
0x16A70, // 16A70..16ACF; UNKNOWN
6384
0x16AD0, // 16AD0..16AED; BASSA_VAH
6385
0x16AEE, // 16AEE..16AEF; UNKNOWN
6386
0x16AF0, // 16AF0..16AF5; BASSA_VAH
6387
0x16AF6, // 16AF6..16AFF; UNKNOWN
6388
0x16B00, // 16B00..16B45; PAHAWH_HMONG
6389
0x16B46, // 16B46..16B4F; UNKNOWN
6390
0x16B50, // 16B50..16B59; PAHAWH_HMONG
6391
0x16B5A, // 16B5A ; UNKNOWN
6392
0x16B5B, // 16B5B..16B61; PAHAWH_HMONG
6393
0x16B62, // 16B62 ; UNKNOWN
6394
0x16B63, // 16B63..16B77; PAHAWH_HMONG
6395
0x16B78, // 16B78..16B7C; UNKNOWN
6396
0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG
6397
0x16B90, // 16B90..16E3F; UNKNOWN
6398
0x16E40, // 16E40..16E9A; MEDEFAIDRIN
6399
0x16E9B, // 16E9B..16EFF; UNKNOWN
6400
0x16F00, // 16F00..16F4A; MIAO
6401
0x16F4B, // 16F4B..16F4E; UNKNOWN
6402
0x16F4F, // 16F4F..16F87; MIAO
6403
0x16F88, // 16F88..16F8E; UNKNOWN
6404
0x16F8F, // 16F8F..16F9F; MIAO
6405
0x16FA0, // 16FA0..16FDF; UNKNOWN
6406
0x16FE0, // 16FE0 ; TANGUT
6407
0x16FE1, // 16FE1 ; NUSHU
6408
0x16FE2, // 16FE2..16FE3; COMMON
6409
0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT
6410
0x16FE5, // 16FE5..16FEF; UNKNOWN
6411
0x16FF0, // 16FF0..16FF1; HAN
6412
0x16FF2, // 16FF2..16FFF; UNKNOWN
6413
0x17000, // 17000..187F7; TANGUT
6414
0x187F8, // 187F8..187FF; UNKNOWN
6415
0x18800, // 18800..18AFF; TANGUT
6416
0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT
6417
0x18CD6, // 18CD6..18CFF; UNKNOWN
6418
0x18D00, // 18D00..18D08; TANGUT
6419
0x18D09, // 18D09..1AFFF; UNKNOWN
6420
0x1B000, // 1B000 ; KATAKANA
6421
0x1B001, // 1B001..1B11E; HIRAGANA
6422
0x1B11F, // 1B11F..1B14F; UNKNOWN
6423
0x1B150, // 1B150..1B152; HIRAGANA
6424
0x1B153, // 1B153..1B163; UNKNOWN
6425
0x1B164, // 1B164..1B167; KATAKANA
6426
0x1B168, // 1B168..1B16F; UNKNOWN
6427
0x1B170, // 1B170..1B2FB; NUSHU
6428
0x1B2FC, // 1B2FC..1BBFF; UNKNOWN
6429
0x1BC00, // 1BC00..1BC6A; DUPLOYAN
6430
0x1BC6B, // 1BC6B..1BC6F; UNKNOWN
6431
0x1BC70, // 1BC70..1BC7C; DUPLOYAN
6432
0x1BC7D, // 1BC7D..1BC7F; UNKNOWN
6433
0x1BC80, // 1BC80..1BC88; DUPLOYAN
6434
0x1BC89, // 1BC89..1BC8F; UNKNOWN
6435
0x1BC90, // 1BC90..1BC99; DUPLOYAN
6436
0x1BC9A, // 1BC9A..1BC9B; UNKNOWN
6437
0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN
6438
0x1BCA0, // 1BCA0..1BCA3; COMMON
6439
0x1BCA4, // 1BCA4..1CFFF; UNKNOWN
6440
0x1D000, // 1D000..1D0F5; COMMON
6441
0x1D0F6, // 1D0F6..1D0FF; UNKNOWN
6442
0x1D100, // 1D100..1D126; COMMON
6443
0x1D127, // 1D127..1D128; UNKNOWN
6444
0x1D129, // 1D129..1D166; COMMON
6445
0x1D167, // 1D167..1D169; INHERITED
6446
0x1D16A, // 1D16A..1D17A; COMMON
6447
0x1D17B, // 1D17B..1D182; INHERITED
6448
0x1D183, // 1D183..1D184; COMMON
6449
0x1D185, // 1D185..1D18B; INHERITED
6450
0x1D18C, // 1D18C..1D1A9; COMMON
6451
0x1D1AA, // 1D1AA..1D1AD; INHERITED
6452
0x1D1AE, // 1D1AE..1D1E8; COMMON
6453
0x1D1E9, // 1D1E9..1D1FF; UNKNOWN
6454
0x1D200, // 1D200..1D245; GREEK
6455
0x1D246, // 1D246..1D2DF; UNKNOWN
6456
0x1D2E0, // 1D2E0..1D2F3; COMMON
6457
0x1D2F4, // 1D2F4..1D2FF; UNKNOWN
6458
0x1D300, // 1D300..1D356; COMMON
6459
0x1D357, // 1D357..1D35F; UNKNOWN
6460
0x1D360, // 1D360..1D378; COMMON
6461
0x1D379, // 1D379..1D3FF; UNKNOWN
6462
0x1D400, // 1D400..1D454; COMMON
6463
0x1D455, // 1D455 ; UNKNOWN
6464
0x1D456, // 1D456..1D49C; COMMON
6465
0x1D49D, // 1D49D ; UNKNOWN
6466
0x1D49E, // 1D49E..1D49F; COMMON
6467
0x1D4A0, // 1D4A0..1D4A1; UNKNOWN
6468
0x1D4A2, // 1D4A2 ; COMMON
6469
0x1D4A3, // 1D4A3..1D4A4; UNKNOWN
6470
0x1D4A5, // 1D4A5..1D4A6; COMMON
6471
0x1D4A7, // 1D4A7..1D4A8; UNKNOWN
6472
0x1D4A9, // 1D4A9..1D4AC; COMMON
6473
0x1D4AD, // 1D4AD ; UNKNOWN
6474
0x1D4AE, // 1D4AE..1D4B9; COMMON
6475
0x1D4BA, // 1D4BA ; UNKNOWN
6476
0x1D4BB, // 1D4BB ; COMMON
6477
0x1D4BC, // 1D4BC ; UNKNOWN
6478
0x1D4BD, // 1D4BD..1D4C3; COMMON
6479
0x1D4C4, // 1D4C4 ; UNKNOWN
6480
0x1D4C5, // 1D4C5..1D505; COMMON
6481
0x1D506, // 1D506 ; UNKNOWN
6482
0x1D507, // 1D507..1D50A; COMMON
6483
0x1D50B, // 1D50B..1D50C; UNKNOWN
6484
0x1D50D, // 1D50D..1D514; COMMON
6485
0x1D515, // 1D515 ; UNKNOWN
6486
0x1D516, // 1D516..1D51C; COMMON
6487
0x1D51D, // 1D51D ; UNKNOWN
6488
0x1D51E, // 1D51E..1D539; COMMON
6489
0x1D53A, // 1D53A ; UNKNOWN
6490
0x1D53B, // 1D53B..1D53E; COMMON
6491
0x1D53F, // 1D53F ; UNKNOWN
6492
0x1D540, // 1D540..1D544; COMMON
6493
0x1D545, // 1D545 ; UNKNOWN
6494
0x1D546, // 1D546 ; COMMON
6495
0x1D547, // 1D547..1D549; UNKNOWN
6496
0x1D54A, // 1D54A..1D550; COMMON
6497
0x1D551, // 1D551 ; UNKNOWN
6498
0x1D552, // 1D552..1D6A5; COMMON
6499
0x1D6A6, // 1D6A6..1D6A7; UNKNOWN
6500
0x1D6A8, // 1D6A8..1D7CB; COMMON
6501
0x1D7CC, // 1D7CC..1D7CD; UNKNOWN
6502
0x1D7CE, // 1D7CE..1D7FF; COMMON
6503
0x1D800, // 1D800..1DA8B; SIGNWRITING
6504
0x1DA8C, // 1DA8C..1DA9A; UNKNOWN
6505
0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING
6506
0x1DAA0, // 1DAA0 ; UNKNOWN
6507
0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING
6508
0x1DAB0, // 1DAB0..1DFFF; UNKNOWN
6509
0x1E000, // 1E000..1E006; GLAGOLITIC
6510
0x1E007, // 1E007 ; UNKNOWN
6511
0x1E008, // 1E008..1E018; GLAGOLITIC
6512
0x1E019, // 1E019..1E01A; UNKNOWN
6513
0x1E01B, // 1E01B..1E021; GLAGOLITIC
6514
0x1E022, // 1E022 ; UNKNOWN
6515
0x1E023, // 1E023..1E024; GLAGOLITIC
6516
0x1E025, // 1E025 ; UNKNOWN
6517
0x1E026, // 1E026..1E02A; GLAGOLITIC
6518
0x1E02B, // 1E02B..1E0FF; UNKNOWN
6519
0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
6520
0x1E12D, // 1E12D..1E12F; UNKNOWN
6521
0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
6522
0x1E13E, // 1E13E..1E13F; UNKNOWN
6523
0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
6524
0x1E14A, // 1E14A..1E14D; UNKNOWN
6525
0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
6526
0x1E150, // 1E150..1E2BF; UNKNOWN
6527
0x1E2C0, // 1E2C0..1E2F9; WANCHO
6528
0x1E2FA, // 1E2FA..1E2FE; UNKNOWN
6529
0x1E2FF, // 1E2FF ; WANCHO
6530
0x1E300, // 1E300..1E7FF; UNKNOWN
6531
0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI
6532
0x1E8C5, // 1E8C5..1E8C6; UNKNOWN
6533
0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI
6534
0x1E8D7, // 1E8D7..1E8FF; UNKNOWN
6535
0x1E900, // 1E900..1E94B; ADLAM
6536
0x1E94C, // 1E94C..1E94F; UNKNOWN
6537
0x1E950, // 1E950..1E959; ADLAM
6538
0x1E95A, // 1E95A..1E95D; UNKNOWN
6539
0x1E95E, // 1E95E..1E95F; ADLAM
6540
0x1E960, // 1E960..1EC70; UNKNOWN
6541
0x1EC71, // 1EC71..1ECB4; COMMON
6542
0x1ECB5, // 1ECB5..1ED00; UNKNOWN
6543
0x1ED01, // 1ED01..1ED3D; COMMON
6544
0x1ED3E, // 1ED3E..1EDFF; UNKNOWN
6545
0x1EE00, // 1EE00..1EE03; ARABIC
6546
0x1EE04, // 1EE04 ; UNKNOWN
6547
0x1EE05, // 1EE05..1EE1F; ARABIC
6548
0x1EE20, // 1EE20 ; UNKNOWN
6549
0x1EE21, // 1EE21..1EE22; ARABIC
6550
0x1EE23, // 1EE23 ; UNKNOWN
6551
0x1EE24, // 1EE24 ; ARABIC
6552
0x1EE25, // 1EE25..1EE26; UNKNOWN
6553
0x1EE27, // 1EE27 ; ARABIC
6554
0x1EE28, // 1EE28 ; UNKNOWN
6555
0x1EE29, // 1EE29..1EE32; ARABIC
6556
0x1EE33, // 1EE33 ; UNKNOWN
6557
0x1EE34, // 1EE34..1EE37; ARABIC
6558
0x1EE38, // 1EE38 ; UNKNOWN
6559
0x1EE39, // 1EE39 ; ARABIC
6560
0x1EE3A, // 1EE3A ; UNKNOWN
6561
0x1EE3B, // 1EE3B ; ARABIC
6562
0x1EE3C, // 1EE3C..1EE41; UNKNOWN
6563
0x1EE42, // 1EE42 ; ARABIC
6564
0x1EE43, // 1EE43..1EE46; UNKNOWN
6565
0x1EE47, // 1EE47 ; ARABIC
6566
0x1EE48, // 1EE48 ; UNKNOWN
6567
0x1EE49, // 1EE49 ; ARABIC
6568
0x1EE4A, // 1EE4A ; UNKNOWN
6569
0x1EE4B, // 1EE4B ; ARABIC
6570
0x1EE4C, // 1EE4C ; UNKNOWN
6571
0x1EE4D, // 1EE4D..1EE4F; ARABIC
6572
0x1EE50, // 1EE50 ; UNKNOWN
6573
0x1EE51, // 1EE51..1EE52; ARABIC
6574
0x1EE53, // 1EE53 ; UNKNOWN
6575
0x1EE54, // 1EE54 ; ARABIC
6576
0x1EE55, // 1EE55..1EE56; UNKNOWN
6577
0x1EE57, // 1EE57 ; ARABIC
6578
0x1EE58, // 1EE58 ; UNKNOWN
6579
0x1EE59, // 1EE59 ; ARABIC
6580
0x1EE5A, // 1EE5A ; UNKNOWN
6581
0x1EE5B, // 1EE5B ; ARABIC
6582
0x1EE5C, // 1EE5C ; UNKNOWN
6583
0x1EE5D, // 1EE5D ; ARABIC
6584
0x1EE5E, // 1EE5E ; UNKNOWN
6585
0x1EE5F, // 1EE5F ; ARABIC
6586
0x1EE60, // 1EE60 ; UNKNOWN
6587
0x1EE61, // 1EE61..1EE62; ARABIC
6588
0x1EE63, // 1EE63 ; UNKNOWN
6589
0x1EE64, // 1EE64 ; ARABIC
6590
0x1EE65, // 1EE65..1EE66; UNKNOWN
6591
0x1EE67, // 1EE67..1EE6A; ARABIC
6592
0x1EE6B, // 1EE6B ; UNKNOWN
6593
0x1EE6C, // 1EE6C..1EE72; ARABIC
6594
0x1EE73, // 1EE73 ; UNKNOWN
6595
0x1EE74, // 1EE74..1EE77; ARABIC
6596
0x1EE78, // 1EE78 ; UNKNOWN
6597
0x1EE79, // 1EE79..1EE7C; ARABIC
6598
0x1EE7D, // 1EE7D ; UNKNOWN
6599
0x1EE7E, // 1EE7E ; ARABIC
6600
0x1EE7F, // 1EE7F ; UNKNOWN
6601
0x1EE80, // 1EE80..1EE89; ARABIC
6602
0x1EE8A, // 1EE8A ; UNKNOWN
6603
0x1EE8B, // 1EE8B..1EE9B; ARABIC
6604
0x1EE9C, // 1EE9C..1EEA0; UNKNOWN
6605
0x1EEA1, // 1EEA1..1EEA3; ARABIC
6606
0x1EEA4, // 1EEA4 ; UNKNOWN
6607
0x1EEA5, // 1EEA5..1EEA9; ARABIC
6608
0x1EEAA, // 1EEAA ; UNKNOWN
6609
0x1EEAB, // 1EEAB..1EEBB; ARABIC
6610
0x1EEBC, // 1EEBC..1EEEF; UNKNOWN
6611
0x1EEF0, // 1EEF0..1EEF1; ARABIC
6612
0x1EEF2, // 1EEF2..1EFFF; UNKNOWN
6613
0x1F000, // 1F000..1F02B; COMMON
6614
0x1F02C, // 1F02C..1F02F; UNKNOWN
6615
0x1F030, // 1F030..1F093; COMMON
6616
0x1F094, // 1F094..1F09F; UNKNOWN
6617
0x1F0A0, // 1F0A0..1F0AE; COMMON
6618
0x1F0AF, // 1F0AF..1F0B0; UNKNOWN
6619
0x1F0B1, // 1F0B1..1F0BF; COMMON
6620
0x1F0C0, // 1F0C0 ; UNKNOWN
6621
0x1F0C1, // 1F0C1..1F0CF; COMMON
6622
0x1F0D0, // 1F0D0 ; UNKNOWN
6623
0x1F0D1, // 1F0D1..1F0F5; COMMON
6624
0x1F0F6, // 1F0F6..1F0FF; UNKNOWN
6625
0x1F100, // 1F100..1F1AD; COMMON
6626
0x1F1AE, // 1F1AE..1F1E5; UNKNOWN
6627
0x1F1E6, // 1F1E6..1F1FF; COMMON
6628
0x1F200, // 1F200 ; HIRAGANA
6629
0x1F201, // 1F201..1F202; COMMON
6630
0x1F203, // 1F203..1F20F; UNKNOWN
6631
0x1F210, // 1F210..1F23B; COMMON
6632
0x1F23C, // 1F23C..1F23F; UNKNOWN
6633
0x1F240, // 1F240..1F248; COMMON
6634
0x1F249, // 1F249..1F24F; UNKNOWN
6635
0x1F250, // 1F250..1F251; COMMON
6636
0x1F252, // 1F252..1F25F; UNKNOWN
6637
0x1F260, // 1F260..1F265; COMMON
6638
0x1F266, // 1F266..1F2FF; UNKNOWN
6639
0x1F300, // 1F300..1F6D7; COMMON
6640
0x1F6D8, // 1F6D8..1F6DF; UNKNOWN
6641
0x1F6E0, // 1F6E0..1F6EC; COMMON
6642
0x1F6ED, // 1F6ED..1F6EF; UNKNOWN
6643
0x1F6F0, // 1F6F0..1F6FC; COMMON
6644
0x1F6FD, // 1F6FD..1F6FF; UNKNOWN
6645
0x1F700, // 1F700..1F773; COMMON
6646
0x1F774, // 1F774..1F77F; UNKNOWN
6647
0x1F780, // 1F780..1F7D8; COMMON
6648
0x1F7D9, // 1F7D9..1F7DF; UNKNOWN
6649
0x1F7E0, // 1F7E0..1F7EB; COMMON
6650
0x1F7EC, // 1F7EC..1F7FF; UNKNOWN
6651
0x1F800, // 1F800..1F80B; COMMON
6652
0x1F80C, // 1F80C..1F80F; UNKNOWN
6653
0x1F810, // 1F810..1F847; COMMON
6654
0x1F848, // 1F848..1F84F; UNKNOWN
6655
0x1F850, // 1F850..1F859; COMMON
6656
0x1F85A, // 1F85A..1F85F; UNKNOWN
6657
0x1F860, // 1F860..1F887; COMMON
6658
0x1F888, // 1F888..1F88F; UNKNOWN
6659
0x1F890, // 1F890..1F8AD; COMMON
6660
0x1F8AE, // 1F8AE..1F8AF; UNKNOWN
6661
0x1F8B0, // 1F8B0..1F8B1; COMMON
6662
0x1F8B2, // 1F8B2..1F8FF; UNKNOWN
6663
0x1F900, // 1F900..1F978; COMMON
6664
0x1F979, // 1F979 ; UNKNOWN
6665
0x1F97A, // 1F97A..1F9CB; COMMON
6666
0x1F9CC, // 1F9CC ; UNKNOWN
6667
0x1F9CD, // 1F9CD..1FA53; COMMON
6668
0x1FA54, // 1FA54..1FA5F; UNKNOWN
6669
0x1FA60, // 1FA60..1FA6D; COMMON
6670
0x1FA6E, // 1FA6E..1FA6F; UNKNOWN
6671
0x1FA70, // 1FA70..1FA74; COMMON
6672
0x1FA75, // 1FA75..1FA77; UNKNOWN
6673
0x1FA78, // 1FA78..1FA7A; COMMON
6674
0x1FA7B, // 1FA7B..1FA7F; UNKNOWN
6675
0x1FA80, // 1FA80..1FA86; COMMON
6676
0x1FA87, // 1FA87..1FA8F; UNKNOWN
6677
0x1FA90, // 1FA90..1FAA8; COMMON
6678
0x1FAA9, // 1FAA9..1FAAF; UNKNOWN
6679
0x1FAB0, // 1FAB0..1FAB6; COMMON
6680
0x1FAB7, // 1FAB7..1FABF; UNKNOWN
6681
0x1FAC0, // 1FAC0..1FAC2; COMMON
6682
0x1FAC3, // 1FAC3..1FACF; UNKNOWN
6683
0x1FAD0, // 1FAD0..1FAD6; COMMON
6684
0x1FAD7, // 1FAD7..1FAFF; UNKNOWN
6685
0x1FB00, // 1FB00..1FB92; COMMON
6686
0x1FB93, // 1FB93 ; UNKNOWN
6687
0x1FB94, // 1FB94..1FBCA; COMMON
6688
0x1FBCB, // 1FBCB..1FBEF; UNKNOWN
6689
0x1FBF0, // 1FBF0..1FBF9; COMMON
6690
0x1FBFA, // 1FBFA..1FFFF; UNKNOWN
6691
0x20000, // 20000..2A6DD; HAN
6692
0x2A6DE, // 2A6DE..2A6FF; UNKNOWN
6693
0x2A700, // 2A700..2B734; HAN
6694
0x2B735, // 2B735..2B73F; UNKNOWN
6695
0x2B740, // 2B740..2B81D; HAN
6696
0x2B81E, // 2B81E..2B81F; UNKNOWN
6697
0x2B820, // 2B820..2CEA1; HAN
6698
0x2CEA2, // 2CEA2..2CEAF; UNKNOWN
6699
0x2CEB0, // 2CEB0..2EBE0; HAN
6700
0x2EBE1, // 2EBE1..2F7FF; UNKNOWN
6701
0x2F800, // 2F800..2FA1D; HAN
6702
0x2FA1E, // 2FA1E..2FFFF; UNKNOWN
6703
0x30000, // 30000..3134A; HAN
6704
0x3134B, // 3134B..E0000; UNKNOWN
6705
0xE0001, // E0001 ; COMMON
6706
0xE0002, // E0002..E001F; UNKNOWN
6707
0xE0020, // E0020..E007F; COMMON
6708
0xE0080, // E0080..E00FF; UNKNOWN
6709
0xE0100, // E0100..E01EF; INHERITED
6710
0xE01F0, // E01F0..10FFFF; UNKNOWN
6711
};
6712
6713
private static final UnicodeScript[] scripts = {
6714
COMMON, // 0000..0040
6715
LATIN, // 0041..005A
6716
COMMON, // 005B..0060
6717
LATIN, // 0061..007A
6718
COMMON, // 007B..00A9
6719
LATIN, // 00AA
6720
COMMON, // 00AB..00B9
6721
LATIN, // 00BA
6722
COMMON, // 00BB..00BF
6723
LATIN, // 00C0..00D6
6724
COMMON, // 00D7
6725
LATIN, // 00D8..00F6
6726
COMMON, // 00F7
6727
LATIN, // 00F8..02B8
6728
COMMON, // 02B9..02DF
6729
LATIN, // 02E0..02E4
6730
COMMON, // 02E5..02E9
6731
BOPOMOFO, // 02EA..02EB
6732
COMMON, // 02EC..02FF
6733
INHERITED, // 0300..036F
6734
GREEK, // 0370..0373
6735
COMMON, // 0374
6736
GREEK, // 0375..0377
6737
UNKNOWN, // 0378..0379
6738
GREEK, // 037A..037D
6739
COMMON, // 037E
6740
GREEK, // 037F
6741
UNKNOWN, // 0380..0383
6742
GREEK, // 0384
6743
COMMON, // 0385
6744
GREEK, // 0386
6745
COMMON, // 0387
6746
GREEK, // 0388..038A
6747
UNKNOWN, // 038B
6748
GREEK, // 038C
6749
UNKNOWN, // 038D
6750
GREEK, // 038E..03A1
6751
UNKNOWN, // 03A2
6752
GREEK, // 03A3..03E1
6753
COPTIC, // 03E2..03EF
6754
GREEK, // 03F0..03FF
6755
CYRILLIC, // 0400..0484
6756
INHERITED, // 0485..0486
6757
CYRILLIC, // 0487..052F
6758
UNKNOWN, // 0530
6759
ARMENIAN, // 0531..0556
6760
UNKNOWN, // 0557..0558
6761
ARMENIAN, // 0559..058A
6762
UNKNOWN, // 058B..058C
6763
ARMENIAN, // 058D..058F
6764
UNKNOWN, // 0590
6765
HEBREW, // 0591..05C7
6766
UNKNOWN, // 05C8..05CF
6767
HEBREW, // 05D0..05EA
6768
UNKNOWN, // 05EB..05EE
6769
HEBREW, // 05EF..05F4
6770
UNKNOWN, // 05F5..05FF
6771
ARABIC, // 0600..0604
6772
COMMON, // 0605
6773
ARABIC, // 0606..060B
6774
COMMON, // 060C
6775
ARABIC, // 060D..061A
6776
COMMON, // 061B
6777
ARABIC, // 061C
6778
UNKNOWN, // 061D
6779
ARABIC, // 061E
6780
COMMON, // 061F
6781
ARABIC, // 0620..063F
6782
COMMON, // 0640
6783
ARABIC, // 0641..064A
6784
INHERITED, // 064B..0655
6785
ARABIC, // 0656..066F
6786
INHERITED, // 0670
6787
ARABIC, // 0671..06DC
6788
COMMON, // 06DD
6789
ARABIC, // 06DE..06FF
6790
SYRIAC, // 0700..070D
6791
UNKNOWN, // 070E
6792
SYRIAC, // 070F..074A
6793
UNKNOWN, // 074B..074C
6794
SYRIAC, // 074D..074F
6795
ARABIC, // 0750..077F
6796
THAANA, // 0780..07B1
6797
UNKNOWN, // 07B2..07BF
6798
NKO, // 07C0..07FA
6799
UNKNOWN, // 07FB..07FC
6800
NKO, // 07FD..07FF
6801
SAMARITAN, // 0800..082D
6802
UNKNOWN, // 082E..082F
6803
SAMARITAN, // 0830..083E
6804
UNKNOWN, // 083F
6805
MANDAIC, // 0840..085B
6806
UNKNOWN, // 085C..085D
6807
MANDAIC, // 085E
6808
UNKNOWN, // 085F
6809
SYRIAC, // 0860..086A
6810
UNKNOWN, // 086B..089F
6811
ARABIC, // 08A0..08B4
6812
UNKNOWN, // 08B5
6813
ARABIC, // 08B6..08C7
6814
UNKNOWN, // 08C8..08D2
6815
ARABIC, // 08D3..08E1
6816
COMMON, // 08E2
6817
ARABIC, // 08E3..08FF
6818
DEVANAGARI, // 0900..0950
6819
INHERITED, // 0951..0954
6820
DEVANAGARI, // 0955..0963
6821
COMMON, // 0964..0965
6822
DEVANAGARI, // 0966..097F
6823
BENGALI, // 0980..0983
6824
UNKNOWN, // 0984
6825
BENGALI, // 0985..098C
6826
UNKNOWN, // 098D..098E
6827
BENGALI, // 098F..0990
6828
UNKNOWN, // 0991..0992
6829
BENGALI, // 0993..09A8
6830
UNKNOWN, // 09A9
6831
BENGALI, // 09AA..09B0
6832
UNKNOWN, // 09B1
6833
BENGALI, // 09B2
6834
UNKNOWN, // 09B3..09B5
6835
BENGALI, // 09B6..09B9
6836
UNKNOWN, // 09BA..09BB
6837
BENGALI, // 09BC..09C4
6838
UNKNOWN, // 09C5..09C6
6839
BENGALI, // 09C7..09C8
6840
UNKNOWN, // 09C9..09CA
6841
BENGALI, // 09CB..09CE
6842
UNKNOWN, // 09CF..09D6
6843
BENGALI, // 09D7
6844
UNKNOWN, // 09D8..09DB
6845
BENGALI, // 09DC..09DD
6846
UNKNOWN, // 09DE
6847
BENGALI, // 09DF..09E3
6848
UNKNOWN, // 09E4..09E5
6849
BENGALI, // 09E6..09FE
6850
UNKNOWN, // 09FF..0A00
6851
GURMUKHI, // 0A01..0A03
6852
UNKNOWN, // 0A04
6853
GURMUKHI, // 0A05..0A0A
6854
UNKNOWN, // 0A0B..0A0E
6855
GURMUKHI, // 0A0F..0A10
6856
UNKNOWN, // 0A11..0A12
6857
GURMUKHI, // 0A13..0A28
6858
UNKNOWN, // 0A29
6859
GURMUKHI, // 0A2A..0A30
6860
UNKNOWN, // 0A31
6861
GURMUKHI, // 0A32..0A33
6862
UNKNOWN, // 0A34
6863
GURMUKHI, // 0A35..0A36
6864
UNKNOWN, // 0A37
6865
GURMUKHI, // 0A38..0A39
6866
UNKNOWN, // 0A3A..0A3B
6867
GURMUKHI, // 0A3C
6868
UNKNOWN, // 0A3D
6869
GURMUKHI, // 0A3E..0A42
6870
UNKNOWN, // 0A43..0A46
6871
GURMUKHI, // 0A47..0A48
6872
UNKNOWN, // 0A49..0A4A
6873
GURMUKHI, // 0A4B..0A4D
6874
UNKNOWN, // 0A4E..0A50
6875
GURMUKHI, // 0A51
6876
UNKNOWN, // 0A52..0A58
6877
GURMUKHI, // 0A59..0A5C
6878
UNKNOWN, // 0A5D
6879
GURMUKHI, // 0A5E
6880
UNKNOWN, // 0A5F..0A65
6881
GURMUKHI, // 0A66..0A76
6882
UNKNOWN, // 0A77..0A80
6883
GUJARATI, // 0A81..0A83
6884
UNKNOWN, // 0A84
6885
GUJARATI, // 0A85..0A8D
6886
UNKNOWN, // 0A8E
6887
GUJARATI, // 0A8F..0A91
6888
UNKNOWN, // 0A92
6889
GUJARATI, // 0A93..0AA8
6890
UNKNOWN, // 0AA9
6891
GUJARATI, // 0AAA..0AB0
6892
UNKNOWN, // 0AB1
6893
GUJARATI, // 0AB2..0AB3
6894
UNKNOWN, // 0AB4
6895
GUJARATI, // 0AB5..0AB9
6896
UNKNOWN, // 0ABA..0ABB
6897
GUJARATI, // 0ABC..0AC5
6898
UNKNOWN, // 0AC6
6899
GUJARATI, // 0AC7..0AC9
6900
UNKNOWN, // 0ACA
6901
GUJARATI, // 0ACB..0ACD
6902
UNKNOWN, // 0ACE..0ACF
6903
GUJARATI, // 0AD0
6904
UNKNOWN, // 0AD1..0ADF
6905
GUJARATI, // 0AE0..0AE3
6906
UNKNOWN, // 0AE4..0AE5
6907
GUJARATI, // 0AE6..0AF1
6908
UNKNOWN, // 0AF2..0AF8
6909
GUJARATI, // 0AF9..0AFF
6910
UNKNOWN, // 0B00
6911
ORIYA, // 0B01..0B03
6912
UNKNOWN, // 0B04
6913
ORIYA, // 0B05..0B0C
6914
UNKNOWN, // 0B0D..0B0E
6915
ORIYA, // 0B0F..0B10
6916
UNKNOWN, // 0B11..0B12
6917
ORIYA, // 0B13..0B28
6918
UNKNOWN, // 0B29
6919
ORIYA, // 0B2A..0B30
6920
UNKNOWN, // 0B31
6921
ORIYA, // 0B32..0B33
6922
UNKNOWN, // 0B34
6923
ORIYA, // 0B35..0B39
6924
UNKNOWN, // 0B3A..0B3B
6925
ORIYA, // 0B3C..0B44
6926
UNKNOWN, // 0B45..0B46
6927
ORIYA, // 0B47..0B48
6928
UNKNOWN, // 0B49..0B4A
6929
ORIYA, // 0B4B..0B4D
6930
UNKNOWN, // 0B4E..0B54
6931
ORIYA, // 0B55..0B57
6932
UNKNOWN, // 0B58..0B5B
6933
ORIYA, // 0B5C..0B5D
6934
UNKNOWN, // 0B5E
6935
ORIYA, // 0B5F..0B63
6936
UNKNOWN, // 0B64..0B65
6937
ORIYA, // 0B66..0B77
6938
UNKNOWN, // 0B78..0B81
6939
TAMIL, // 0B82..0B83
6940
UNKNOWN, // 0B84
6941
TAMIL, // 0B85..0B8A
6942
UNKNOWN, // 0B8B..0B8D
6943
TAMIL, // 0B8E..0B90
6944
UNKNOWN, // 0B91
6945
TAMIL, // 0B92..0B95
6946
UNKNOWN, // 0B96..0B98
6947
TAMIL, // 0B99..0B9A
6948
UNKNOWN, // 0B9B
6949
TAMIL, // 0B9C
6950
UNKNOWN, // 0B9D
6951
TAMIL, // 0B9E..0B9F
6952
UNKNOWN, // 0BA0..0BA2
6953
TAMIL, // 0BA3..0BA4
6954
UNKNOWN, // 0BA5..0BA7
6955
TAMIL, // 0BA8..0BAA
6956
UNKNOWN, // 0BAB..0BAD
6957
TAMIL, // 0BAE..0BB9
6958
UNKNOWN, // 0BBA..0BBD
6959
TAMIL, // 0BBE..0BC2
6960
UNKNOWN, // 0BC3..0BC5
6961
TAMIL, // 0BC6..0BC8
6962
UNKNOWN, // 0BC9
6963
TAMIL, // 0BCA..0BCD
6964
UNKNOWN, // 0BCE..0BCF
6965
TAMIL, // 0BD0
6966
UNKNOWN, // 0BD1..0BD6
6967
TAMIL, // 0BD7
6968
UNKNOWN, // 0BD8..0BE5
6969
TAMIL, // 0BE6..0BFA
6970
UNKNOWN, // 0BFB..0BFF
6971
TELUGU, // 0C00..0C0C
6972
UNKNOWN, // 0C0D
6973
TELUGU, // 0C0E..0C10
6974
UNKNOWN, // 0C11
6975
TELUGU, // 0C12..0C28
6976
UNKNOWN, // 0C29
6977
TELUGU, // 0C2A..0C39
6978
UNKNOWN, // 0C3A..0C3C
6979
TELUGU, // 0C3D..0C44
6980
UNKNOWN, // 0C45
6981
TELUGU, // 0C46..0C48
6982
UNKNOWN, // 0C49
6983
TELUGU, // 0C4A..0C4D
6984
UNKNOWN, // 0C4E..0C54
6985
TELUGU, // 0C55..0C56
6986
UNKNOWN, // 0C57
6987
TELUGU, // 0C58..0C5A
6988
UNKNOWN, // 0C5B..0C5F
6989
TELUGU, // 0C60..0C63
6990
UNKNOWN, // 0C64..0C65
6991
TELUGU, // 0C66..0C6F
6992
UNKNOWN, // 0C70..0C76
6993
TELUGU, // 0C77..0C7F
6994
KANNADA, // 0C80..0C8C
6995
UNKNOWN, // 0C8D
6996
KANNADA, // 0C8E..0C90
6997
UNKNOWN, // 0C91
6998
KANNADA, // 0C92..0CA8
6999
UNKNOWN, // 0CA9
7000
KANNADA, // 0CAA..0CB3
7001
UNKNOWN, // 0CB4
7002
KANNADA, // 0CB5..0CB9
7003
UNKNOWN, // 0CBA..0CBB
7004
KANNADA, // 0CBC..0CC4
7005
UNKNOWN, // 0CC5
7006
KANNADA, // 0CC6..0CC8
7007
UNKNOWN, // 0CC9
7008
KANNADA, // 0CCA..0CCD
7009
UNKNOWN, // 0CCE..0CD4
7010
KANNADA, // 0CD5..0CD6
7011
UNKNOWN, // 0CD7..0CDD
7012
KANNADA, // 0CDE
7013
UNKNOWN, // 0CDF
7014
KANNADA, // 0CE0..0CE3
7015
UNKNOWN, // 0CE4..0CE5
7016
KANNADA, // 0CE6..0CEF
7017
UNKNOWN, // 0CF0
7018
KANNADA, // 0CF1..0CF2
7019
UNKNOWN, // 0CF3..0CFF
7020
MALAYALAM, // 0D00..0D0C
7021
UNKNOWN, // 0D0D
7022
MALAYALAM, // 0D0E..0D10
7023
UNKNOWN, // 0D11
7024
MALAYALAM, // 0D12..0D44
7025
UNKNOWN, // 0D45
7026
MALAYALAM, // 0D46..0D48
7027
UNKNOWN, // 0D49
7028
MALAYALAM, // 0D4A..0D4F
7029
UNKNOWN, // 0D50..0D53
7030
MALAYALAM, // 0D54..0D63
7031
UNKNOWN, // 0D64..0D65
7032
MALAYALAM, // 0D66..0D7F
7033
UNKNOWN, // 0D80
7034
SINHALA, // 0D81..0D83
7035
UNKNOWN, // 0D84
7036
SINHALA, // 0D85..0D96
7037
UNKNOWN, // 0D97..0D99
7038
SINHALA, // 0D9A..0DB1
7039
UNKNOWN, // 0DB2
7040
SINHALA, // 0DB3..0DBB
7041
UNKNOWN, // 0DBC
7042
SINHALA, // 0DBD
7043
UNKNOWN, // 0DBE..0DBF
7044
SINHALA, // 0DC0..0DC6
7045
UNKNOWN, // 0DC7..0DC9
7046
SINHALA, // 0DCA
7047
UNKNOWN, // 0DCB..0DCE
7048
SINHALA, // 0DCF..0DD4
7049
UNKNOWN, // 0DD5
7050
SINHALA, // 0DD6
7051
UNKNOWN, // 0DD7
7052
SINHALA, // 0DD8..0DDF
7053
UNKNOWN, // 0DE0..0DE5
7054
SINHALA, // 0DE6..0DEF
7055
UNKNOWN, // 0DF0..0DF1
7056
SINHALA, // 0DF2..0DF4
7057
UNKNOWN, // 0DF5..0E00
7058
THAI, // 0E01..0E3A
7059
UNKNOWN, // 0E3B..0E3E
7060
COMMON, // 0E3F
7061
THAI, // 0E40..0E5B
7062
UNKNOWN, // 0E5C..0E80
7063
LAO, // 0E81..0E82
7064
UNKNOWN, // 0E83
7065
LAO, // 0E84
7066
UNKNOWN, // 0E85
7067
LAO, // 0E86..0E8A
7068
UNKNOWN, // 0E8B
7069
LAO, // 0E8C..0EA3
7070
UNKNOWN, // 0EA4
7071
LAO, // 0EA5
7072
UNKNOWN, // 0EA6
7073
LAO, // 0EA7..0EBD
7074
UNKNOWN, // 0EBE..0EBF
7075
LAO, // 0EC0..0EC4
7076
UNKNOWN, // 0EC5
7077
LAO, // 0EC6
7078
UNKNOWN, // 0EC7
7079
LAO, // 0EC8..0ECD
7080
UNKNOWN, // 0ECE..0ECF
7081
LAO, // 0ED0..0ED9
7082
UNKNOWN, // 0EDA..0EDB
7083
LAO, // 0EDC..0EDF
7084
UNKNOWN, // 0EE0..0EFF
7085
TIBETAN, // 0F00..0F47
7086
UNKNOWN, // 0F48
7087
TIBETAN, // 0F49..0F6C
7088
UNKNOWN, // 0F6D..0F70
7089
TIBETAN, // 0F71..0F97
7090
UNKNOWN, // 0F98
7091
TIBETAN, // 0F99..0FBC
7092
UNKNOWN, // 0FBD
7093
TIBETAN, // 0FBE..0FCC
7094
UNKNOWN, // 0FCD
7095
TIBETAN, // 0FCE..0FD4
7096
COMMON, // 0FD5..0FD8
7097
TIBETAN, // 0FD9..0FDA
7098
UNKNOWN, // 0FDB..0FFF
7099
MYANMAR, // 1000..109F
7100
GEORGIAN, // 10A0..10C5
7101
UNKNOWN, // 10C6
7102
GEORGIAN, // 10C7
7103
UNKNOWN, // 10C8..10CC
7104
GEORGIAN, // 10CD
7105
UNKNOWN, // 10CE..10CF
7106
GEORGIAN, // 10D0..10FA
7107
COMMON, // 10FB
7108
GEORGIAN, // 10FC..10FF
7109
HANGUL, // 1100..11FF
7110
ETHIOPIC, // 1200..1248
7111
UNKNOWN, // 1249
7112
ETHIOPIC, // 124A..124D
7113
UNKNOWN, // 124E..124F
7114
ETHIOPIC, // 1250..1256
7115
UNKNOWN, // 1257
7116
ETHIOPIC, // 1258
7117
UNKNOWN, // 1259
7118
ETHIOPIC, // 125A..125D
7119
UNKNOWN, // 125E..125F
7120
ETHIOPIC, // 1260..1288
7121
UNKNOWN, // 1289
7122
ETHIOPIC, // 128A..128D
7123
UNKNOWN, // 128E..128F
7124
ETHIOPIC, // 1290..12B0
7125
UNKNOWN, // 12B1
7126
ETHIOPIC, // 12B2..12B5
7127
UNKNOWN, // 12B6..12B7
7128
ETHIOPIC, // 12B8..12BE
7129
UNKNOWN, // 12BF
7130
ETHIOPIC, // 12C0
7131
UNKNOWN, // 12C1
7132
ETHIOPIC, // 12C2..12C5
7133
UNKNOWN, // 12C6..12C7
7134
ETHIOPIC, // 12C8..12D6
7135
UNKNOWN, // 12D7
7136
ETHIOPIC, // 12D8..1310
7137
UNKNOWN, // 1311
7138
ETHIOPIC, // 1312..1315
7139
UNKNOWN, // 1316..1317
7140
ETHIOPIC, // 1318..135A
7141
UNKNOWN, // 135B..135C
7142
ETHIOPIC, // 135D..137C
7143
UNKNOWN, // 137D..137F
7144
ETHIOPIC, // 1380..1399
7145
UNKNOWN, // 139A..139F
7146
CHEROKEE, // 13A0..13F5
7147
UNKNOWN, // 13F6..13F7
7148
CHEROKEE, // 13F8..13FD
7149
UNKNOWN, // 13FE..13FF
7150
CANADIAN_ABORIGINAL, // 1400..167F
7151
OGHAM, // 1680..169C
7152
UNKNOWN, // 169D..169F
7153
RUNIC, // 16A0..16EA
7154
COMMON, // 16EB..16ED
7155
RUNIC, // 16EE..16F8
7156
UNKNOWN, // 16F9..16FF
7157
TAGALOG, // 1700..170C
7158
UNKNOWN, // 170D
7159
TAGALOG, // 170E..1714
7160
UNKNOWN, // 1715..171F
7161
HANUNOO, // 1720..1734
7162
COMMON, // 1735..1736
7163
UNKNOWN, // 1737..173F
7164
BUHID, // 1740..1753
7165
UNKNOWN, // 1754..175F
7166
TAGBANWA, // 1760..176C
7167
UNKNOWN, // 176D
7168
TAGBANWA, // 176E..1770
7169
UNKNOWN, // 1771
7170
TAGBANWA, // 1772..1773
7171
UNKNOWN, // 1774..177F
7172
KHMER, // 1780..17DD
7173
UNKNOWN, // 17DE..17DF
7174
KHMER, // 17E0..17E9
7175
UNKNOWN, // 17EA..17EF
7176
KHMER, // 17F0..17F9
7177
UNKNOWN, // 17FA..17FF
7178
MONGOLIAN, // 1800..1801
7179
COMMON, // 1802..1803
7180
MONGOLIAN, // 1804
7181
COMMON, // 1805
7182
MONGOLIAN, // 1806..180E
7183
UNKNOWN, // 180F
7184
MONGOLIAN, // 1810..1819
7185
UNKNOWN, // 181A..181F
7186
MONGOLIAN, // 1820..1878
7187
UNKNOWN, // 1879..187F
7188
MONGOLIAN, // 1880..18AA
7189
UNKNOWN, // 18AB..18AF
7190
CANADIAN_ABORIGINAL, // 18B0..18F5
7191
UNKNOWN, // 18F6..18FF
7192
LIMBU, // 1900..191E
7193
UNKNOWN, // 191F
7194
LIMBU, // 1920..192B
7195
UNKNOWN, // 192C..192F
7196
LIMBU, // 1930..193B
7197
UNKNOWN, // 193C..193F
7198
LIMBU, // 1940
7199
UNKNOWN, // 1941..1943
7200
LIMBU, // 1944..194F
7201
TAI_LE, // 1950..196D
7202
UNKNOWN, // 196E..196F
7203
TAI_LE, // 1970..1974
7204
UNKNOWN, // 1975..197F
7205
NEW_TAI_LUE, // 1980..19AB
7206
UNKNOWN, // 19AC..19AF
7207
NEW_TAI_LUE, // 19B0..19C9
7208
UNKNOWN, // 19CA..19CF
7209
NEW_TAI_LUE, // 19D0..19DA
7210
UNKNOWN, // 19DB..19DD
7211
NEW_TAI_LUE, // 19DE..19DF
7212
KHMER, // 19E0..19FF
7213
BUGINESE, // 1A00..1A1B
7214
UNKNOWN, // 1A1C..1A1D
7215
BUGINESE, // 1A1E..1A1F
7216
TAI_THAM, // 1A20..1A5E
7217
UNKNOWN, // 1A5F
7218
TAI_THAM, // 1A60..1A7C
7219
UNKNOWN, // 1A7D..1A7E
7220
TAI_THAM, // 1A7F..1A89
7221
UNKNOWN, // 1A8A..1A8F
7222
TAI_THAM, // 1A90..1A99
7223
UNKNOWN, // 1A9A..1A9F
7224
TAI_THAM, // 1AA0..1AAD
7225
UNKNOWN, // 1AAE..1AAF
7226
INHERITED, // 1AB0..1AC0
7227
UNKNOWN, // 1AC1..1AFF
7228
BALINESE, // 1B00..1B4B
7229
UNKNOWN, // 1B4C..1B4F
7230
BALINESE, // 1B50..1B7C
7231
UNKNOWN, // 1B7D..1B7F
7232
SUNDANESE, // 1B80..1BBF
7233
BATAK, // 1BC0..1BF3
7234
UNKNOWN, // 1BF4..1BFB
7235
BATAK, // 1BFC..1BFF
7236
LEPCHA, // 1C00..1C37
7237
UNKNOWN, // 1C38..1C3A
7238
LEPCHA, // 1C3B..1C49
7239
UNKNOWN, // 1C4A..1C4C
7240
LEPCHA, // 1C4D..1C4F
7241
OL_CHIKI, // 1C50..1C7F
7242
CYRILLIC, // 1C80..1C88
7243
UNKNOWN, // 1C89..1C8F
7244
GEORGIAN, // 1C90..1CBA
7245
UNKNOWN, // 1CBB..1CBC
7246
GEORGIAN, // 1CBD..1CBF
7247
SUNDANESE, // 1CC0..1CC7
7248
UNKNOWN, // 1CC8..1CCF
7249
INHERITED, // 1CD0..1CD2
7250
COMMON, // 1CD3
7251
INHERITED, // 1CD4..1CE0
7252
COMMON, // 1CE1
7253
INHERITED, // 1CE2..1CE8
7254
COMMON, // 1CE9..1CEC
7255
INHERITED, // 1CED
7256
COMMON, // 1CEE..1CF3
7257
INHERITED, // 1CF4
7258
COMMON, // 1CF5..1CF7
7259
INHERITED, // 1CF8..1CF9
7260
COMMON, // 1CFA
7261
UNKNOWN, // 1CFB..1CFF
7262
LATIN, // 1D00..1D25
7263
GREEK, // 1D26..1D2A
7264
CYRILLIC, // 1D2B
7265
LATIN, // 1D2C..1D5C
7266
GREEK, // 1D5D..1D61
7267
LATIN, // 1D62..1D65
7268
GREEK, // 1D66..1D6A
7269
LATIN, // 1D6B..1D77
7270
CYRILLIC, // 1D78
7271
LATIN, // 1D79..1DBE
7272
GREEK, // 1DBF
7273
INHERITED, // 1DC0..1DF9
7274
UNKNOWN, // 1DFA
7275
INHERITED, // 1DFB..1DFF
7276
LATIN, // 1E00..1EFF
7277
GREEK, // 1F00..1F15
7278
UNKNOWN, // 1F16..1F17
7279
GREEK, // 1F18..1F1D
7280
UNKNOWN, // 1F1E..1F1F
7281
GREEK, // 1F20..1F45
7282
UNKNOWN, // 1F46..1F47
7283
GREEK, // 1F48..1F4D
7284
UNKNOWN, // 1F4E..1F4F
7285
GREEK, // 1F50..1F57
7286
UNKNOWN, // 1F58
7287
GREEK, // 1F59
7288
UNKNOWN, // 1F5A
7289
GREEK, // 1F5B
7290
UNKNOWN, // 1F5C
7291
GREEK, // 1F5D
7292
UNKNOWN, // 1F5E
7293
GREEK, // 1F5F..1F7D
7294
UNKNOWN, // 1F7E..1F7F
7295
GREEK, // 1F80..1FB4
7296
UNKNOWN, // 1FB5
7297
GREEK, // 1FB6..1FC4
7298
UNKNOWN, // 1FC5
7299
GREEK, // 1FC6..1FD3
7300
UNKNOWN, // 1FD4..1FD5
7301
GREEK, // 1FD6..1FDB
7302
UNKNOWN, // 1FDC
7303
GREEK, // 1FDD..1FEF
7304
UNKNOWN, // 1FF0..1FF1
7305
GREEK, // 1FF2..1FF4
7306
UNKNOWN, // 1FF5
7307
GREEK, // 1FF6..1FFE
7308
UNKNOWN, // 1FFF
7309
COMMON, // 2000..200B
7310
INHERITED, // 200C..200D
7311
COMMON, // 200E..2064
7312
UNKNOWN, // 2065
7313
COMMON, // 2066..2070
7314
LATIN, // 2071
7315
UNKNOWN, // 2072..2073
7316
COMMON, // 2074..207E
7317
LATIN, // 207F
7318
COMMON, // 2080..208E
7319
UNKNOWN, // 208F
7320
LATIN, // 2090..209C
7321
UNKNOWN, // 209D..209F
7322
COMMON, // 20A0..20BF
7323
UNKNOWN, // 20C0..20CF
7324
INHERITED, // 20D0..20F0
7325
UNKNOWN, // 20F1..20FF
7326
COMMON, // 2100..2125
7327
GREEK, // 2126
7328
COMMON, // 2127..2129
7329
LATIN, // 212A..212B
7330
COMMON, // 212C..2131
7331
LATIN, // 2132
7332
COMMON, // 2133..214D
7333
LATIN, // 214E
7334
COMMON, // 214F..215F
7335
LATIN, // 2160..2188
7336
COMMON, // 2189..218B
7337
UNKNOWN, // 218C..218F
7338
COMMON, // 2190..2426
7339
UNKNOWN, // 2427..243F
7340
COMMON, // 2440..244A
7341
UNKNOWN, // 244B..245F
7342
COMMON, // 2460..27FF
7343
BRAILLE, // 2800..28FF
7344
COMMON, // 2900..2B73
7345
UNKNOWN, // 2B74..2B75
7346
COMMON, // 2B76..2B95
7347
UNKNOWN, // 2B96
7348
COMMON, // 2B97..2BFF
7349
GLAGOLITIC, // 2C00..2C2E
7350
UNKNOWN, // 2C2F
7351
GLAGOLITIC, // 2C30..2C5E
7352
UNKNOWN, // 2C5F
7353
LATIN, // 2C60..2C7F
7354
COPTIC, // 2C80..2CF3
7355
UNKNOWN, // 2CF4..2CF8
7356
COPTIC, // 2CF9..2CFF
7357
GEORGIAN, // 2D00..2D25
7358
UNKNOWN, // 2D26
7359
GEORGIAN, // 2D27
7360
UNKNOWN, // 2D28..2D2C
7361
GEORGIAN, // 2D2D
7362
UNKNOWN, // 2D2E..2D2F
7363
TIFINAGH, // 2D30..2D67
7364
UNKNOWN, // 2D68..2D6E
7365
TIFINAGH, // 2D6F..2D70
7366
UNKNOWN, // 2D71..2D7E
7367
TIFINAGH, // 2D7F
7368
ETHIOPIC, // 2D80..2D96
7369
UNKNOWN, // 2D97..2D9F
7370
ETHIOPIC, // 2DA0..2DA6
7371
UNKNOWN, // 2DA7
7372
ETHIOPIC, // 2DA8..2DAE
7373
UNKNOWN, // 2DAF
7374
ETHIOPIC, // 2DB0..2DB6
7375
UNKNOWN, // 2DB7
7376
ETHIOPIC, // 2DB8..2DBE
7377
UNKNOWN, // 2DBF
7378
ETHIOPIC, // 2DC0..2DC6
7379
UNKNOWN, // 2DC7
7380
ETHIOPIC, // 2DC8..2DCE
7381
UNKNOWN, // 2DCF
7382
ETHIOPIC, // 2DD0..2DD6
7383
UNKNOWN, // 2DD7
7384
ETHIOPIC, // 2DD8..2DDE
7385
UNKNOWN, // 2DDF
7386
CYRILLIC, // 2DE0..2DFF
7387
COMMON, // 2E00..2E52
7388
UNKNOWN, // 2E53..2E7F
7389
HAN, // 2E80..2E99
7390
UNKNOWN, // 2E9A
7391
HAN, // 2E9B..2EF3
7392
UNKNOWN, // 2EF4..2EFF
7393
HAN, // 2F00..2FD5
7394
UNKNOWN, // 2FD6..2FEF
7395
COMMON, // 2FF0..2FFB
7396
UNKNOWN, // 2FFC..2FFF
7397
COMMON, // 3000..3004
7398
HAN, // 3005
7399
COMMON, // 3006
7400
HAN, // 3007
7401
COMMON, // 3008..3020
7402
HAN, // 3021..3029
7403
INHERITED, // 302A..302D
7404
HANGUL, // 302E..302F
7405
COMMON, // 3030..3037
7406
HAN, // 3038..303B
7407
COMMON, // 303C..303F
7408
UNKNOWN, // 3040
7409
HIRAGANA, // 3041..3096
7410
UNKNOWN, // 3097..3098
7411
INHERITED, // 3099..309A
7412
COMMON, // 309B..309C
7413
HIRAGANA, // 309D..309F
7414
COMMON, // 30A0
7415
KATAKANA, // 30A1..30FA
7416
COMMON, // 30FB..30FC
7417
KATAKANA, // 30FD..30FF
7418
UNKNOWN, // 3100..3104
7419
BOPOMOFO, // 3105..312F
7420
UNKNOWN, // 3130
7421
HANGUL, // 3131..318E
7422
UNKNOWN, // 318F
7423
COMMON, // 3190..319F
7424
BOPOMOFO, // 31A0..31BF
7425
COMMON, // 31C0..31E3
7426
UNKNOWN, // 31E4..31EF
7427
KATAKANA, // 31F0..31FF
7428
HANGUL, // 3200..321E
7429
UNKNOWN, // 321F
7430
COMMON, // 3220..325F
7431
HANGUL, // 3260..327E
7432
COMMON, // 327F..32CF
7433
KATAKANA, // 32D0..32FE
7434
COMMON, // 32FF
7435
KATAKANA, // 3300..3357
7436
COMMON, // 3358..33FF
7437
HAN, // 3400..4DBF
7438
COMMON, // 4DC0..4DFF
7439
HAN, // 4E00..9FFC
7440
UNKNOWN, // 9FFD..9FFF
7441
YI, // A000..A48C
7442
UNKNOWN, // A48D..A48F
7443
YI, // A490..A4C6
7444
UNKNOWN, // A4C7..A4CF
7445
LISU, // A4D0..A4FF
7446
VAI, // A500..A62B
7447
UNKNOWN, // A62C..A63F
7448
CYRILLIC, // A640..A69F
7449
BAMUM, // A6A0..A6F7
7450
UNKNOWN, // A6F8..A6FF
7451
COMMON, // A700..A721
7452
LATIN, // A722..A787
7453
COMMON, // A788..A78A
7454
LATIN, // A78B..A7BF
7455
UNKNOWN, // A7C0..A7C1
7456
LATIN, // A7C2..A7CA
7457
UNKNOWN, // A7CB..A7F4
7458
LATIN, // A7F5..A7FF
7459
SYLOTI_NAGRI, // A800..A82C
7460
UNKNOWN, // A82D..A82F
7461
COMMON, // A830..A839
7462
UNKNOWN, // A83A..A83F
7463
PHAGS_PA, // A840..A877
7464
UNKNOWN, // A878..A87F
7465
SAURASHTRA, // A880..A8C5
7466
UNKNOWN, // A8C6..A8CD
7467
SAURASHTRA, // A8CE..A8D9
7468
UNKNOWN, // A8DA..A8DF
7469
DEVANAGARI, // A8E0..A8FF
7470
KAYAH_LI, // A900..A92D
7471
COMMON, // A92E
7472
KAYAH_LI, // A92F
7473
REJANG, // A930..A953
7474
UNKNOWN, // A954..A95E
7475
REJANG, // A95F
7476
HANGUL, // A960..A97C
7477
UNKNOWN, // A97D..A97F
7478
JAVANESE, // A980..A9CD
7479
UNKNOWN, // A9CE
7480
COMMON, // A9CF
7481
JAVANESE, // A9D0..A9D9
7482
UNKNOWN, // A9DA..A9DD
7483
JAVANESE, // A9DE..A9DF
7484
MYANMAR, // A9E0..A9FE
7485
UNKNOWN, // A9FF
7486
CHAM, // AA00..AA36
7487
UNKNOWN, // AA37..AA3F
7488
CHAM, // AA40..AA4D
7489
UNKNOWN, // AA4E..AA4F
7490
CHAM, // AA50..AA59
7491
UNKNOWN, // AA5A..AA5B
7492
CHAM, // AA5C..AA5F
7493
MYANMAR, // AA60..AA7F
7494
TAI_VIET, // AA80..AAC2
7495
UNKNOWN, // AAC3..AADA
7496
TAI_VIET, // AADB..AADF
7497
MEETEI_MAYEK, // AAE0..AAF6
7498
UNKNOWN, // AAF7..AB00
7499
ETHIOPIC, // AB01..AB06
7500
UNKNOWN, // AB07..AB08
7501
ETHIOPIC, // AB09..AB0E
7502
UNKNOWN, // AB0F..AB10
7503
ETHIOPIC, // AB11..AB16
7504
UNKNOWN, // AB17..AB1F
7505
ETHIOPIC, // AB20..AB26
7506
UNKNOWN, // AB27
7507
ETHIOPIC, // AB28..AB2E
7508
UNKNOWN, // AB2F
7509
LATIN, // AB30..AB5A
7510
COMMON, // AB5B
7511
LATIN, // AB5C..AB64
7512
GREEK, // AB65
7513
LATIN, // AB66..AB69
7514
COMMON, // AB6A..AB6B
7515
UNKNOWN, // AB6C..AB6F
7516
CHEROKEE, // AB70..ABBF
7517
MEETEI_MAYEK, // ABC0..ABED
7518
UNKNOWN, // ABEE..ABEF
7519
MEETEI_MAYEK, // ABF0..ABF9
7520
UNKNOWN, // ABFA..ABFF
7521
HANGUL, // AC00..D7A3
7522
UNKNOWN, // D7A4..D7AF
7523
HANGUL, // D7B0..D7C6
7524
UNKNOWN, // D7C7..D7CA
7525
HANGUL, // D7CB..D7FB
7526
UNKNOWN, // D7FC..F8FF
7527
HAN, // F900..FA6D
7528
UNKNOWN, // FA6E..FA6F
7529
HAN, // FA70..FAD9
7530
UNKNOWN, // FADA..FAFF
7531
LATIN, // FB00..FB06
7532
UNKNOWN, // FB07..FB12
7533
ARMENIAN, // FB13..FB17
7534
UNKNOWN, // FB18..FB1C
7535
HEBREW, // FB1D..FB36
7536
UNKNOWN, // FB37
7537
HEBREW, // FB38..FB3C
7538
UNKNOWN, // FB3D
7539
HEBREW, // FB3E
7540
UNKNOWN, // FB3F
7541
HEBREW, // FB40..FB41
7542
UNKNOWN, // FB42
7543
HEBREW, // FB43..FB44
7544
UNKNOWN, // FB45
7545
HEBREW, // FB46..FB4F
7546
ARABIC, // FB50..FBC1
7547
UNKNOWN, // FBC2..FBD2
7548
ARABIC, // FBD3..FD3D
7549
COMMON, // FD3E..FD3F
7550
UNKNOWN, // FD40..FD4F
7551
ARABIC, // FD50..FD8F
7552
UNKNOWN, // FD90..FD91
7553
ARABIC, // FD92..FDC7
7554
UNKNOWN, // FDC8..FDEF
7555
ARABIC, // FDF0..FDFD
7556
UNKNOWN, // FDFE..FDFF
7557
INHERITED, // FE00..FE0F
7558
COMMON, // FE10..FE19
7559
UNKNOWN, // FE1A..FE1F
7560
INHERITED, // FE20..FE2D
7561
CYRILLIC, // FE2E..FE2F
7562
COMMON, // FE30..FE52
7563
UNKNOWN, // FE53
7564
COMMON, // FE54..FE66
7565
UNKNOWN, // FE67
7566
COMMON, // FE68..FE6B
7567
UNKNOWN, // FE6C..FE6F
7568
ARABIC, // FE70..FE74
7569
UNKNOWN, // FE75
7570
ARABIC, // FE76..FEFC
7571
UNKNOWN, // FEFD..FEFE
7572
COMMON, // FEFF
7573
UNKNOWN, // FF00
7574
COMMON, // FF01..FF20
7575
LATIN, // FF21..FF3A
7576
COMMON, // FF3B..FF40
7577
LATIN, // FF41..FF5A
7578
COMMON, // FF5B..FF65
7579
KATAKANA, // FF66..FF6F
7580
COMMON, // FF70
7581
KATAKANA, // FF71..FF9D
7582
COMMON, // FF9E..FF9F
7583
HANGUL, // FFA0..FFBE
7584
UNKNOWN, // FFBF..FFC1
7585
HANGUL, // FFC2..FFC7
7586
UNKNOWN, // FFC8..FFC9
7587
HANGUL, // FFCA..FFCF
7588
UNKNOWN, // FFD0..FFD1
7589
HANGUL, // FFD2..FFD7
7590
UNKNOWN, // FFD8..FFD9
7591
HANGUL, // FFDA..FFDC
7592
UNKNOWN, // FFDD..FFDF
7593
COMMON, // FFE0..FFE6
7594
UNKNOWN, // FFE7
7595
COMMON, // FFE8..FFEE
7596
UNKNOWN, // FFEF..FFF8
7597
COMMON, // FFF9..FFFD
7598
UNKNOWN, // FFFE..FFFF
7599
LINEAR_B, // 10000..1000B
7600
UNKNOWN, // 1000C
7601
LINEAR_B, // 1000D..10026
7602
UNKNOWN, // 10027
7603
LINEAR_B, // 10028..1003A
7604
UNKNOWN, // 1003B
7605
LINEAR_B, // 1003C..1003D
7606
UNKNOWN, // 1003E
7607
LINEAR_B, // 1003F..1004D
7608
UNKNOWN, // 1004E..1004F
7609
LINEAR_B, // 10050..1005D
7610
UNKNOWN, // 1005E..1007F
7611
LINEAR_B, // 10080..100FA
7612
UNKNOWN, // 100FB..100FF
7613
COMMON, // 10100..10102
7614
UNKNOWN, // 10103..10106
7615
COMMON, // 10107..10133
7616
UNKNOWN, // 10134..10136
7617
COMMON, // 10137..1013F
7618
GREEK, // 10140..1018E
7619
UNKNOWN, // 1018F
7620
COMMON, // 10190..1019C
7621
UNKNOWN, // 1019D..1019F
7622
GREEK, // 101A0
7623
UNKNOWN, // 101A1..101CF
7624
COMMON, // 101D0..101FC
7625
INHERITED, // 101FD
7626
UNKNOWN, // 101FE..1027F
7627
LYCIAN, // 10280..1029C
7628
UNKNOWN, // 1029D..1029F
7629
CARIAN, // 102A0..102D0
7630
UNKNOWN, // 102D1..102DF
7631
INHERITED, // 102E0
7632
COMMON, // 102E1..102FB
7633
UNKNOWN, // 102FC..102FF
7634
OLD_ITALIC, // 10300..10323
7635
UNKNOWN, // 10324..1032C
7636
OLD_ITALIC, // 1032D..1032F
7637
GOTHIC, // 10330..1034A
7638
UNKNOWN, // 1034B..1034F
7639
OLD_PERMIC, // 10350..1037A
7640
UNKNOWN, // 1037B..1037F
7641
UGARITIC, // 10380..1039D
7642
UNKNOWN, // 1039E
7643
UGARITIC, // 1039F
7644
OLD_PERSIAN, // 103A0..103C3
7645
UNKNOWN, // 103C4..103C7
7646
OLD_PERSIAN, // 103C8..103D5
7647
UNKNOWN, // 103D6..103FF
7648
DESERET, // 10400..1044F
7649
SHAVIAN, // 10450..1047F
7650
OSMANYA, // 10480..1049D
7651
UNKNOWN, // 1049E..1049F
7652
OSMANYA, // 104A0..104A9
7653
UNKNOWN, // 104AA..104AF
7654
OSAGE, // 104B0..104D3
7655
UNKNOWN, // 104D4..104D7
7656
OSAGE, // 104D8..104FB
7657
UNKNOWN, // 104FC..104FF
7658
ELBASAN, // 10500..10527
7659
UNKNOWN, // 10528..1052F
7660
CAUCASIAN_ALBANIAN, // 10530..10563
7661
UNKNOWN, // 10564..1056E
7662
CAUCASIAN_ALBANIAN, // 1056F
7663
UNKNOWN, // 10570..105FF
7664
LINEAR_A, // 10600..10736
7665
UNKNOWN, // 10737..1073F
7666
LINEAR_A, // 10740..10755
7667
UNKNOWN, // 10756..1075F
7668
LINEAR_A, // 10760..10767
7669
UNKNOWN, // 10768..107FF
7670
CYPRIOT, // 10800..10805
7671
UNKNOWN, // 10806..10807
7672
CYPRIOT, // 10808
7673
UNKNOWN, // 10809
7674
CYPRIOT, // 1080A..10835
7675
UNKNOWN, // 10836
7676
CYPRIOT, // 10837..10838
7677
UNKNOWN, // 10839..1083B
7678
CYPRIOT, // 1083C
7679
UNKNOWN, // 1083D..1083E
7680
CYPRIOT, // 1083F
7681
IMPERIAL_ARAMAIC, // 10840..10855
7682
UNKNOWN, // 10856
7683
IMPERIAL_ARAMAIC, // 10857..1085F
7684
PALMYRENE, // 10860..1087F
7685
NABATAEAN, // 10880..1089E
7686
UNKNOWN, // 1089F..108A6
7687
NABATAEAN, // 108A7..108AF
7688
UNKNOWN, // 108B0..108DF
7689
HATRAN, // 108E0..108F2
7690
UNKNOWN, // 108F3
7691
HATRAN, // 108F4..108F5
7692
UNKNOWN, // 108F6..108FA
7693
HATRAN, // 108FB..108FF
7694
PHOENICIAN, // 10900..1091B
7695
UNKNOWN, // 1091C..1091E
7696
PHOENICIAN, // 1091F
7697
LYDIAN, // 10920..10939
7698
UNKNOWN, // 1093A..1093E
7699
LYDIAN, // 1093F
7700
UNKNOWN, // 10940..1097F
7701
MEROITIC_HIEROGLYPHS, // 10980..1099F
7702
MEROITIC_CURSIVE, // 109A0..109B7
7703
UNKNOWN, // 109B8..109BB
7704
MEROITIC_CURSIVE, // 109BC..109CF
7705
UNKNOWN, // 109D0..109D1
7706
MEROITIC_CURSIVE, // 109D2..109FF
7707
KHAROSHTHI, // 10A00..10A03
7708
UNKNOWN, // 10A04
7709
KHAROSHTHI, // 10A05..10A06
7710
UNKNOWN, // 10A07..10A0B
7711
KHAROSHTHI, // 10A0C..10A13
7712
UNKNOWN, // 10A14
7713
KHAROSHTHI, // 10A15..10A17
7714
UNKNOWN, // 10A18
7715
KHAROSHTHI, // 10A19..10A35
7716
UNKNOWN, // 10A36..10A37
7717
KHAROSHTHI, // 10A38..10A3A
7718
UNKNOWN, // 10A3B..10A3E
7719
KHAROSHTHI, // 10A3F..10A48
7720
UNKNOWN, // 10A49..10A4F
7721
KHAROSHTHI, // 10A50..10A58
7722
UNKNOWN, // 10A59..10A5F
7723
OLD_SOUTH_ARABIAN, // 10A60..10A7F
7724
OLD_NORTH_ARABIAN, // 10A80..10A9F
7725
UNKNOWN, // 10AA0..10ABF
7726
MANICHAEAN, // 10AC0..10AE6
7727
UNKNOWN, // 10AE7..10AEA
7728
MANICHAEAN, // 10AEB..10AF6
7729
UNKNOWN, // 10AF7..10AFF
7730
AVESTAN, // 10B00..10B35
7731
UNKNOWN, // 10B36..10B38
7732
AVESTAN, // 10B39..10B3F
7733
INSCRIPTIONAL_PARTHIAN, // 10B40..10B55
7734
UNKNOWN, // 10B56..10B57
7735
INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F
7736
INSCRIPTIONAL_PAHLAVI, // 10B60..10B72
7737
UNKNOWN, // 10B73..10B77
7738
INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F
7739
PSALTER_PAHLAVI, // 10B80..10B91
7740
UNKNOWN, // 10B92..10B98
7741
PSALTER_PAHLAVI, // 10B99..10B9C
7742
UNKNOWN, // 10B9D..10BA8
7743
PSALTER_PAHLAVI, // 10BA9..10BAF
7744
UNKNOWN, // 10BB0..10BFF
7745
OLD_TURKIC, // 10C00..10C48
7746
UNKNOWN, // 10C49..10C7F
7747
OLD_HUNGARIAN, // 10C80..10CB2
7748
UNKNOWN, // 10CB3..10CBF
7749
OLD_HUNGARIAN, // 10CC0..10CF2
7750
UNKNOWN, // 10CF3..10CF9
7751
OLD_HUNGARIAN, // 10CFA..10CFF
7752
HANIFI_ROHINGYA, // 10D00..10D27
7753
UNKNOWN, // 10D28..10D2F
7754
HANIFI_ROHINGYA, // 10D30..10D39
7755
UNKNOWN, // 10D3A..10E5F
7756
ARABIC, // 10E60..10E7E
7757
UNKNOWN, // 10E7F
7758
YEZIDI, // 10E80..10EA9
7759
UNKNOWN, // 10EAA
7760
YEZIDI, // 10EAB..10EAD
7761
UNKNOWN, // 10EAE..10EAF
7762
YEZIDI, // 10EB0..10EB1
7763
UNKNOWN, // 10EB2..10EFF
7764
OLD_SOGDIAN, // 10F00..10F27
7765
UNKNOWN, // 10F28..10F2F
7766
SOGDIAN, // 10F30..10F59
7767
UNKNOWN, // 10F5A..10FAF
7768
CHORASMIAN, // 10FB0..10FCB
7769
UNKNOWN, // 10FCC..10FDF
7770
ELYMAIC, // 10FE0..10FF6
7771
UNKNOWN, // 10FF7..10FFF
7772
BRAHMI, // 11000..1104D
7773
UNKNOWN, // 1104E..11051
7774
BRAHMI, // 11052..1106F
7775
UNKNOWN, // 11070..1107E
7776
BRAHMI, // 1107F
7777
KAITHI, // 11080..110C1
7778
UNKNOWN, // 110C2..110CC
7779
KAITHI, // 110CD
7780
UNKNOWN, // 110CE..110CF
7781
SORA_SOMPENG, // 110D0..110E8
7782
UNKNOWN, // 110E9..110EF
7783
SORA_SOMPENG, // 110F0..110F9
7784
UNKNOWN, // 110FA..110FF
7785
CHAKMA, // 11100..11134
7786
UNKNOWN, // 11135
7787
CHAKMA, // 11136..11147
7788
UNKNOWN, // 11148..1114F
7789
MAHAJANI, // 11150..11176
7790
UNKNOWN, // 11177..1117F
7791
SHARADA, // 11180..111DF
7792
UNKNOWN, // 111E0
7793
SINHALA, // 111E1..111F4
7794
UNKNOWN, // 111F5..111FF
7795
KHOJKI, // 11200..11211
7796
UNKNOWN, // 11212
7797
KHOJKI, // 11213..1123E
7798
UNKNOWN, // 1123F..1127F
7799
MULTANI, // 11280..11286
7800
UNKNOWN, // 11287
7801
MULTANI, // 11288
7802
UNKNOWN, // 11289
7803
MULTANI, // 1128A..1128D
7804
UNKNOWN, // 1128E
7805
MULTANI, // 1128F..1129D
7806
UNKNOWN, // 1129E
7807
MULTANI, // 1129F..112A9
7808
UNKNOWN, // 112AA..112AF
7809
KHUDAWADI, // 112B0..112EA
7810
UNKNOWN, // 112EB..112EF
7811
KHUDAWADI, // 112F0..112F9
7812
UNKNOWN, // 112FA..112FF
7813
GRANTHA, // 11300..11303
7814
UNKNOWN, // 11304
7815
GRANTHA, // 11305..1130C
7816
UNKNOWN, // 1130D..1130E
7817
GRANTHA, // 1130F..11310
7818
UNKNOWN, // 11311..11312
7819
GRANTHA, // 11313..11328
7820
UNKNOWN, // 11329
7821
GRANTHA, // 1132A..11330
7822
UNKNOWN, // 11331
7823
GRANTHA, // 11332..11333
7824
UNKNOWN, // 11334
7825
GRANTHA, // 11335..11339
7826
UNKNOWN, // 1133A
7827
INHERITED, // 1133B
7828
GRANTHA, // 1133C..11344
7829
UNKNOWN, // 11345..11346
7830
GRANTHA, // 11347..11348
7831
UNKNOWN, // 11349..1134A
7832
GRANTHA, // 1134B..1134D
7833
UNKNOWN, // 1134E..1134F
7834
GRANTHA, // 11350
7835
UNKNOWN, // 11351..11356
7836
GRANTHA, // 11357
7837
UNKNOWN, // 11358..1135C
7838
GRANTHA, // 1135D..11363
7839
UNKNOWN, // 11364..11365
7840
GRANTHA, // 11366..1136C
7841
UNKNOWN, // 1136D..1136F
7842
GRANTHA, // 11370..11374
7843
UNKNOWN, // 11375..113FF
7844
NEWA, // 11400..1145B
7845
UNKNOWN, // 1145C
7846
NEWA, // 1145D..11461
7847
UNKNOWN, // 11462..1147F
7848
TIRHUTA, // 11480..114C7
7849
UNKNOWN, // 114C8..114CF
7850
TIRHUTA, // 114D0..114D9
7851
UNKNOWN, // 114DA..1157F
7852
SIDDHAM, // 11580..115B5
7853
UNKNOWN, // 115B6..115B7
7854
SIDDHAM, // 115B8..115DD
7855
UNKNOWN, // 115DE..115FF
7856
MODI, // 11600..11644
7857
UNKNOWN, // 11645..1164F
7858
MODI, // 11650..11659
7859
UNKNOWN, // 1165A..1165F
7860
MONGOLIAN, // 11660..1166C
7861
UNKNOWN, // 1166D..1167F
7862
TAKRI, // 11680..116B8
7863
UNKNOWN, // 116B9..116BF
7864
TAKRI, // 116C0..116C9
7865
UNKNOWN, // 116CA..116FF
7866
AHOM, // 11700..1171A
7867
UNKNOWN, // 1171B..1171C
7868
AHOM, // 1171D..1172B
7869
UNKNOWN, // 1172C..1172F
7870
AHOM, // 11730..1173F
7871
UNKNOWN, // 11740..117FF
7872
DOGRA, // 11800..1183B
7873
UNKNOWN, // 1183C..1189F
7874
WARANG_CITI, // 118A0..118F2
7875
UNKNOWN, // 118F3..118FE
7876
WARANG_CITI, // 118FF
7877
DIVES_AKURU, // 11900..11906
7878
UNKNOWN, // 11907..11908
7879
DIVES_AKURU, // 11909
7880
UNKNOWN, // 1190A..1190B
7881
DIVES_AKURU, // 1190C..11913
7882
UNKNOWN, // 11914
7883
DIVES_AKURU, // 11915..11916
7884
UNKNOWN, // 11917
7885
DIVES_AKURU, // 11918..11935
7886
UNKNOWN, // 11936
7887
DIVES_AKURU, // 11937..11938
7888
UNKNOWN, // 11939..1193A
7889
DIVES_AKURU, // 1193B..11946
7890
UNKNOWN, // 11947..1194F
7891
DIVES_AKURU, // 11950..11959
7892
UNKNOWN, // 1195A..1199F
7893
NANDINAGARI, // 119A0..119A7
7894
UNKNOWN, // 119A8..119A9
7895
NANDINAGARI, // 119AA..119D7
7896
UNKNOWN, // 119D8..119D9
7897
NANDINAGARI, // 119DA..119E4
7898
UNKNOWN, // 119E5..119FF
7899
ZANABAZAR_SQUARE, // 11A00..11A47
7900
UNKNOWN, // 11A48..11A4F
7901
SOYOMBO, // 11A50..11AA2
7902
UNKNOWN, // 11AA3..11ABF
7903
PAU_CIN_HAU, // 11AC0..11AF8
7904
UNKNOWN, // 11AF9..11BFF
7905
BHAIKSUKI, // 11C00..11C08
7906
UNKNOWN, // 11C09
7907
BHAIKSUKI, // 11C0A..11C36
7908
UNKNOWN, // 11C37
7909
BHAIKSUKI, // 11C38..11C45
7910
UNKNOWN, // 11C46..11C4F
7911
BHAIKSUKI, // 11C50..11C6C
7912
UNKNOWN, // 11C6D..11C6F
7913
MARCHEN, // 11C70..11C8F
7914
UNKNOWN, // 11C90..11C91
7915
MARCHEN, // 11C92..11CA7
7916
UNKNOWN, // 11CA8
7917
MARCHEN, // 11CA9..11CB6
7918
UNKNOWN, // 11CB7..11CFF
7919
MASARAM_GONDI, // 11D00..11D06
7920
UNKNOWN, // 11D07
7921
MASARAM_GONDI, // 11D08..11D09
7922
UNKNOWN, // 11D0A
7923
MASARAM_GONDI, // 11D0B..11D36
7924
UNKNOWN, // 11D37..11D39
7925
MASARAM_GONDI, // 11D3A
7926
UNKNOWN, // 11D3B
7927
MASARAM_GONDI, // 11D3C..11D3D
7928
UNKNOWN, // 11D3E
7929
MASARAM_GONDI, // 11D3F..11D47
7930
UNKNOWN, // 11D48..11D4F
7931
MASARAM_GONDI, // 11D50..11D59
7932
UNKNOWN, // 11D5A..11D5F
7933
GUNJALA_GONDI, // 11D60..11D65
7934
UNKNOWN, // 11D66
7935
GUNJALA_GONDI, // 11D67..11D68
7936
UNKNOWN, // 11D69
7937
GUNJALA_GONDI, // 11D6A..11D8E
7938
UNKNOWN, // 11D8F
7939
GUNJALA_GONDI, // 11D90..11D91
7940
UNKNOWN, // 11D92
7941
GUNJALA_GONDI, // 11D93..11D98
7942
UNKNOWN, // 11D99..11D9F
7943
GUNJALA_GONDI, // 11DA0..11DA9
7944
UNKNOWN, // 11DAA..11EDF
7945
MAKASAR, // 11EE0..11EF8
7946
UNKNOWN, // 11EF9..11FAF
7947
LISU, // 11FB0
7948
UNKNOWN, // 11FB1..11FBF
7949
TAMIL, // 11FC0..11FF1
7950
UNKNOWN, // 11FF2..11FFE
7951
TAMIL, // 11FFF
7952
CUNEIFORM, // 12000..12399
7953
UNKNOWN, // 1239A..123FF
7954
CUNEIFORM, // 12400..1246E
7955
UNKNOWN, // 1246F
7956
CUNEIFORM, // 12470..12474
7957
UNKNOWN, // 12475..1247F
7958
CUNEIFORM, // 12480..12543
7959
UNKNOWN, // 12544..12FFF
7960
EGYPTIAN_HIEROGLYPHS, // 13000..1342E
7961
UNKNOWN, // 1342F
7962
EGYPTIAN_HIEROGLYPHS, // 13430..13438
7963
UNKNOWN, // 13439..143FF
7964
ANATOLIAN_HIEROGLYPHS, // 14400..14646
7965
UNKNOWN, // 14647..167FF
7966
BAMUM, // 16800..16A38
7967
UNKNOWN, // 16A39..16A3F
7968
MRO, // 16A40..16A5E
7969
UNKNOWN, // 16A5F
7970
MRO, // 16A60..16A69
7971
UNKNOWN, // 16A6A..16A6D
7972
MRO, // 16A6E..16A6F
7973
UNKNOWN, // 16A70..16ACF
7974
BASSA_VAH, // 16AD0..16AED
7975
UNKNOWN, // 16AEE..16AEF
7976
BASSA_VAH, // 16AF0..16AF5
7977
UNKNOWN, // 16AF6..16AFF
7978
PAHAWH_HMONG, // 16B00..16B45
7979
UNKNOWN, // 16B46..16B4F
7980
PAHAWH_HMONG, // 16B50..16B59
7981
UNKNOWN, // 16B5A
7982
PAHAWH_HMONG, // 16B5B..16B61
7983
UNKNOWN, // 16B62
7984
PAHAWH_HMONG, // 16B63..16B77
7985
UNKNOWN, // 16B78..16B7C
7986
PAHAWH_HMONG, // 16B7D..16B8F
7987
UNKNOWN, // 16B90..16E3F
7988
MEDEFAIDRIN, // 16E40..16E9A
7989
UNKNOWN, // 16E9B..16EFF
7990
MIAO, // 16F00..16F4A
7991
UNKNOWN, // 16F4B..16F4E
7992
MIAO, // 16F4F..16F87
7993
UNKNOWN, // 16F88..16F8E
7994
MIAO, // 16F8F..16F9F
7995
UNKNOWN, // 16FA0..16FDF
7996
TANGUT, // 16FE0
7997
NUSHU, // 16FE1
7998
COMMON, // 16FE2..16FE3
7999
KHITAN_SMALL_SCRIPT, // 16FE4
8000
UNKNOWN, // 16FE5..16FEF
8001
HAN, // 16FF0..16FF1
8002
UNKNOWN, // 16FF2..16FFF
8003
TANGUT, // 17000..187F7
8004
UNKNOWN, // 187F8..187FF
8005
TANGUT, // 18800..18AFF
8006
KHITAN_SMALL_SCRIPT, // 18B00..18CD5
8007
UNKNOWN, // 18CD6..18CFF
8008
TANGUT, // 18D00..18D08
8009
UNKNOWN, // 18D09..1AFFF
8010
KATAKANA, // 1B000
8011
HIRAGANA, // 1B001..1B11E
8012
UNKNOWN, // 1B11F..1B14F
8013
HIRAGANA, // 1B150..1B152
8014
UNKNOWN, // 1B153..1B163
8015
KATAKANA, // 1B164..1B167
8016
UNKNOWN, // 1B168..1B16F
8017
NUSHU, // 1B170..1B2FB
8018
UNKNOWN, // 1B2FC..1BBFF
8019
DUPLOYAN, // 1BC00..1BC6A
8020
UNKNOWN, // 1BC6B..1BC6F
8021
DUPLOYAN, // 1BC70..1BC7C
8022
UNKNOWN, // 1BC7D..1BC7F
8023
DUPLOYAN, // 1BC80..1BC88
8024
UNKNOWN, // 1BC89..1BC8F
8025
DUPLOYAN, // 1BC90..1BC99
8026
UNKNOWN, // 1BC9A..1BC9B
8027
DUPLOYAN, // 1BC9C..1BC9F
8028
COMMON, // 1BCA0..1BCA3
8029
UNKNOWN, // 1BCA4..1CFFF
8030
COMMON, // 1D000..1D0F5
8031
UNKNOWN, // 1D0F6..1D0FF
8032
COMMON, // 1D100..1D126
8033
UNKNOWN, // 1D127..1D128
8034
COMMON, // 1D129..1D166
8035
INHERITED, // 1D167..1D169
8036
COMMON, // 1D16A..1D17A
8037
INHERITED, // 1D17B..1D182
8038
COMMON, // 1D183..1D184
8039
INHERITED, // 1D185..1D18B
8040
COMMON, // 1D18C..1D1A9
8041
INHERITED, // 1D1AA..1D1AD
8042
COMMON, // 1D1AE..1D1E8
8043
UNKNOWN, // 1D1E9..1D1FF
8044
GREEK, // 1D200..1D245
8045
UNKNOWN, // 1D246..1D2DF
8046
COMMON, // 1D2E0..1D2F3
8047
UNKNOWN, // 1D2F4..1D2FF
8048
COMMON, // 1D300..1D356
8049
UNKNOWN, // 1D357..1D35F
8050
COMMON, // 1D360..1D378
8051
UNKNOWN, // 1D379..1D3FF
8052
COMMON, // 1D400..1D454
8053
UNKNOWN, // 1D455
8054
COMMON, // 1D456..1D49C
8055
UNKNOWN, // 1D49D
8056
COMMON, // 1D49E..1D49F
8057
UNKNOWN, // 1D4A0..1D4A1
8058
COMMON, // 1D4A2
8059
UNKNOWN, // 1D4A3..1D4A4
8060
COMMON, // 1D4A5..1D4A6
8061
UNKNOWN, // 1D4A7..1D4A8
8062
COMMON, // 1D4A9..1D4AC
8063
UNKNOWN, // 1D4AD
8064
COMMON, // 1D4AE..1D4B9
8065
UNKNOWN, // 1D4BA
8066
COMMON, // 1D4BB
8067
UNKNOWN, // 1D4BC
8068
COMMON, // 1D4BD..1D4C3
8069
UNKNOWN, // 1D4C4
8070
COMMON, // 1D4C5..1D505
8071
UNKNOWN, // 1D506
8072
COMMON, // 1D507..1D50A
8073
UNKNOWN, // 1D50B..1D50C
8074
COMMON, // 1D50D..1D514
8075
UNKNOWN, // 1D515
8076
COMMON, // 1D516..1D51C
8077
UNKNOWN, // 1D51D
8078
COMMON, // 1D51E..1D539
8079
UNKNOWN, // 1D53A
8080
COMMON, // 1D53B..1D53E
8081
UNKNOWN, // 1D53F
8082
COMMON, // 1D540..1D544
8083
UNKNOWN, // 1D545
8084
COMMON, // 1D546
8085
UNKNOWN, // 1D547..1D549
8086
COMMON, // 1D54A..1D550
8087
UNKNOWN, // 1D551
8088
COMMON, // 1D552..1D6A5
8089
UNKNOWN, // 1D6A6..1D6A7
8090
COMMON, // 1D6A8..1D7CB
8091
UNKNOWN, // 1D7CC..1D7CD
8092
COMMON, // 1D7CE..1D7FF
8093
SIGNWRITING, // 1D800..1DA8B
8094
UNKNOWN, // 1DA8C..1DA9A
8095
SIGNWRITING, // 1DA9B..1DA9F
8096
UNKNOWN, // 1DAA0
8097
SIGNWRITING, // 1DAA1..1DAAF
8098
UNKNOWN, // 1DAB0..1DFFF
8099
GLAGOLITIC, // 1E000..1E006
8100
UNKNOWN, // 1E007
8101
GLAGOLITIC, // 1E008..1E018
8102
UNKNOWN, // 1E019..1E01A
8103
GLAGOLITIC, // 1E01B..1E021
8104
UNKNOWN, // 1E022
8105
GLAGOLITIC, // 1E023..1E024
8106
UNKNOWN, // 1E025
8107
GLAGOLITIC, // 1E026..1E02A
8108
UNKNOWN, // 1E02B..1E0FF
8109
NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C
8110
UNKNOWN, // 1E12D..1E12F
8111
NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D
8112
UNKNOWN, // 1E13E..1E13F
8113
NYIAKENG_PUACHUE_HMONG, // 1E140..1E149
8114
UNKNOWN, // 1E14A..1E14D
8115
NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F
8116
UNKNOWN, // 1E150..1E2BF
8117
WANCHO, // 1E2C0..1E2F9
8118
UNKNOWN, // 1E2FA..1E2FE
8119
WANCHO, // 1E2FF
8120
UNKNOWN, // 1E300..1E7FF
8121
MENDE_KIKAKUI, // 1E800..1E8C4
8122
UNKNOWN, // 1E8C5..1E8C6
8123
MENDE_KIKAKUI, // 1E8C7..1E8D6
8124
UNKNOWN, // 1E8D7..1E8FF
8125
ADLAM, // 1E900..1E94B
8126
UNKNOWN, // 1E94C..1E94F
8127
ADLAM, // 1E950..1E959
8128
UNKNOWN, // 1E95A..1E95D
8129
ADLAM, // 1E95E..1E95F
8130
UNKNOWN, // 1E960..1EC70
8131
COMMON, // 1EC71..1ECB4
8132
UNKNOWN, // 1ECB5..1ED00
8133
COMMON, // 1ED01..1ED3D
8134
UNKNOWN, // 1ED3E..1EDFF
8135
ARABIC, // 1EE00..1EE03
8136
UNKNOWN, // 1EE04
8137
ARABIC, // 1EE05..1EE1F
8138
UNKNOWN, // 1EE20
8139
ARABIC, // 1EE21..1EE22
8140
UNKNOWN, // 1EE23
8141
ARABIC, // 1EE24
8142
UNKNOWN, // 1EE25..1EE26
8143
ARABIC, // 1EE27
8144
UNKNOWN, // 1EE28
8145
ARABIC, // 1EE29..1EE32
8146
UNKNOWN, // 1EE33
8147
ARABIC, // 1EE34..1EE37
8148
UNKNOWN, // 1EE38
8149
ARABIC, // 1EE39
8150
UNKNOWN, // 1EE3A
8151
ARABIC, // 1EE3B
8152
UNKNOWN, // 1EE3C..1EE41
8153
ARABIC, // 1EE42
8154
UNKNOWN, // 1EE43..1EE46
8155
ARABIC, // 1EE47
8156
UNKNOWN, // 1EE48
8157
ARABIC, // 1EE49
8158
UNKNOWN, // 1EE4A
8159
ARABIC, // 1EE4B
8160
UNKNOWN, // 1EE4C
8161
ARABIC, // 1EE4D..1EE4F
8162
UNKNOWN, // 1EE50
8163
ARABIC, // 1EE51..1EE52
8164
UNKNOWN, // 1EE53
8165
ARABIC, // 1EE54
8166
UNKNOWN, // 1EE55..1EE56
8167
ARABIC, // 1EE57
8168
UNKNOWN, // 1EE58
8169
ARABIC, // 1EE59
8170
UNKNOWN, // 1EE5A
8171
ARABIC, // 1EE5B
8172
UNKNOWN, // 1EE5C
8173
ARABIC, // 1EE5D
8174
UNKNOWN, // 1EE5E
8175
ARABIC, // 1EE5F
8176
UNKNOWN, // 1EE60
8177
ARABIC, // 1EE61..1EE62
8178
UNKNOWN, // 1EE63
8179
ARABIC, // 1EE64
8180
UNKNOWN, // 1EE65..1EE66
8181
ARABIC, // 1EE67..1EE6A
8182
UNKNOWN, // 1EE6B
8183
ARABIC, // 1EE6C..1EE72
8184
UNKNOWN, // 1EE73
8185
ARABIC, // 1EE74..1EE77
8186
UNKNOWN, // 1EE78
8187
ARABIC, // 1EE79..1EE7C
8188
UNKNOWN, // 1EE7D
8189
ARABIC, // 1EE7E
8190
UNKNOWN, // 1EE7F
8191
ARABIC, // 1EE80..1EE89
8192
UNKNOWN, // 1EE8A
8193
ARABIC, // 1EE8B..1EE9B
8194
UNKNOWN, // 1EE9C..1EEA0
8195
ARABIC, // 1EEA1..1EEA3
8196
UNKNOWN, // 1EEA4
8197
ARABIC, // 1EEA5..1EEA9
8198
UNKNOWN, // 1EEAA
8199
ARABIC, // 1EEAB..1EEBB
8200
UNKNOWN, // 1EEBC..1EEEF
8201
ARABIC, // 1EEF0..1EEF1
8202
UNKNOWN, // 1EEF2..1EFFF
8203
COMMON, // 1F000..1F02B
8204
UNKNOWN, // 1F02C..1F02F
8205
COMMON, // 1F030..1F093
8206
UNKNOWN, // 1F094..1F09F
8207
COMMON, // 1F0A0..1F0AE
8208
UNKNOWN, // 1F0AF..1F0B0
8209
COMMON, // 1F0B1..1F0BF
8210
UNKNOWN, // 1F0C0
8211
COMMON, // 1F0C1..1F0CF
8212
UNKNOWN, // 1F0D0
8213
COMMON, // 1F0D1..1F0F5
8214
UNKNOWN, // 1F0F6..1F0FF
8215
COMMON, // 1F100..1F1AD
8216
UNKNOWN, // 1F1AE..1F1E5
8217
COMMON, // 1F1E6..1F1FF
8218
HIRAGANA, // 1F200
8219
COMMON, // 1F201..1F202
8220
UNKNOWN, // 1F203..1F20F
8221
COMMON, // 1F210..1F23B
8222
UNKNOWN, // 1F23C..1F23F
8223
COMMON, // 1F240..1F248
8224
UNKNOWN, // 1F249..1F24F
8225
COMMON, // 1F250..1F251
8226
UNKNOWN, // 1F252..1F25F
8227
COMMON, // 1F260..1F265
8228
UNKNOWN, // 1F266..1F2FF
8229
COMMON, // 1F300..1F6D7
8230
UNKNOWN, // 1F6D8..1F6DF
8231
COMMON, // 1F6E0..1F6EC
8232
UNKNOWN, // 1F6ED..1F6EF
8233
COMMON, // 1F6F0..1F6FC
8234
UNKNOWN, // 1F6FD..1F6FF
8235
COMMON, // 1F700..1F773
8236
UNKNOWN, // 1F774..1F77F
8237
COMMON, // 1F780..1F7D8
8238
UNKNOWN, // 1F7D9..1F7DF
8239
COMMON, // 1F7E0..1F7EB
8240
UNKNOWN, // 1F7EC..1F7FF
8241
COMMON, // 1F800..1F80B
8242
UNKNOWN, // 1F80C..1F80F
8243
COMMON, // 1F810..1F847
8244
UNKNOWN, // 1F848..1F84F
8245
COMMON, // 1F850..1F859
8246
UNKNOWN, // 1F85A..1F85F
8247
COMMON, // 1F860..1F887
8248
UNKNOWN, // 1F888..1F88F
8249
COMMON, // 1F890..1F8AD
8250
UNKNOWN, // 1F8AE..1F8AF
8251
COMMON, // 1F8B0..1F8B1
8252
UNKNOWN, // 1F8B2..1F8FF
8253
COMMON, // 1F900..1F978
8254
UNKNOWN, // 1F979
8255
COMMON, // 1F97A..1F9CB
8256
UNKNOWN, // 1F9CC
8257
COMMON, // 1F9CD..1FA53
8258
UNKNOWN, // 1FA54..1FA5F
8259
COMMON, // 1FA60..1FA6D
8260
UNKNOWN, // 1FA6E..1FA6F
8261
COMMON, // 1FA70..1FA74
8262
UNKNOWN, // 1FA75..1FA77
8263
COMMON, // 1FA78..1FA7A
8264
UNKNOWN, // 1FA7B..1FA7F
8265
COMMON, // 1FA80..1FA86
8266
UNKNOWN, // 1FA87..1FA8F
8267
COMMON, // 1FA90..1FAA8
8268
UNKNOWN, // 1FAA9..1FAAF
8269
COMMON, // 1FAB0..1FAB6
8270
UNKNOWN, // 1FAB7..1FABF
8271
COMMON, // 1FAC0..1FAC2
8272
UNKNOWN, // 1FAC3..1FACF
8273
COMMON, // 1FAD0..1FAD6
8274
UNKNOWN, // 1FAD7..1FAFF
8275
COMMON, // 1FB00..1FB92
8276
UNKNOWN, // 1FB93
8277
COMMON, // 1FB94..1FBCA
8278
UNKNOWN, // 1FBCB..1FBEF
8279
COMMON, // 1FBF0..1FBF9
8280
UNKNOWN, // 1FBFA..1FFFF
8281
HAN, // 20000..2A6DD
8282
UNKNOWN, // 2A6DE..2A6FF
8283
HAN, // 2A700..2B734
8284
UNKNOWN, // 2B735..2B73F
8285
HAN, // 2B740..2B81D
8286
UNKNOWN, // 2B81E..2B81F
8287
HAN, // 2B820..2CEA1
8288
UNKNOWN, // 2CEA2..2CEAF
8289
HAN, // 2CEB0..2EBE0
8290
UNKNOWN, // 2EBE1..2F7FF
8291
HAN, // 2F800..2FA1D
8292
UNKNOWN, // 2FA1E..2FFFF
8293
HAN, // 30000..3134A
8294
UNKNOWN, // 3134B..E0000
8295
COMMON, // E0001
8296
UNKNOWN, // E0002..E001F
8297
COMMON, // E0020..E007F
8298
UNKNOWN, // E0080..E00FF
8299
INHERITED, // E0100..E01EF
8300
UNKNOWN, // E01F0..10FFFF
8301
};
8302
8303
private static final HashMap<String, Character.UnicodeScript> aliases;
8304
static {
8305
aliases = new HashMap<>((int)(157 / 0.75f + 1.0f));
8306
aliases.put("ADLM", ADLAM);
8307
aliases.put("AGHB", CAUCASIAN_ALBANIAN);
8308
aliases.put("AHOM", AHOM);
8309
aliases.put("ARAB", ARABIC);
8310
aliases.put("ARMI", IMPERIAL_ARAMAIC);
8311
aliases.put("ARMN", ARMENIAN);
8312
aliases.put("AVST", AVESTAN);
8313
aliases.put("BALI", BALINESE);
8314
aliases.put("BAMU", BAMUM);
8315
aliases.put("BASS", BASSA_VAH);
8316
aliases.put("BATK", BATAK);
8317
aliases.put("BENG", BENGALI);
8318
aliases.put("BHKS", BHAIKSUKI);
8319
aliases.put("BOPO", BOPOMOFO);
8320
aliases.put("BRAH", BRAHMI);
8321
aliases.put("BRAI", BRAILLE);
8322
aliases.put("BUGI", BUGINESE);
8323
aliases.put("BUHD", BUHID);
8324
aliases.put("CAKM", CHAKMA);
8325
aliases.put("CANS", CANADIAN_ABORIGINAL);
8326
aliases.put("CARI", CARIAN);
8327
aliases.put("CHAM", CHAM);
8328
aliases.put("CHER", CHEROKEE);
8329
aliases.put("CHRS", CHORASMIAN);
8330
aliases.put("COPT", COPTIC);
8331
aliases.put("CPRT", CYPRIOT);
8332
aliases.put("CYRL", CYRILLIC);
8333
aliases.put("DEVA", DEVANAGARI);
8334
aliases.put("DIAK", DIVES_AKURU);
8335
aliases.put("DOGR", DOGRA);
8336
aliases.put("DSRT", DESERET);
8337
aliases.put("DUPL", DUPLOYAN);
8338
aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
8339
aliases.put("ELBA", ELBASAN);
8340
aliases.put("ELYM", ELYMAIC);
8341
aliases.put("ETHI", ETHIOPIC);
8342
aliases.put("GEOR", GEORGIAN);
8343
aliases.put("GLAG", GLAGOLITIC);
8344
aliases.put("GONM", MASARAM_GONDI);
8345
aliases.put("GOTH", GOTHIC);
8346
aliases.put("GONG", GUNJALA_GONDI);
8347
aliases.put("GRAN", GRANTHA);
8348
aliases.put("GREK", GREEK);
8349
aliases.put("GUJR", GUJARATI);
8350
aliases.put("GURU", GURMUKHI);
8351
aliases.put("HANG", HANGUL);
8352
aliases.put("HANI", HAN);
8353
aliases.put("HANO", HANUNOO);
8354
aliases.put("HATR", HATRAN);
8355
aliases.put("HEBR", HEBREW);
8356
aliases.put("HIRA", HIRAGANA);
8357
aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
8358
aliases.put("HMNG", PAHAWH_HMONG);
8359
aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
8360
// it appears we don't have the KATAKANA_OR_HIRAGANA
8361
//aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
8362
aliases.put("HUNG", OLD_HUNGARIAN);
8363
aliases.put("ITAL", OLD_ITALIC);
8364
aliases.put("JAVA", JAVANESE);
8365
aliases.put("KALI", KAYAH_LI);
8366
aliases.put("KANA", KATAKANA);
8367
aliases.put("KHAR", KHAROSHTHI);
8368
aliases.put("KHMR", KHMER);
8369
aliases.put("KHOJ", KHOJKI);
8370
aliases.put("KITS", KHITAN_SMALL_SCRIPT);
8371
aliases.put("KNDA", KANNADA);
8372
aliases.put("KTHI", KAITHI);
8373
aliases.put("LANA", TAI_THAM);
8374
aliases.put("LAOO", LAO);
8375
aliases.put("LATN", LATIN);
8376
aliases.put("LEPC", LEPCHA);
8377
aliases.put("LIMB", LIMBU);
8378
aliases.put("LINA", LINEAR_A);
8379
aliases.put("LINB", LINEAR_B);
8380
aliases.put("LISU", LISU);
8381
aliases.put("LYCI", LYCIAN);
8382
aliases.put("LYDI", LYDIAN);
8383
aliases.put("MAHJ", MAHAJANI);
8384
aliases.put("MAKA", MAKASAR);
8385
aliases.put("MARC", MARCHEN);
8386
aliases.put("MAND", MANDAIC);
8387
aliases.put("MANI", MANICHAEAN);
8388
aliases.put("MEDF", MEDEFAIDRIN);
8389
aliases.put("MEND", MENDE_KIKAKUI);
8390
aliases.put("MERC", MEROITIC_CURSIVE);
8391
aliases.put("MERO", MEROITIC_HIEROGLYPHS);
8392
aliases.put("MLYM", MALAYALAM);
8393
aliases.put("MODI", MODI);
8394
aliases.put("MONG", MONGOLIAN);
8395
aliases.put("MROO", MRO);
8396
aliases.put("MTEI", MEETEI_MAYEK);
8397
aliases.put("MULT", MULTANI);
8398
aliases.put("MYMR", MYANMAR);
8399
aliases.put("NAND", NANDINAGARI);
8400
aliases.put("NARB", OLD_NORTH_ARABIAN);
8401
aliases.put("NBAT", NABATAEAN);
8402
aliases.put("NEWA", NEWA);
8403
aliases.put("NKOO", NKO);
8404
aliases.put("NSHU", NUSHU);
8405
aliases.put("OGAM", OGHAM);
8406
aliases.put("OLCK", OL_CHIKI);
8407
aliases.put("ORKH", OLD_TURKIC);
8408
aliases.put("ORYA", ORIYA);
8409
aliases.put("OSGE", OSAGE);
8410
aliases.put("OSMA", OSMANYA);
8411
aliases.put("PALM", PALMYRENE);
8412
aliases.put("PAUC", PAU_CIN_HAU);
8413
aliases.put("PERM", OLD_PERMIC);
8414
aliases.put("PHAG", PHAGS_PA);
8415
aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
8416
aliases.put("PHLP", PSALTER_PAHLAVI);
8417
aliases.put("PHNX", PHOENICIAN);
8418
aliases.put("PLRD", MIAO);
8419
aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
8420
aliases.put("RJNG", REJANG);
8421
aliases.put("ROHG", HANIFI_ROHINGYA);
8422
aliases.put("RUNR", RUNIC);
8423
aliases.put("SAMR", SAMARITAN);
8424
aliases.put("SARB", OLD_SOUTH_ARABIAN);
8425
aliases.put("SAUR", SAURASHTRA);
8426
aliases.put("SGNW", SIGNWRITING);
8427
aliases.put("SHAW", SHAVIAN);
8428
aliases.put("SHRD", SHARADA);
8429
aliases.put("SIDD", SIDDHAM);
8430
aliases.put("SIND", KHUDAWADI);
8431
aliases.put("SINH", SINHALA);
8432
aliases.put("SOGD", SOGDIAN);
8433
aliases.put("SOGO", OLD_SOGDIAN);
8434
aliases.put("SORA", SORA_SOMPENG);
8435
aliases.put("SOYO", SOYOMBO);
8436
aliases.put("SUND", SUNDANESE);
8437
aliases.put("SYLO", SYLOTI_NAGRI);
8438
aliases.put("SYRC", SYRIAC);
8439
aliases.put("TAGB", TAGBANWA);
8440
aliases.put("TAKR", TAKRI);
8441
aliases.put("TALE", TAI_LE);
8442
aliases.put("TALU", NEW_TAI_LUE);
8443
aliases.put("TAML", TAMIL);
8444
aliases.put("TANG", TANGUT);
8445
aliases.put("TAVT", TAI_VIET);
8446
aliases.put("TELU", TELUGU);
8447
aliases.put("TFNG", TIFINAGH);
8448
aliases.put("TGLG", TAGALOG);
8449
aliases.put("THAA", THAANA);
8450
aliases.put("THAI", THAI);
8451
aliases.put("TIBT", TIBETAN);
8452
aliases.put("TIRH", TIRHUTA);
8453
aliases.put("UGAR", UGARITIC);
8454
aliases.put("VAII", VAI);
8455
aliases.put("WARA", WARANG_CITI);
8456
aliases.put("WCHO", WANCHO);
8457
aliases.put("XPEO", OLD_PERSIAN);
8458
aliases.put("XSUX", CUNEIFORM);
8459
aliases.put("YIII", YI);
8460
aliases.put("YEZI", YEZIDI);
8461
aliases.put("ZANB", ZANABAZAR_SQUARE);
8462
aliases.put("ZINH", INHERITED);
8463
aliases.put("ZYYY", COMMON);
8464
aliases.put("ZZZZ", UNKNOWN);
8465
}
8466
8467
/**
8468
* Returns the enum constant representing the Unicode script of which
8469
* the given character (Unicode code point) is assigned to.
8470
*
8471
* @param codePoint the character (Unicode code point) in question.
8472
* @return The {@code UnicodeScript} constant representing the
8473
* Unicode script of which this character is assigned to.
8474
*
8475
* @throws IllegalArgumentException if the specified
8476
* {@code codePoint} is an invalid Unicode code point.
8477
* @see Character#isValidCodePoint(int)
8478
*
8479
*/
8480
public static UnicodeScript of(int codePoint) {
8481
if (!isValidCodePoint(codePoint))
8482
throw new IllegalArgumentException(
8483
String.format("Not a valid Unicode code point: 0x%X", codePoint));
8484
int type = getType(codePoint);
8485
// leave SURROGATE and PRIVATE_USE for table lookup
8486
if (type == UNASSIGNED)
8487
return UNKNOWN;
8488
int index = Arrays.binarySearch(scriptStarts, codePoint);
8489
if (index < 0)
8490
index = -index - 2;
8491
return scripts[index];
8492
}
8493
8494
/**
8495
* Returns the UnicodeScript constant with the given Unicode script
8496
* name or the script name alias. Script names and their aliases are
8497
* determined by The Unicode Standard. The files {@code Scripts<version>.txt}
8498
* and {@code PropertyValueAliases<version>.txt} define script names
8499
* and the script name aliases for a particular version of the
8500
* standard. The {@link Character} class specifies the version of
8501
* the standard that it supports.
8502
* <p>
8503
* Character case is ignored for all of the valid script names.
8504
* The en_US locale's case mapping rules are used to provide
8505
* case-insensitive string comparisons for script name validation.
8506
*
8507
* @param scriptName A {@code UnicodeScript} name.
8508
* @return The {@code UnicodeScript} constant identified
8509
* by {@code scriptName}
8510
* @throws IllegalArgumentException if {@code scriptName} is an
8511
* invalid name
8512
* @throws NullPointerException if {@code scriptName} is null
8513
*/
8514
public static final UnicodeScript forName(String scriptName) {
8515
scriptName = scriptName.toUpperCase(Locale.ENGLISH);
8516
//.replace(' ', '_'));
8517
UnicodeScript sc = aliases.get(scriptName);
8518
if (sc != null)
8519
return sc;
8520
return valueOf(scriptName);
8521
}
8522
}
8523
8524
/**
8525
* The value of the {@code Character}.
8526
*
8527
* @serial
8528
*/
8529
private final char value;
8530
8531
/** use serialVersionUID from JDK 1.0.2 for interoperability */
8532
@java.io.Serial
8533
private static final long serialVersionUID = 3786198910865385080L;
8534
8535
/**
 * Constructs a newly allocated {@code Character} object that
 * represents the specified {@code char} value.
 *
 * @param  value   the value to be represented by the
 *                  {@code Character} object.
 *
 * @deprecated
 * It is rarely appropriate to use this constructor. The static factory
 * {@link #valueOf(char)} is generally a better choice, as it is
 * likely to yield significantly better space and time performance.
 */
@Deprecated(since="9", forRemoval = true)
public Character(char value) {
    this.value = value;
}
8551
8552
private static class CharacterCache {
8553
private CharacterCache(){}
8554
8555
static final Character[] cache;
8556
static Character[] archivedCache;
8557
8558
static {
8559
int size = 127 + 1;
8560
8561
// Load and use the archived cache if it exists
8562
CDS.initializeFromArchive(CharacterCache.class);
8563
if (archivedCache == null || archivedCache.length != size) {
8564
Character[] c = new Character[size];
8565
for (int i = 0; i < size; i++) {
8566
c[i] = new Character((char) i);
8567
}
8568
archivedCache = c;
8569
}
8570
cache = archivedCache;
8571
}
8572
}
8573
8574
/**
8575
* Returns a {@code Character} instance representing the specified
8576
* {@code char} value.
8577
* If a new {@code Character} instance is not required, this method
8578
* should generally be used in preference to the constructor
8579
* {@link #Character(char)}, as this method is likely to yield
8580
* significantly better space and time performance by caching
8581
* frequently requested values.
8582
*
8583
* This method will always cache values in the range {@code
8584
* '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
8585
* cache other values outside of this range.
8586
*
8587
* @param c a char value.
8588
* @return a {@code Character} instance representing {@code c}.
8589
* @since 1.5
8590
*/
8591
@IntrinsicCandidate
8592
public static Character valueOf(char c) {
8593
if (c <= 127) { // must cache
8594
return CharacterCache.cache[(int)c];
8595
}
8596
return new Character(c);
8597
}
8598
8599
/**
8600
* Returns the value of this {@code Character} object.
8601
* @return the primitive {@code char} value represented by
8602
* this object.
8603
*/
8604
@IntrinsicCandidate
8605
public char charValue() {
8606
return value;
8607
}
8608
8609
/**
8610
* Returns a hash code for this {@code Character}; equal to the result
8611
* of invoking {@code charValue()}.
8612
*
8613
* @return a hash code value for this {@code Character}
8614
*/
8615
@Override
8616
public int hashCode() {
8617
return Character.hashCode(value);
8618
}
8619
8620
/**
8621
* Returns a hash code for a {@code char} value; compatible with
8622
* {@code Character.hashCode()}.
8623
*
8624
* @since 1.8
8625
*
8626
* @param value The {@code char} for which to return a hash code.
8627
* @return a hash code value for a {@code char} value.
8628
*/
8629
public static int hashCode(char value) {
8630
return (int)value;
8631
}
8632
8633
/**
8634
* Compares this object against the specified object.
8635
* The result is {@code true} if and only if the argument is not
8636
* {@code null} and is a {@code Character} object that
8637
* represents the same {@code char} value as this object.
8638
*
8639
* @param obj the object to compare with.
8640
* @return {@code true} if the objects are the same;
8641
* {@code false} otherwise.
8642
*/
8643
public boolean equals(Object obj) {
8644
if (obj instanceof Character) {
8645
return value == ((Character)obj).charValue();
8646
}
8647
return false;
8648
}
8649
8650
/**
8651
* Returns a {@code String} object representing this
8652
* {@code Character}'s value. The result is a string of
8653
* length 1 whose sole component is the primitive
8654
* {@code char} value represented by this
8655
* {@code Character} object.
8656
*
8657
* @return a string representation of this object.
8658
*/
8659
public String toString() {
8660
return String.valueOf(value);
8661
}
8662
8663
/**
8664
* Returns a {@code String} object representing the
8665
* specified {@code char}. The result is a string of length
8666
* 1 consisting solely of the specified {@code char}.
8667
*
8668
* @apiNote This method cannot handle <a
8669
* href="#supplementary"> supplementary characters</a>. To support
8670
* all Unicode characters, including supplementary characters, use
8671
* the {@link #toString(int)} method.
8672
*
8673
* @param c the {@code char} to be converted
8674
* @return the string representation of the specified {@code char}
8675
* @since 1.4
8676
*/
8677
public static String toString(char c) {
8678
return String.valueOf(c);
8679
}
8680
8681
/**
8682
* Returns a {@code String} object representing the
8683
* specified character (Unicode code point). The result is a string of
8684
* length 1 or 2, consisting solely of the specified {@code codePoint}.
8685
*
8686
* @param codePoint the {@code codePoint} to be converted
8687
* @return the string representation of the specified {@code codePoint}
8688
* @throws IllegalArgumentException if the specified
8689
* {@code codePoint} is not a {@linkplain #isValidCodePoint
8690
* valid Unicode code point}.
8691
* @since 11
8692
*/
8693
public static String toString(int codePoint) {
8694
return String.valueOfCodePoint(codePoint);
8695
}
8696
8697
/**
8698
* Determines whether the specified code point is a valid
8699
* <a href="http://www.unicode.org/glossary/#code_point">
8700
* Unicode code point value</a>.
8701
*
8702
* @param codePoint the Unicode code point to be tested
8703
* @return {@code true} if the specified code point value is between
8704
* {@link #MIN_CODE_POINT} and
8705
* {@link #MAX_CODE_POINT} inclusive;
8706
* {@code false} otherwise.
8707
* @since 1.5
8708
*/
8709
public static boolean isValidCodePoint(int codePoint) {
8710
// Optimized form of:
8711
// codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8712
int plane = codePoint >>> 16;
8713
return plane < ((MAX_CODE_POINT + 1) >>> 16);
8714
}
8715
8716
/**
8717
* Determines whether the specified character (Unicode code point)
8718
* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8719
* Such code points can be represented using a single {@code char}.
8720
*
8721
* @param codePoint the character (Unicode code point) to be tested
8722
* @return {@code true} if the specified code point is between
8723
* {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8724
* {@code false} otherwise.
8725
* @since 1.7
8726
*/
8727
public static boolean isBmpCodePoint(int codePoint) {
8728
return codePoint >>> 16 == 0;
8729
// Optimized form of:
8730
// codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8731
// We consistently use logical shift (>>>) to facilitate
8732
// additional runtime optimizations.
8733
}
8734
8735
/**
8736
* Determines whether the specified character (Unicode code point)
8737
* is in the <a href="#supplementary">supplementary character</a> range.
8738
*
8739
* @param codePoint the character (Unicode code point) to be tested
8740
* @return {@code true} if the specified code point is between
8741
* {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8742
* {@link #MAX_CODE_POINT} inclusive;
8743
* {@code false} otherwise.
8744
* @since 1.5
8745
*/
8746
public static boolean isSupplementaryCodePoint(int codePoint) {
8747
return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8748
&& codePoint < MAX_CODE_POINT + 1;
8749
}
8750
8751
/**
8752
* Determines if the given {@code char} value is a
8753
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8754
* Unicode high-surrogate code unit</a>
8755
* (also known as <i>leading-surrogate code unit</i>).
8756
*
8757
* <p>Such values do not represent characters by themselves,
8758
* but are used in the representation of
8759
* <a href="#supplementary">supplementary characters</a>
8760
* in the UTF-16 encoding.
8761
*
8762
* @param ch the {@code char} value to be tested.
8763
* @return {@code true} if the {@code char} value is between
8764
* {@link #MIN_HIGH_SURROGATE} and
8765
* {@link #MAX_HIGH_SURROGATE} inclusive;
8766
* {@code false} otherwise.
8767
* @see Character#isLowSurrogate(char)
8768
* @see Character.UnicodeBlock#of(int)
8769
* @since 1.5
8770
*/
8771
public static boolean isHighSurrogate(char ch) {
8772
// Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8773
return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8774
}
8775
8776
/**
8777
* Determines if the given {@code char} value is a
8778
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8779
* Unicode low-surrogate code unit</a>
8780
* (also known as <i>trailing-surrogate code unit</i>).
8781
*
8782
* <p>Such values do not represent characters by themselves,
8783
* but are used in the representation of
8784
* <a href="#supplementary">supplementary characters</a>
8785
* in the UTF-16 encoding.
8786
*
8787
* @param ch the {@code char} value to be tested.
8788
* @return {@code true} if the {@code char} value is between
8789
* {@link #MIN_LOW_SURROGATE} and
8790
* {@link #MAX_LOW_SURROGATE} inclusive;
8791
* {@code false} otherwise.
8792
* @see Character#isHighSurrogate(char)
8793
* @since 1.5
8794
*/
8795
public static boolean isLowSurrogate(char ch) {
8796
return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8797
}
8798
8799
/**
8800
* Determines if the given {@code char} value is a Unicode
8801
* <i>surrogate code unit</i>.
8802
*
8803
* <p>Such values do not represent characters by themselves,
8804
* but are used in the representation of
8805
* <a href="#supplementary">supplementary characters</a>
8806
* in the UTF-16 encoding.
8807
*
8808
* <p>A char value is a surrogate code unit if and only if it is either
8809
* a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8810
* a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8811
*
8812
* @param ch the {@code char} value to be tested.
8813
* @return {@code true} if the {@code char} value is between
8814
* {@link #MIN_SURROGATE} and
8815
* {@link #MAX_SURROGATE} inclusive;
8816
* {@code false} otherwise.
8817
* @since 1.7
8818
*/
8819
public static boolean isSurrogate(char ch) {
8820
return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8821
}
8822
8823
/**
8824
* Determines whether the specified pair of {@code char}
8825
* values is a valid
8826
* <a href="http://www.unicode.org/glossary/#surrogate_pair">
8827
* Unicode surrogate pair</a>.
8828
*
8829
* <p>This method is equivalent to the expression:
8830
* <blockquote><pre>{@code
8831
* isHighSurrogate(high) && isLowSurrogate(low)
8832
* }</pre></blockquote>
8833
*
8834
* @param high the high-surrogate code value to be tested
8835
* @param low the low-surrogate code value to be tested
8836
* @return {@code true} if the specified high and
8837
* low-surrogate code values represent a valid surrogate pair;
8838
* {@code false} otherwise.
8839
* @since 1.5
8840
*/
8841
public static boolean isSurrogatePair(char high, char low) {
8842
return isHighSurrogate(high) && isLowSurrogate(low);
8843
}
8844
8845
/**
8846
* Determines the number of {@code char} values needed to
8847
* represent the specified character (Unicode code point). If the
8848
* specified character is equal to or greater than 0x10000, then
8849
* the method returns 2. Otherwise, the method returns 1.
8850
*
8851
* <p>This method doesn't validate the specified character to be a
8852
* valid Unicode code point. The caller must validate the
8853
* character value using {@link #isValidCodePoint(int) isValidCodePoint}
8854
* if necessary.
8855
*
8856
* @param codePoint the character (Unicode code point) to be tested.
8857
* @return 2 if the specified character is equal to or greater than 0x10000; 1 otherwise.
8858
* @see Character#isSupplementaryCodePoint(int)
8859
* @since 1.5
8860
*/
8861
public static int charCount(int codePoint) {
8862
return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8863
}
8864
8865
/**
8866
* Converts the specified surrogate pair to its supplementary code
8867
* point value. This method does not validate the specified
8868
* surrogate pair. The caller must validate it using {@link
8869
* #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8870
*
8871
* @param high the high-surrogate code unit
8872
* @param low the low-surrogate code unit
8873
* @return the supplementary code point composed from the
8874
* specified surrogate pair.
8875
* @since 1.5
8876
*/
8877
public static int toCodePoint(char high, char low) {
8878
// Optimized form of:
8879
// return ((high - MIN_HIGH_SURROGATE) << 10)
8880
// + (low - MIN_LOW_SURROGATE)
8881
// + MIN_SUPPLEMENTARY_CODE_POINT;
8882
return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8883
- (MIN_HIGH_SURROGATE << 10)
8884
- MIN_LOW_SURROGATE);
8885
}
8886
8887
/**
8888
* Returns the code point at the given index of the
8889
* {@code CharSequence}. If the {@code char} value at
8890
* the given index in the {@code CharSequence} is in the
8891
* high-surrogate range, the following index is less than the
8892
* length of the {@code CharSequence}, and the
8893
* {@code char} value at the following index is in the
8894
* low-surrogate range, then the supplementary code point
8895
* corresponding to this surrogate pair is returned. Otherwise,
8896
* the {@code char} value at the given index is returned.
8897
*
8898
* @param seq a sequence of {@code char} values (Unicode code
8899
* units)
8900
* @param index the index to the {@code char} values (Unicode
8901
* code units) in {@code seq} to be converted
8902
* @return the Unicode code point at the given index
8903
* @throws NullPointerException if {@code seq} is null.
8904
* @throws IndexOutOfBoundsException if the value
8905
* {@code index} is negative or not less than
8906
* {@link CharSequence#length() seq.length()}.
8907
* @since 1.5
8908
*/
8909
public static int codePointAt(CharSequence seq, int index) {
8910
char c1 = seq.charAt(index);
8911
if (isHighSurrogate(c1) && ++index < seq.length()) {
8912
char c2 = seq.charAt(index);
8913
if (isLowSurrogate(c2)) {
8914
return toCodePoint(c1, c2);
8915
}
8916
}
8917
return c1;
8918
}
8919
8920
/**
8921
* Returns the code point at the given index of the
8922
* {@code char} array. If the {@code char} value at
8923
* the given index in the {@code char} array is in the
8924
* high-surrogate range, the following index is less than the
8925
* length of the {@code char} array, and the
8926
* {@code char} value at the following index is in the
8927
* low-surrogate range, then the supplementary code point
8928
* corresponding to this surrogate pair is returned. Otherwise,
8929
* the {@code char} value at the given index is returned.
8930
*
8931
* @param a the {@code char} array
8932
* @param index the index to the {@code char} values (Unicode
8933
* code units) in the {@code char} array to be converted
8934
* @return the Unicode code point at the given index
8935
* @throws NullPointerException if {@code a} is null.
8936
* @throws IndexOutOfBoundsException if the value
8937
* {@code index} is negative or not less than
8938
* the length of the {@code char} array.
8939
* @since 1.5
8940
*/
8941
public static int codePointAt(char[] a, int index) {
8942
return codePointAtImpl(a, index, a.length);
8943
}
8944
8945
/**
8946
* Returns the code point at the given index of the
8947
* {@code char} array, where only array elements with
8948
* {@code index} less than {@code limit} can be used. If
8949
* the {@code char} value at the given index in the
8950
* {@code char} array is in the high-surrogate range, the
8951
* following index is less than the {@code limit}, and the
8952
* {@code char} value at the following index is in the
8953
* low-surrogate range, then the supplementary code point
8954
* corresponding to this surrogate pair is returned. Otherwise,
8955
* the {@code char} value at the given index is returned.
8956
*
8957
* @param a the {@code char} array
8958
* @param index the index to the {@code char} values (Unicode
8959
* code units) in the {@code char} array to be converted
8960
* @param limit the index after the last array element that
8961
* can be used in the {@code char} array
8962
* @return the Unicode code point at the given index
8963
* @throws NullPointerException if {@code a} is null.
8964
* @throws IndexOutOfBoundsException if the {@code index}
8965
* argument is negative or not less than the {@code limit}
8966
* argument, or if the {@code limit} argument is negative or
8967
* greater than the length of the {@code char} array.
8968
* @since 1.5
8969
*/
8970
public static int codePointAt(char[] a, int index, int limit) {
8971
if (index >= limit || limit < 0 || limit > a.length) {
8972
throw new IndexOutOfBoundsException();
8973
}
8974
return codePointAtImpl(a, index, limit);
8975
}
8976
8977
// throws ArrayIndexOutOfBoundsException if index out of bounds
8978
static int codePointAtImpl(char[] a, int index, int limit) {
8979
char c1 = a[index];
8980
if (isHighSurrogate(c1) && ++index < limit) {
8981
char c2 = a[index];
8982
if (isLowSurrogate(c2)) {
8983
return toCodePoint(c1, c2);
8984
}
8985
}
8986
return c1;
8987
}
8988
8989
/**
8990
* Returns the code point preceding the given index of the
8991
* {@code CharSequence}. If the {@code char} value at
8992
* {@code (index - 1)} in the {@code CharSequence} is in
8993
* the low-surrogate range, {@code (index - 2)} is not
8994
* negative, and the {@code char} value at {@code (index - 2)}
8995
* in the {@code CharSequence} is in the
8996
* high-surrogate range, then the supplementary code point
8997
* corresponding to this surrogate pair is returned. Otherwise,
8998
* the {@code char} value at {@code (index - 1)} is
8999
* returned.
9000
*
9001
* @param seq the {@code CharSequence} instance
9002
* @param index the index following the code point that should be returned
9003
* @return the Unicode code point value before the given index.
9004
* @throws NullPointerException if {@code seq} is null.
9005
* @throws IndexOutOfBoundsException if the {@code index}
9006
* argument is less than 1 or greater than {@link
9007
* CharSequence#length() seq.length()}.
9008
* @since 1.5
9009
*/
9010
public static int codePointBefore(CharSequence seq, int index) {
9011
char c2 = seq.charAt(--index);
9012
if (isLowSurrogate(c2) && index > 0) {
9013
char c1 = seq.charAt(--index);
9014
if (isHighSurrogate(c1)) {
9015
return toCodePoint(c1, c2);
9016
}
9017
}
9018
return c2;
9019
}
9020
9021
/**
9022
* Returns the code point preceding the given index of the
9023
* {@code char} array. If the {@code char} value at
9024
* {@code (index - 1)} in the {@code char} array is in
9025
* the low-surrogate range, {@code (index - 2)} is not
9026
* negative, and the {@code char} value at {@code (index - 2)}
9027
* in the {@code char} array is in the
9028
* high-surrogate range, then the supplementary code point
9029
* corresponding to this surrogate pair is returned. Otherwise,
9030
* the {@code char} value at {@code (index - 1)} is
9031
* returned.
9032
*
9033
* @param a the {@code char} array
9034
* @param index the index following the code point that should be returned
9035
* @return the Unicode code point value before the given index.
9036
* @throws NullPointerException if {@code a} is null.
9037
* @throws IndexOutOfBoundsException if the {@code index}
9038
* argument is less than 1 or greater than the length of the
9039
* {@code char} array
9040
* @since 1.5
9041
*/
9042
public static int codePointBefore(char[] a, int index) {
9043
return codePointBeforeImpl(a, index, 0);
9044
}
9045
9046
/**
9047
* Returns the code point preceding the given index of the
9048
* {@code char} array, where only array elements with
9049
* {@code index} greater than or equal to {@code start}
9050
* can be used. If the {@code char} value at {@code (index - 1)}
9051
* in the {@code char} array is in the
9052
* low-surrogate range, {@code (index - 2)} is not less than
9053
* {@code start}, and the {@code char} value at
9054
* {@code (index - 2)} in the {@code char} array is in
9055
* the high-surrogate range, then the supplementary code point
9056
* corresponding to this surrogate pair is returned. Otherwise,
9057
* the {@code char} value at {@code (index - 1)} is
9058
* returned.
9059
*
9060
* @param a the {@code char} array
9061
* @param index the index following the code point that should be returned
9062
* @param start the index of the first array element in the
9063
* {@code char} array
9064
* @return the Unicode code point value before the given index.
9065
* @throws NullPointerException if {@code a} is null.
9066
* @throws IndexOutOfBoundsException if the {@code index}
9067
* argument is not greater than the {@code start} argument or
9068
* is greater than the length of the {@code char} array, or
9069
* if the {@code start} argument is negative or not less than
9070
* the length of the {@code char} array.
9071
* @since 1.5
9072
*/
9073
public static int codePointBefore(char[] a, int index, int start) {
9074
if (index <= start || start < 0 || start >= a.length) {
9075
throw new IndexOutOfBoundsException();
9076
}
9077
return codePointBeforeImpl(a, index, start);
9078
}
9079
9080
// throws ArrayIndexOutOfBoundsException if index-1 out of bounds
9081
static int codePointBeforeImpl(char[] a, int index, int start) {
9082
char c2 = a[--index];
9083
if (isLowSurrogate(c2) && index > start) {
9084
char c1 = a[--index];
9085
if (isHighSurrogate(c1)) {
9086
return toCodePoint(c1, c2);
9087
}
9088
}
9089
return c2;
9090
}
9091
9092
/**
9093
* Returns the leading surrogate (a
9094
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9095
* high surrogate code unit</a>) of the
9096
* <a href="http://www.unicode.org/glossary/#surrogate_pair">
9097
* surrogate pair</a>
9098
* representing the specified supplementary character (Unicode
9099
* code point) in the UTF-16 encoding. If the specified character
9100
* is not a
9101
* <a href="Character.html#supplementary">supplementary character</a>,
9102
* an unspecified {@code char} is returned.
9103
*
9104
* <p>If
9105
* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9106
* is {@code true}, then
9107
* {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
9108
* {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
9109
* are also always {@code true}.
9110
*
9111
* @param codePoint a supplementary character (Unicode code point)
9112
* @return the leading surrogate code unit used to represent the
9113
* character in the UTF-16 encoding
9114
* @since 1.7
9115
*/
9116
public static char highSurrogate(int codePoint) {
9117
return (char) ((codePoint >>> 10)
9118
+ (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
9119
}
9120
9121
/**
9122
* Returns the trailing surrogate (a
9123
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9124
* low surrogate code unit</a>) of the
9125
* <a href="http://www.unicode.org/glossary/#surrogate_pair">
9126
* surrogate pair</a>
9127
* representing the specified supplementary character (Unicode
9128
* code point) in the UTF-16 encoding. If the specified character
9129
* is not a
9130
* <a href="Character.html#supplementary">supplementary character</a>,
9131
* an unspecified {@code char} is returned.
9132
*
9133
* <p>If
9134
* {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9135
* is {@code true}, then
9136
* {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
9137
* {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
9138
* are also always {@code true}.
9139
*
9140
* @param codePoint a supplementary character (Unicode code point)
9141
* @return the trailing surrogate code unit used to represent the
9142
* character in the UTF-16 encoding
9143
* @since 1.7
9144
*/
9145
public static char lowSurrogate(int codePoint) {
9146
return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
9147
}
9148
9149
/**
9150
* Converts the specified character (Unicode code point) to its
9151
* UTF-16 representation. If the specified code point is a BMP
9152
* (Basic Multilingual Plane or Plane 0) value, the same value is
9153
* stored in {@code dst[dstIndex]}, and 1 is returned. If the
9154
* specified code point is a supplementary character, its
9155
* surrogate values are stored in {@code dst[dstIndex]}
9156
* (high-surrogate) and {@code dst[dstIndex+1]}
9157
* (low-surrogate), and 2 is returned.
9158
*
9159
* @param codePoint the character (Unicode code point) to be converted.
9160
* @param dst an array of {@code char} in which the
9161
* {@code codePoint}'s UTF-16 value is stored.
9162
* @param dstIndex the start index into the {@code dst}
9163
* array where the converted value is stored.
9164
* @return 1 if the code point is a BMP code point, 2 if the
9165
* code point is a supplementary code point.
9166
* @throws IllegalArgumentException if the specified
9167
* {@code codePoint} is not a valid Unicode code point.
9168
* @throws NullPointerException if the specified {@code dst} is null.
9169
* @throws IndexOutOfBoundsException if {@code dstIndex}
9170
* is negative or not less than {@code dst.length}, or if
9171
* {@code dst} at {@code dstIndex} doesn't have enough
9172
* array element(s) to store the resulting {@code char}
9173
* value(s). (If {@code dstIndex} is equal to
9174
* {@code dst.length-1} and the specified
9175
* {@code codePoint} is a supplementary character, the
9176
* high-surrogate value is not stored in
9177
* {@code dst[dstIndex]}.)
9178
* @since 1.5
9179
*/
9180
public static int toChars(int codePoint, char[] dst, int dstIndex) {
9181
if (isBmpCodePoint(codePoint)) {
9182
dst[dstIndex] = (char) codePoint;
9183
return 1;
9184
} else if (isValidCodePoint(codePoint)) {
9185
toSurrogates(codePoint, dst, dstIndex);
9186
return 2;
9187
} else {
9188
throw new IllegalArgumentException(
9189
String.format("Not a valid Unicode code point: 0x%X", codePoint));
9190
}
9191
}
9192
9193
/**
9194
* Converts the specified character (Unicode code point) to its
9195
* UTF-16 representation stored in a {@code char} array. If
9196
* the specified code point is a BMP (Basic Multilingual Plane or
9197
* Plane 0) value, the resulting {@code char} array has
9198
* the same value as {@code codePoint}. If the specified code
9199
* point is a supplementary code point, the resulting
9200
* {@code char} array has the corresponding surrogate pair.
9201
*
9202
* @param codePoint a Unicode code point
9203
* @return a {@code char} array having
9204
* {@code codePoint}'s UTF-16 representation.
9205
* @throws IllegalArgumentException if the specified
9206
* {@code codePoint} is not a valid Unicode code point.
9207
* @since 1.5
9208
*/
9209
public static char[] toChars(int codePoint) {
9210
if (isBmpCodePoint(codePoint)) {
9211
return new char[] { (char) codePoint };
9212
} else if (isValidCodePoint(codePoint)) {
9213
char[] result = new char[2];
9214
toSurrogates(codePoint, result, 0);
9215
return result;
9216
} else {
9217
throw new IllegalArgumentException(
9218
String.format("Not a valid Unicode code point: 0x%X", codePoint));
9219
}
9220
}
9221
9222
static void toSurrogates(int codePoint, char[] dst, int index) {
9223
// We write elements "backwards" to guarantee all-or-nothing
9224
dst[index+1] = lowSurrogate(codePoint);
9225
dst[index] = highSurrogate(codePoint);
9226
}
9227
9228
/**
9229
* Returns the number of Unicode code points in the text range of
9230
* the specified char sequence. The text range begins at the
9231
* specified {@code beginIndex} and extends to the
9232
* {@code char} at index {@code endIndex - 1}. Thus the
9233
* length (in {@code char}s) of the text range is
9234
* {@code endIndex-beginIndex}. Unpaired surrogates within
9235
* the text range count as one code point each.
9236
*
9237
* @param seq the char sequence
9238
* @param beginIndex the index to the first {@code char} of
9239
* the text range.
9240
* @param endIndex the index after the last {@code char} of
9241
* the text range.
9242
* @return the number of Unicode code points in the specified text
9243
* range
9244
* @throws NullPointerException if {@code seq} is null.
9245
* @throws IndexOutOfBoundsException if the
9246
* {@code beginIndex} is negative, or {@code endIndex}
9247
* is larger than the length of the given sequence, or
9248
* {@code beginIndex} is larger than {@code endIndex}.
9249
* @since 1.5
9250
*/
9251
public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
9252
int length = seq.length();
9253
if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
9254
throw new IndexOutOfBoundsException();
9255
}
9256
int n = endIndex - beginIndex;
9257
for (int i = beginIndex; i < endIndex; ) {
9258
if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
9259
isLowSurrogate(seq.charAt(i))) {
9260
n--;
9261
i++;
9262
}
9263
}
9264
return n;
9265
}
9266
9267
/**
9268
* Returns the number of Unicode code points in a subarray of the
9269
* {@code char} array argument. The {@code offset}
9270
* argument is the index of the first {@code char} of the
9271
* subarray and the {@code count} argument specifies the
9272
* length of the subarray in {@code char}s. Unpaired
9273
* surrogates within the subarray count as one code point each.
9274
*
9275
* @param a the {@code char} array
9276
* @param offset the index of the first {@code char} in the
9277
* given {@code char} array
9278
* @param count the length of the subarray in {@code char}s
9279
* @return the number of Unicode code points in the specified subarray
9280
* @throws NullPointerException if {@code a} is null.
9281
* @throws IndexOutOfBoundsException if {@code offset} or
9282
* {@code count} is negative, or if {@code offset +
9283
* count} is larger than the length of the given array.
9284
* @since 1.5
9285
*/
9286
public static int codePointCount(char[] a, int offset, int count) {
9287
if (count > a.length - offset || offset < 0 || count < 0) {
9288
throw new IndexOutOfBoundsException();
9289
}
9290
return codePointCountImpl(a, offset, count);
9291
}
9292
9293
static int codePointCountImpl(char[] a, int offset, int count) {
9294
int endIndex = offset + count;
9295
int n = count;
9296
for (int i = offset; i < endIndex; ) {
9297
if (isHighSurrogate(a[i++]) && i < endIndex &&
9298
isLowSurrogate(a[i])) {
9299
n--;
9300
i++;
9301
}
9302
}
9303
return n;
9304
}
9305
9306
/**
9307
* Returns the index within the given char sequence that is offset
9308
* from the given {@code index} by {@code codePointOffset}
9309
* code points. Unpaired surrogates within the text range given by
9310
* {@code index} and {@code codePointOffset} count as
9311
* one code point each.
9312
*
9313
* @param seq the char sequence
9314
* @param index the index to be offset
9315
* @param codePointOffset the offset in code points
9316
* @return the index within the char sequence
9317
* @throws NullPointerException if {@code seq} is null.
9318
* @throws IndexOutOfBoundsException if {@code index}
9319
* is negative or larger than the length of the char sequence,
9320
* or if {@code codePointOffset} is positive and the
9321
* subsequence starting with {@code index} has fewer than
9322
* {@code codePointOffset} code points, or if
9323
* {@code codePointOffset} is negative and the subsequence
9324
* before {@code index} has fewer than the absolute value
9325
* of {@code codePointOffset} code points.
9326
* @since 1.5
9327
*/
9328
public static int offsetByCodePoints(CharSequence seq, int index,
9329
int codePointOffset) {
9330
int length = seq.length();
9331
if (index < 0 || index > length) {
9332
throw new IndexOutOfBoundsException();
9333
}
9334
9335
int x = index;
9336
if (codePointOffset >= 0) {
9337
int i;
9338
for (i = 0; x < length && i < codePointOffset; i++) {
9339
if (isHighSurrogate(seq.charAt(x++)) && x < length &&
9340
isLowSurrogate(seq.charAt(x))) {
9341
x++;
9342
}
9343
}
9344
if (i < codePointOffset) {
9345
throw new IndexOutOfBoundsException();
9346
}
9347
} else {
9348
int i;
9349
for (i = codePointOffset; x > 0 && i < 0; i++) {
9350
if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
9351
isHighSurrogate(seq.charAt(x-1))) {
9352
x--;
9353
}
9354
}
9355
if (i < 0) {
9356
throw new IndexOutOfBoundsException();
9357
}
9358
}
9359
return x;
9360
}
9361
9362
/**
9363
* Returns the index within the given {@code char} subarray
9364
* that is offset from the given {@code index} by
9365
* {@code codePointOffset} code points. The
9366
* {@code start} and {@code count} arguments specify a
9367
* subarray of the {@code char} array. Unpaired surrogates
9368
* within the text range given by {@code index} and
9369
* {@code codePointOffset} count as one code point each.
9370
*
9371
* @param a the {@code char} array
9372
* @param start the index of the first {@code char} of the
9373
* subarray
9374
* @param count the length of the subarray in {@code char}s
9375
* @param index the index to be offset
9376
* @param codePointOffset the offset in code points
9377
* @return the index within the subarray
9378
* @throws NullPointerException if {@code a} is null.
9379
* @throws IndexOutOfBoundsException
9380
* if {@code start} or {@code count} is negative,
9381
* or if {@code start + count} is larger than the length of
9382
* the given array,
9383
* or if {@code index} is less than {@code start} or
9384
* larger than {@code start + count},
9385
* or if {@code codePointOffset} is positive and the text range
9386
* starting with {@code index} and ending with {@code start + count - 1}
9387
* has fewer than {@code codePointOffset} code
9388
* points,
9389
* or if {@code codePointOffset} is negative and the text range
9390
* starting with {@code start} and ending with {@code index - 1}
9391
* has fewer than the absolute value of
9392
* {@code codePointOffset} code points.
9393
* @since 1.5
9394
*/
9395
public static int offsetByCodePoints(char[] a, int start, int count,
9396
int index, int codePointOffset) {
9397
if (count > a.length-start || start < 0 || count < 0
9398
|| index < start || index > start+count) {
9399
throw new IndexOutOfBoundsException();
9400
}
9401
return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
9402
}
9403
9404
static int offsetByCodePointsImpl(char[]a, int start, int count,
9405
int index, int codePointOffset) {
9406
int x = index;
9407
if (codePointOffset >= 0) {
9408
int limit = start + count;
9409
int i;
9410
for (i = 0; x < limit && i < codePointOffset; i++) {
9411
if (isHighSurrogate(a[x++]) && x < limit &&
9412
isLowSurrogate(a[x])) {
9413
x++;
9414
}
9415
}
9416
if (i < codePointOffset) {
9417
throw new IndexOutOfBoundsException();
9418
}
9419
} else {
9420
int i;
9421
for (i = codePointOffset; x > start && i < 0; i++) {
9422
if (isLowSurrogate(a[--x]) && x > start &&
9423
isHighSurrogate(a[x-1])) {
9424
x--;
9425
}
9426
}
9427
if (i < 0) {
9428
throw new IndexOutOfBoundsException();
9429
}
9430
}
9431
return x;
9432
}
9433
9434
/**
9435
* Determines if the specified character is a lowercase character.
9436
* <p>
9437
* A character is lowercase if its general category type, provided
9438
* by {@code Character.getType(ch)}, is
9439
* {@code LOWERCASE_LETTER}, or it has contributory property
9440
* Other_Lowercase as defined by the Unicode Standard.
9441
* <p>
9442
* The following are examples of lowercase characters:
9443
* <blockquote><pre>
9444
* a b c d e f g h i j k l m n o p q r s t u v w x y z
9445
* '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9446
* '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9447
* '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9448
* '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9449
* </pre></blockquote>
9450
* <p> Many other Unicode characters are lowercase too.
9451
*
9452
* <p><b>Note:</b> This method cannot handle <a
9453
* href="#supplementary"> supplementary characters</a>. To support
9454
* all Unicode characters, including supplementary characters, use
9455
* the {@link #isLowerCase(int)} method.
9456
*
9457
* @param ch the character to be tested.
9458
* @return {@code true} if the character is lowercase;
9459
* {@code false} otherwise.
9460
* @see Character#isLowerCase(char)
9461
* @see Character#isTitleCase(char)
9462
* @see Character#toLowerCase(char)
9463
* @see Character#getType(char)
9464
*/
9465
public static boolean isLowerCase(char ch) {
9466
return isLowerCase((int)ch);
9467
}
9468
9469
/**
9470
* Determines if the specified character (Unicode code point) is a
9471
* lowercase character.
9472
* <p>
9473
* A character is lowercase if its general category type, provided
9474
* by {@link Character#getType getType(codePoint)}, is
9475
* {@code LOWERCASE_LETTER}, or it has contributory property
9476
* Other_Lowercase as defined by the Unicode Standard.
9477
* <p>
9478
* The following are examples of lowercase characters:
9479
* <blockquote><pre>
9480
* a b c d e f g h i j k l m n o p q r s t u v w x y z
9481
* '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9482
* '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9483
* '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9484
* '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9485
* </pre></blockquote>
9486
* <p> Many other Unicode characters are lowercase too.
9487
*
9488
* @param codePoint the character (Unicode code point) to be tested.
9489
* @return {@code true} if the character is lowercase;
9490
* {@code false} otherwise.
9491
* @see Character#isLowerCase(int)
9492
* @see Character#isTitleCase(int)
9493
* @see Character#toLowerCase(int)
9494
* @see Character#getType(int)
9495
* @since 1.5
9496
*/
9497
public static boolean isLowerCase(int codePoint) {
9498
return CharacterData.of(codePoint).isLowerCase(codePoint);
9499
}
9500
9501
/**
9502
* Determines if the specified character is an uppercase character.
9503
* <p>
9504
* A character is uppercase if its general category type, provided by
9505
* {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
9506
* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9507
* <p>
9508
* The following are examples of uppercase characters:
9509
* <blockquote><pre>
9510
* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9511
* '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
9512
* '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
9513
* '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
9514
* '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
9515
* </pre></blockquote>
9516
* <p> Many other Unicode characters are uppercase too.
9517
*
9518
* <p><b>Note:</b> This method cannot handle <a
9519
* href="#supplementary"> supplementary characters</a>. To support
9520
* all Unicode characters, including supplementary characters, use
9521
* the {@link #isUpperCase(int)} method.
9522
*
9523
* @param ch the character to be tested.
9524
* @return {@code true} if the character is uppercase;
9525
* {@code false} otherwise.
9526
* @see Character#isLowerCase(char)
9527
* @see Character#isTitleCase(char)
9528
* @see Character#toUpperCase(char)
9529
* @see Character#getType(char)
9530
* @since 1.0
9531
*/
9532
public static boolean isUpperCase(char ch) {
9533
return isUpperCase((int)ch);
9534
}
9535
9536
/**
9537
* Determines if the specified character (Unicode code point) is an uppercase character.
9538
* <p>
9539
* A character is uppercase if its general category type, provided by
9540
* {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
9541
* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9542
* <p>
9543
* The following are examples of uppercase characters:
9544
* <blockquote><pre>
9545
* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9546
* '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
9547
* '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
9548
* '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
9549
* '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
9550
* </pre></blockquote>
9551
* <p> Many other Unicode characters are uppercase too.
9552
*
9553
* @param codePoint the character (Unicode code point) to be tested.
9554
* @return {@code true} if the character is uppercase;
9555
* {@code false} otherwise.
9556
* @see Character#isLowerCase(int)
9557
* @see Character#isTitleCase(int)
9558
* @see Character#toUpperCase(int)
9559
* @see Character#getType(int)
9560
* @since 1.5
9561
*/
9562
public static boolean isUpperCase(int codePoint) {
9563
return CharacterData.of(codePoint).isUpperCase(codePoint);
9564
}
9565
9566
/**
9567
* Determines if the specified character is a titlecase character.
9568
* <p>
9569
* A character is a titlecase character if its general
9570
* category type, provided by {@code Character.getType(ch)},
9571
* is {@code TITLECASE_LETTER}.
9572
* <p>
9573
* Some characters look like pairs of Latin letters. For example, there
9574
* is an uppercase letter that looks like "LJ" and has a corresponding
9575
* lowercase letter that looks like "lj". A third form, which looks like "Lj",
9576
* is the appropriate form to use when rendering a word in lowercase
9577
* with initial capitals, as for a book title.
9578
* <p>
9579
* These are some of the Unicode characters for which this method returns
9580
* {@code true}:
9581
* <ul>
9582
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9583
* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9584
* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9585
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9586
* </ul>
9587
* <p> Many other Unicode characters are titlecase too.
9588
*
9589
* <p><b>Note:</b> This method cannot handle <a
9590
* href="#supplementary"> supplementary characters</a>. To support
9591
* all Unicode characters, including supplementary characters, use
9592
* the {@link #isTitleCase(int)} method.
9593
*
9594
* @param ch the character to be tested.
9595
* @return {@code true} if the character is titlecase;
9596
* {@code false} otherwise.
9597
* @see Character#isLowerCase(char)
9598
* @see Character#isUpperCase(char)
9599
* @see Character#toTitleCase(char)
9600
* @see Character#getType(char)
9601
* @since 1.0.2
9602
*/
9603
public static boolean isTitleCase(char ch) {
9604
return isTitleCase((int)ch);
9605
}
9606
9607
/**
9608
* Determines if the specified character (Unicode code point) is a titlecase character.
9609
* <p>
9610
* A character is a titlecase character if its general
9611
* category type, provided by {@link Character#getType(int) getType(codePoint)},
9612
* is {@code TITLECASE_LETTER}.
9613
* <p>
9614
* Some characters look like pairs of Latin letters. For example, there
9615
* is an uppercase letter that looks like "LJ" and has a corresponding
9616
* lowercase letter that looks like "lj". A third form, which looks like "Lj",
9617
* is the appropriate form to use when rendering a word in lowercase
9618
* with initial capitals, as for a book title.
9619
* <p>
9620
* These are some of the Unicode characters for which this method returns
9621
* {@code true}:
9622
* <ul>
9623
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9624
* <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9625
* <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9626
* <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9627
* </ul>
9628
* <p> Many other Unicode characters are titlecase too.
9629
*
9630
* @param codePoint the character (Unicode code point) to be tested.
9631
* @return {@code true} if the character is titlecase;
9632
* {@code false} otherwise.
9633
* @see Character#isLowerCase(int)
9634
* @see Character#isUpperCase(int)
9635
* @see Character#toTitleCase(int)
9636
* @see Character#getType(int)
9637
* @since 1.5
9638
*/
9639
public static boolean isTitleCase(int codePoint) {
9640
return getType(codePoint) == Character.TITLECASE_LETTER;
9641
}
9642
9643
/**
9644
* Determines if the specified character is a digit.
9645
* <p>
9646
* A character is a digit if its general category type, provided
9647
* by {@code Character.getType(ch)}, is
9648
* {@code DECIMAL_DIGIT_NUMBER}.
9649
* <p>
9650
* Some Unicode character ranges that contain digits:
9651
* <ul>
9652
* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9653
* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9654
* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9655
* Arabic-Indic digits
9656
* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9657
* Extended Arabic-Indic digits
9658
* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9659
* Devanagari digits
9660
* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9661
* Fullwidth digits
9662
* </ul>
9663
*
9664
* Many other character ranges contain digits as well.
9665
*
9666
* <p><b>Note:</b> This method cannot handle <a
9667
* href="#supplementary"> supplementary characters</a>. To support
9668
* all Unicode characters, including supplementary characters, use
9669
* the {@link #isDigit(int)} method.
9670
*
9671
* @param ch the character to be tested.
9672
* @return {@code true} if the character is a digit;
9673
* {@code false} otherwise.
9674
* @see Character#digit(char, int)
9675
* @see Character#forDigit(int, int)
9676
* @see Character#getType(char)
9677
*/
9678
public static boolean isDigit(char ch) {
9679
return isDigit((int)ch);
9680
}
9681
9682
/**
9683
* Determines if the specified character (Unicode code point) is a digit.
9684
* <p>
9685
* A character is a digit if its general category type, provided
9686
* by {@link Character#getType(int) getType(codePoint)}, is
9687
* {@code DECIMAL_DIGIT_NUMBER}.
9688
* <p>
9689
* Some Unicode character ranges that contain digits:
9690
* <ul>
9691
* <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9692
* ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9693
* <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9694
* Arabic-Indic digits
9695
* <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9696
* Extended Arabic-Indic digits
9697
* <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9698
* Devanagari digits
9699
* <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9700
* Fullwidth digits
9701
* </ul>
9702
*
9703
* Many other character ranges contain digits as well.
9704
*
9705
* @param codePoint the character (Unicode code point) to be tested.
9706
* @return {@code true} if the character is a digit;
9707
* {@code false} otherwise.
9708
* @see Character#forDigit(int, int)
9709
* @see Character#getType(int)
9710
* @since 1.5
9711
*/
9712
public static boolean isDigit(int codePoint) {
9713
return CharacterData.of(codePoint).isDigit(codePoint);
9714
}
9715
9716
/**
9717
* Determines if a character is defined in Unicode.
9718
* <p>
9719
* A character is defined if at least one of the following is true:
9720
* <ul>
9721
* <li>It has an entry in the UnicodeData file.
9722
* <li>It has a value in a range defined by the UnicodeData file.
9723
* </ul>
9724
*
9725
* <p><b>Note:</b> This method cannot handle <a
9726
* href="#supplementary"> supplementary characters</a>. To support
9727
* all Unicode characters, including supplementary characters, use
9728
* the {@link #isDefined(int)} method.
9729
*
9730
* @param ch the character to be tested
9731
* @return {@code true} if the character has a defined meaning
9732
* in Unicode; {@code false} otherwise.
9733
* @see Character#isDigit(char)
9734
* @see Character#isLetter(char)
9735
* @see Character#isLetterOrDigit(char)
9736
* @see Character#isLowerCase(char)
9737
* @see Character#isTitleCase(char)
9738
* @see Character#isUpperCase(char)
9739
* @since 1.0.2
9740
*/
9741
public static boolean isDefined(char ch) {
9742
return isDefined((int)ch);
9743
}
9744
9745
/**
9746
* Determines if a character (Unicode code point) is defined in Unicode.
9747
* <p>
9748
* A character is defined if at least one of the following is true:
9749
* <ul>
9750
* <li>It has an entry in the UnicodeData file.
9751
* <li>It has a value in a range defined by the UnicodeData file.
9752
* </ul>
9753
*
9754
* @param codePoint the character (Unicode code point) to be tested.
9755
* @return {@code true} if the character has a defined meaning
9756
* in Unicode; {@code false} otherwise.
9757
* @see Character#isDigit(int)
9758
* @see Character#isLetter(int)
9759
* @see Character#isLetterOrDigit(int)
9760
* @see Character#isLowerCase(int)
9761
* @see Character#isTitleCase(int)
9762
* @see Character#isUpperCase(int)
9763
* @since 1.5
9764
*/
9765
public static boolean isDefined(int codePoint) {
9766
return getType(codePoint) != Character.UNASSIGNED;
9767
}
9768
9769
/**
9770
* Determines if the specified character is a letter.
9771
* <p>
9772
* A character is considered to be a letter if its general
9773
* category type, provided by {@code Character.getType(ch)},
9774
* is any of the following:
9775
* <ul>
9776
* <li> {@code UPPERCASE_LETTER}
9777
* <li> {@code LOWERCASE_LETTER}
9778
* <li> {@code TITLECASE_LETTER}
9779
* <li> {@code MODIFIER_LETTER}
9780
* <li> {@code OTHER_LETTER}
9781
* </ul>
9782
*
9783
* Not all letters have case. Many characters are
9784
* letters but are neither uppercase nor lowercase nor titlecase.
9785
*
9786
* <p><b>Note:</b> This method cannot handle <a
9787
* href="#supplementary"> supplementary characters</a>. To support
9788
* all Unicode characters, including supplementary characters, use
9789
* the {@link #isLetter(int)} method.
9790
*
9791
* @param ch the character to be tested.
9792
* @return {@code true} if the character is a letter;
9793
* {@code false} otherwise.
9794
* @see Character#isDigit(char)
9795
* @see Character#isJavaIdentifierStart(char)
9796
* @see Character#isJavaLetter(char)
9797
* @see Character#isJavaLetterOrDigit(char)
9798
* @see Character#isLetterOrDigit(char)
9799
* @see Character#isLowerCase(char)
9800
* @see Character#isTitleCase(char)
9801
* @see Character#isUnicodeIdentifierStart(char)
9802
* @see Character#isUpperCase(char)
9803
*/
9804
public static boolean isLetter(char ch) {
9805
return isLetter((int)ch);
9806
}
9807
9808
/**
9809
* Determines if the specified character (Unicode code point) is a letter.
9810
* <p>
9811
* A character is considered to be a letter if its general
9812
* category type, provided by {@link Character#getType(int) getType(codePoint)},
9813
* is any of the following:
9814
* <ul>
9815
* <li> {@code UPPERCASE_LETTER}
9816
* <li> {@code LOWERCASE_LETTER}
9817
* <li> {@code TITLECASE_LETTER}
9818
* <li> {@code MODIFIER_LETTER}
9819
* <li> {@code OTHER_LETTER}
9820
* </ul>
9821
*
9822
* Not all letters have case. Many characters are
9823
* letters but are neither uppercase nor lowercase nor titlecase.
9824
*
9825
* @param codePoint the character (Unicode code point) to be tested.
9826
* @return {@code true} if the character is a letter;
9827
* {@code false} otherwise.
9828
* @see Character#isDigit(int)
9829
* @see Character#isJavaIdentifierStart(int)
9830
* @see Character#isLetterOrDigit(int)
9831
* @see Character#isLowerCase(int)
9832
* @see Character#isTitleCase(int)
9833
* @see Character#isUnicodeIdentifierStart(int)
9834
* @see Character#isUpperCase(int)
9835
* @since 1.5
9836
*/
9837
public static boolean isLetter(int codePoint) {
9838
return ((((1 << Character.UPPERCASE_LETTER) |
9839
(1 << Character.LOWERCASE_LETTER) |
9840
(1 << Character.TITLECASE_LETTER) |
9841
(1 << Character.MODIFIER_LETTER) |
9842
(1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9843
!= 0;
9844
}
9845
9846
/**
9847
* Determines if the specified character is a letter or digit.
9848
* <p>
9849
* A character is considered to be a letter or digit if either
9850
* {@code Character.isLetter(char ch)} or
9851
* {@code Character.isDigit(char ch)} returns
9852
* {@code true} for the character.
9853
*
9854
* <p><b>Note:</b> This method cannot handle <a
9855
* href="#supplementary"> supplementary characters</a>. To support
9856
* all Unicode characters, including supplementary characters, use
9857
* the {@link #isLetterOrDigit(int)} method.
9858
*
9859
* @param ch the character to be tested.
9860
* @return {@code true} if the character is a letter or digit;
9861
* {@code false} otherwise.
9862
* @see Character#isDigit(char)
9863
* @see Character#isJavaIdentifierPart(char)
9864
* @see Character#isJavaLetter(char)
9865
* @see Character#isJavaLetterOrDigit(char)
9866
* @see Character#isLetter(char)
9867
* @see Character#isUnicodeIdentifierPart(char)
9868
* @since 1.0.2
9869
*/
9870
public static boolean isLetterOrDigit(char ch) {
9871
return isLetterOrDigit((int)ch);
9872
}
9873
9874
/**
9875
* Determines if the specified character (Unicode code point) is a letter or digit.
9876
* <p>
9877
* A character is considered to be a letter or digit if either
9878
* {@link #isLetter(int) isLetter(codePoint)} or
9879
* {@link #isDigit(int) isDigit(codePoint)} returns
9880
* {@code true} for the character.
9881
*
9882
* @param codePoint the character (Unicode code point) to be tested.
9883
* @return {@code true} if the character is a letter or digit;
9884
* {@code false} otherwise.
9885
* @see Character#isDigit(int)
9886
* @see Character#isJavaIdentifierPart(int)
9887
* @see Character#isLetter(int)
9888
* @see Character#isUnicodeIdentifierPart(int)
9889
* @since 1.5
9890
*/
9891
public static boolean isLetterOrDigit(int codePoint) {
9892
return ((((1 << Character.UPPERCASE_LETTER) |
9893
(1 << Character.LOWERCASE_LETTER) |
9894
(1 << Character.TITLECASE_LETTER) |
9895
(1 << Character.MODIFIER_LETTER) |
9896
(1 << Character.OTHER_LETTER) |
9897
(1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
9898
!= 0;
9899
}
9900
9901
/**
9902
* Determines if the specified character is permissible as the first
9903
* character in a Java identifier.
9904
* <p>
9905
* A character may start a Java identifier if and only if
9906
* one of the following conditions is true:
9907
* <ul>
9908
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9909
* <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9910
* <li> {@code ch} is a currency symbol (such as {@code '$'})
9911
* <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9912
* </ul>
9913
*
9914
* @param ch the character to be tested.
9915
* @return {@code true} if the character may start a Java
9916
* identifier; {@code false} otherwise.
9917
* @see Character#isJavaLetterOrDigit(char)
9918
* @see Character#isJavaIdentifierStart(char)
9919
* @see Character#isJavaIdentifierPart(char)
9920
* @see Character#isLetter(char)
9921
* @see Character#isLetterOrDigit(char)
9922
* @see Character#isUnicodeIdentifierStart(char)
9923
* @since 1.0.2
9924
* @deprecated Replaced by isJavaIdentifierStart(char).
9925
*/
9926
@Deprecated(since="1.1")
9927
public static boolean isJavaLetter(char ch) {
9928
return isJavaIdentifierStart(ch);
9929
}
9930
9931
/**
9932
* Determines if the specified character may be part of a Java
9933
* identifier as other than the first character.
9934
* <p>
9935
* A character may be part of a Java identifier if and only if one
9936
* of the following conditions is true:
9937
* <ul>
9938
* <li> it is a letter
9939
* <li> it is a currency symbol (such as {@code '$'})
9940
* <li> it is a connecting punctuation character (such as {@code '_'})
9941
* <li> it is a digit
9942
* <li> it is a numeric letter (such as a Roman numeral character)
9943
* <li> it is a combining mark
9944
* <li> it is a non-spacing mark
9945
* <li> {@code isIdentifierIgnorable} returns
9946
* {@code true} for the character.
9947
* </ul>
9948
*
9949
* @param ch the character to be tested.
9950
* @return {@code true} if the character may be part of a
9951
* Java identifier; {@code false} otherwise.
9952
* @see Character#isJavaLetter(char)
9953
* @see Character#isJavaIdentifierStart(char)
9954
* @see Character#isJavaIdentifierPart(char)
9955
* @see Character#isLetter(char)
9956
* @see Character#isLetterOrDigit(char)
9957
* @see Character#isUnicodeIdentifierPart(char)
9958
* @see Character#isIdentifierIgnorable(char)
9959
* @since 1.0.2
9960
* @deprecated Replaced by isJavaIdentifierPart(char).
9961
*/
9962
@Deprecated(since="1.1")
9963
public static boolean isJavaLetterOrDigit(char ch) {
9964
return isJavaIdentifierPart(ch);
9965
}
9966
9967
/**
9968
* Determines if the specified character (Unicode code point) is alphabetic.
9969
* <p>
9970
* A character is considered to be alphabetic if its general category type,
9971
* provided by {@link Character#getType(int) getType(codePoint)}, is any of
9972
* the following:
9973
* <ul>
9974
* <li> {@code UPPERCASE_LETTER}
9975
* <li> {@code LOWERCASE_LETTER}
9976
* <li> {@code TITLECASE_LETTER}
9977
* <li> {@code MODIFIER_LETTER}
9978
* <li> {@code OTHER_LETTER}
9979
* <li> {@code LETTER_NUMBER}
9980
* </ul>
9981
* or it has contributory property Other_Alphabetic as defined by the
9982
* Unicode Standard.
9983
*
9984
* @param codePoint the character (Unicode code point) to be tested.
9985
* @return {@code true} if the character is a Unicode alphabet
9986
* character, {@code false} otherwise.
9987
* @since 1.7
9988
*/
9989
public static boolean isAlphabetic(int codePoint) {
9990
return (((((1 << Character.UPPERCASE_LETTER) |
9991
(1 << Character.LOWERCASE_LETTER) |
9992
(1 << Character.TITLECASE_LETTER) |
9993
(1 << Character.MODIFIER_LETTER) |
9994
(1 << Character.OTHER_LETTER) |
9995
(1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
9996
CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
9997
}
9998
9999
/**
10000
* Determines if the specified character (Unicode code point) is a CJKV
10001
* (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
10002
* the Unicode Standard.
10003
*
10004
* @param codePoint the character (Unicode code point) to be tested.
10005
* @return {@code true} if the character is a Unicode ideograph
10006
* character, {@code false} otherwise.
10007
* @since 1.7
10008
*/
10009
public static boolean isIdeographic(int codePoint) {
10010
return CharacterData.of(codePoint).isIdeographic(codePoint);
10011
}
10012
10013
/**
10014
* Determines if the specified character is
10015
* permissible as the first character in a Java identifier.
10016
* <p>
10017
* A character may start a Java identifier if and only if
10018
* one of the following conditions is true:
10019
* <ul>
10020
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10021
* <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10022
* <li> {@code ch} is a currency symbol (such as {@code '$'})
10023
* <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10024
* </ul>
10025
*
10026
* <p><b>Note:</b> This method cannot handle <a
10027
* href="#supplementary"> supplementary characters</a>. To support
10028
* all Unicode characters, including supplementary characters, use
10029
* the {@link #isJavaIdentifierStart(int)} method.
10030
*
10031
* @param ch the character to be tested.
10032
* @return {@code true} if the character may start a Java identifier;
10033
* {@code false} otherwise.
10034
* @see Character#isJavaIdentifierPart(char)
10035
* @see Character#isLetter(char)
10036
* @see Character#isUnicodeIdentifierStart(char)
10037
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10038
* @since 1.1
10039
*/
10040
public static boolean isJavaIdentifierStart(char ch) {
10041
return isJavaIdentifierStart((int)ch);
10042
}
10043
10044
/**
10045
* Determines if the character (Unicode code point) is
10046
* permissible as the first character in a Java identifier.
10047
* <p>
10048
* A character may start a Java identifier if and only if
10049
* one of the following conditions is true:
10050
* <ul>
10051
* <li> {@link #isLetter(int) isLetter(codePoint)}
10052
* returns {@code true}
10053
* <li> {@link #getType(int) getType(codePoint)}
10054
* returns {@code LETTER_NUMBER}
10055
* <li> the referenced character is a currency symbol (such as {@code '$'})
10056
* <li> the referenced character is a connecting punctuation character
10057
* (such as {@code '_'}).
10058
* </ul>
10059
*
10060
* @param codePoint the character (Unicode code point) to be tested.
10061
* @return {@code true} if the character may start a Java identifier;
10062
* {@code false} otherwise.
10063
* @see Character#isJavaIdentifierPart(int)
10064
* @see Character#isLetter(int)
10065
* @see Character#isUnicodeIdentifierStart(int)
10066
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10067
* @since 1.5
10068
*/
10069
public static boolean isJavaIdentifierStart(int codePoint) {
10070
return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
10071
}
10072
10073
/**
10074
* Determines if the specified character may be part of a Java
10075
* identifier as other than the first character.
10076
* <p>
10077
* A character may be part of a Java identifier if any of the following
10078
* conditions are true:
10079
* <ul>
10080
* <li> it is a letter
10081
* <li> it is a currency symbol (such as {@code '$'})
10082
* <li> it is a connecting punctuation character (such as {@code '_'})
10083
* <li> it is a digit
10084
* <li> it is a numeric letter (such as a Roman numeral character)
10085
* <li> it is a combining mark
10086
* <li> it is a non-spacing mark
10087
* <li> {@code isIdentifierIgnorable} returns
10088
* {@code true} for the character
10089
* </ul>
10090
*
10091
* <p><b>Note:</b> This method cannot handle <a
10092
* href="#supplementary"> supplementary characters</a>. To support
10093
* all Unicode characters, including supplementary characters, use
10094
* the {@link #isJavaIdentifierPart(int)} method.
10095
*
10096
* @param ch the character to be tested.
10097
* @return {@code true} if the character may be part of a
10098
* Java identifier; {@code false} otherwise.
10099
* @see Character#isIdentifierIgnorable(char)
10100
* @see Character#isJavaIdentifierStart(char)
10101
* @see Character#isLetterOrDigit(char)
10102
* @see Character#isUnicodeIdentifierPart(char)
10103
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10104
* @since 1.1
10105
*/
10106
public static boolean isJavaIdentifierPart(char ch) {
10107
return isJavaIdentifierPart((int)ch);
10108
}
10109
10110
/**
10111
* Determines if the character (Unicode code point) may be part of a Java
10112
* identifier as other than the first character.
10113
* <p>
10114
* A character may be part of a Java identifier if any of the following
10115
* conditions are true:
10116
* <ul>
10117
* <li> it is a letter
10118
* <li> it is a currency symbol (such as {@code '$'})
10119
* <li> it is a connecting punctuation character (such as {@code '_'})
10120
* <li> it is a digit
10121
* <li> it is a numeric letter (such as a Roman numeral character)
10122
* <li> it is a combining mark
10123
* <li> it is a non-spacing mark
10124
* <li> {@link #isIdentifierIgnorable(int)
10125
* isIdentifierIgnorable(codePoint)} returns {@code true} for
10126
* the code point
10127
* </ul>
10128
*
10129
* @param codePoint the character (Unicode code point) to be tested.
10130
* @return {@code true} if the character may be part of a
10131
* Java identifier; {@code false} otherwise.
10132
* @see Character#isIdentifierIgnorable(int)
10133
* @see Character#isJavaIdentifierStart(int)
10134
* @see Character#isLetterOrDigit(int)
10135
* @see Character#isUnicodeIdentifierPart(int)
10136
* @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10137
* @since 1.5
10138
*/
10139
public static boolean isJavaIdentifierPart(int codePoint) {
10140
return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
10141
}
10142
10143
/**
10144
* Determines if the specified character is permissible as the
10145
* first character in a Unicode identifier.
10146
* <p>
10147
* A character may start a Unicode identifier if and only if
10148
* one of the following conditions is true:
10149
* <ul>
10150
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10151
* <li> {@link #getType(char) getType(ch)} returns
10152
* {@code LETTER_NUMBER}.
10153
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10154
* {@code Other_ID_Start}</a> character.
10155
* </ul>
10156
* <p>
10157
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10158
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10159
* with the following profile of UAX31:
10160
* <pre>
10161
* Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10162
* </pre>
10163
* {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10164
* compatibility.
10165
*
10166
* <p><b>Note:</b> This method cannot handle <a
10167
* href="#supplementary"> supplementary characters</a>. To support
10168
* all Unicode characters, including supplementary characters, use
10169
* the {@link #isUnicodeIdentifierStart(int)} method.
10170
*
10171
* @param ch the character to be tested.
10172
* @return {@code true} if the character may start a Unicode
10173
* identifier; {@code false} otherwise.
10174
* @see Character#isJavaIdentifierStart(char)
10175
* @see Character#isLetter(char)
10176
* @see Character#isUnicodeIdentifierPart(char)
10177
* @since 1.1
10178
*/
10179
public static boolean isUnicodeIdentifierStart(char ch) {
10180
return isUnicodeIdentifierStart((int)ch);
10181
}
10182
10183
/**
10184
* Determines if the specified character (Unicode code point) is permissible as the
10185
* first character in a Unicode identifier.
10186
* <p>
10187
* A character may start a Unicode identifier if and only if
10188
* one of the following conditions is true:
10189
* <ul>
10190
* <li> {@link #isLetter(int) isLetter(codePoint)}
10191
* returns {@code true}
10192
* <li> {@link #getType(int) getType(codePoint)}
10193
* returns {@code LETTER_NUMBER}.
10194
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10195
* {@code Other_ID_Start}</a> character.
10196
* </ul>
10197
* <p>
10198
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10199
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10200
* with the following profile of UAX31:
10201
* <pre>
10202
* Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10203
* </pre>
10204
* {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10205
* compatibility.
10206
*
10207
* @param codePoint the character (Unicode code point) to be tested.
10208
* @return {@code true} if the character may start a Unicode
10209
* identifier; {@code false} otherwise.
10210
* @see Character#isJavaIdentifierStart(int)
10211
* @see Character#isLetter(int)
10212
* @see Character#isUnicodeIdentifierPart(int)
10213
* @since 1.5
10214
*/
10215
public static boolean isUnicodeIdentifierStart(int codePoint) {
10216
return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
10217
}
10218
10219
/**
10220
* Determines if the specified character may be part of a Unicode
10221
* identifier as other than the first character.
10222
* <p>
10223
* A character may be part of a Unicode identifier if and only if
10224
* one of the following statements is true:
10225
* <ul>
10226
* <li> it is a letter
10227
* <li> it is a connecting punctuation character (such as {@code '_'})
10228
* <li> it is a digit
10229
* <li> it is a numeric letter (such as a Roman numeral character)
10230
* <li> it is a combining mark
10231
* <li> it is a non-spacing mark
10232
* <li> {@code isIdentifierIgnorable} returns
10233
* {@code true} for this character.
10234
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10235
* {@code Other_ID_Start}</a> character.
10236
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10237
* {@code Other_ID_Continue}</a> character.
10238
* </ul>
10239
* <p>
10240
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10241
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10242
* with the following profile of UAX31:
10243
* <pre>
10244
* Continue := Start + ID_Continue + ignorable
10245
* Medial := empty
10246
* ignorable := isIdentifierIgnorable(char) returns true for the character
10247
* </pre>
10248
* {@code ignorable} is added to {@code Continue} for backward
10249
* compatibility.
10250
*
10251
* <p><b>Note:</b> This method cannot handle <a
10252
* href="#supplementary"> supplementary characters</a>. To support
10253
* all Unicode characters, including supplementary characters, use
10254
* the {@link #isUnicodeIdentifierPart(int)} method.
10255
*
10256
* @param ch the character to be tested.
10257
* @return {@code true} if the character may be part of a
10258
* Unicode identifier; {@code false} otherwise.
10259
* @see Character#isIdentifierIgnorable(char)
10260
* @see Character#isJavaIdentifierPart(char)
10261
* @see Character#isLetterOrDigit(char)
10262
* @see Character#isUnicodeIdentifierStart(char)
10263
* @since 1.1
10264
*/
10265
public static boolean isUnicodeIdentifierPart(char ch) {
10266
return isUnicodeIdentifierPart((int)ch);
10267
}
10268
10269
/**
10270
* Determines if the specified character (Unicode code point) may be part of a Unicode
10271
* identifier as other than the first character.
10272
* <p>
10273
* A character may be part of a Unicode identifier if and only if
10274
* one of the following statements is true:
10275
* <ul>
10276
* <li> it is a letter
10277
* <li> it is a connecting punctuation character (such as {@code '_'})
10278
* <li> it is a digit
10279
* <li> it is a numeric letter (such as a Roman numeral character)
10280
* <li> it is a combining mark
10281
* <li> it is a non-spacing mark
10282
* <li> {@code isIdentifierIgnorable} returns
10283
* {@code true} for this character.
10284
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10285
* {@code Other_ID_Start}</a> character.
10286
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10287
* {@code Other_ID_Continue}</a> character.
10288
* </ul>
10289
* <p>
10290
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10291
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10292
* with the following profile of UAX31:
10293
* <pre>
10294
* Continue := Start + ID_Continue + ignorable
10295
* Medial := empty
10296
* ignorable := isIdentifierIgnorable(int) returns true for the character
10297
* </pre>
10298
* {@code ignorable} is added to {@code Continue} for backward
10299
* compatibility.
10300
*
10301
* @param codePoint the character (Unicode code point) to be tested.
10302
* @return {@code true} if the character may be part of a
10303
* Unicode identifier; {@code false} otherwise.
10304
* @see Character#isIdentifierIgnorable(int)
10305
* @see Character#isJavaIdentifierPart(int)
10306
* @see Character#isLetterOrDigit(int)
10307
* @see Character#isUnicodeIdentifierStart(int)
10308
* @since 1.5
10309
*/
10310
public static boolean isUnicodeIdentifierPart(int codePoint) {
10311
return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
10312
}
10313
10314
/**
10315
* Determines if the specified character should be regarded as
10316
* an ignorable character in a Java identifier or a Unicode identifier.
10317
* <p>
10318
* The following Unicode characters are ignorable in a Java identifier
10319
* or a Unicode identifier:
10320
* <ul>
10321
* <li>ISO control characters that are not whitespace
10322
* <ul>
10323
* <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10324
* <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10325
* <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10326
* </ul>
10327
*
10328
* <li>all characters that have the {@code FORMAT} general
10329
* category value
10330
* </ul>
10331
*
10332
* <p><b>Note:</b> This method cannot handle <a
10333
* href="#supplementary"> supplementary characters</a>. To support
10334
* all Unicode characters, including supplementary characters, use
10335
* the {@link #isIdentifierIgnorable(int)} method.
10336
*
10337
* @param ch the character to be tested.
10338
* @return {@code true} if the character is an ignorable control
10339
* character that may be part of a Java or Unicode identifier;
10340
* {@code false} otherwise.
10341
* @see Character#isJavaIdentifierPart(char)
10342
* @see Character#isUnicodeIdentifierPart(char)
10343
* @since 1.1
10344
*/
10345
public static boolean isIdentifierIgnorable(char ch) {
10346
return isIdentifierIgnorable((int)ch);
10347
}
10348
10349
/**
10350
* Determines if the specified character (Unicode code point) should be regarded as
10351
* an ignorable character in a Java identifier or a Unicode identifier.
10352
* <p>
10353
* The following Unicode characters are ignorable in a Java identifier
10354
* or a Unicode identifier:
10355
* <ul>
10356
* <li>ISO control characters that are not whitespace
10357
* <ul>
10358
* <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10359
* <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10360
* <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10361
* </ul>
10362
*
10363
* <li>all characters that have the {@code FORMAT} general
10364
* category value
10365
* </ul>
10366
*
10367
* @param codePoint the character (Unicode code point) to be tested.
10368
* @return {@code true} if the character is an ignorable control
10369
* character that may be part of a Java or Unicode identifier;
10370
* {@code false} otherwise.
10371
* @see Character#isJavaIdentifierPart(int)
10372
* @see Character#isUnicodeIdentifierPart(int)
10373
* @since 1.5
10374
*/
10375
public static boolean isIdentifierIgnorable(int codePoint) {
10376
return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
10377
}
10378
10379
/**
10380
* Converts the character argument to lowercase using case
10381
* mapping information from the UnicodeData file.
10382
* <p>
10383
* Note that
10384
* {@code Character.isLowerCase(Character.toLowerCase(ch))}
10385
* does not always return {@code true} for some ranges of
10386
* characters, particularly those that are symbols or ideographs.
10387
*
10388
* <p>In general, {@link String#toLowerCase()} should be used to map
10389
* characters to lowercase. {@code String} case mapping methods
10390
* have several benefits over {@code Character} case mapping methods.
10391
* {@code String} case mapping methods can perform locale-sensitive
10392
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
10393
* the {@code Character} case mapping methods cannot.
10394
*
10395
* <p><b>Note:</b> This method cannot handle <a
10396
* href="#supplementary"> supplementary characters</a>. To support
10397
* all Unicode characters, including supplementary characters, use
10398
* the {@link #toLowerCase(int)} method.
10399
*
10400
* @param ch the character to be converted.
10401
* @return the lowercase equivalent of the character, if any;
10402
* otherwise, the character itself.
10403
* @see Character#isLowerCase(char)
10404
* @see String#toLowerCase()
10405
*/
10406
public static char toLowerCase(char ch) {
10407
return (char)toLowerCase((int)ch);
10408
}
10409
10410
/**
10411
* Converts the character (Unicode code point) argument to
10412
* lowercase using case mapping information from the UnicodeData
10413
* file.
10414
*
10415
* <p> Note that
10416
* {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
10417
* does not always return {@code true} for some ranges of
10418
* characters, particularly those that are symbols or ideographs.
10419
*
10420
* <p>In general, {@link String#toLowerCase()} should be used to map
10421
* characters to lowercase. {@code String} case mapping methods
10422
* have several benefits over {@code Character} case mapping methods.
10423
* {@code String} case mapping methods can perform locale-sensitive
10424
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
10425
* the {@code Character} case mapping methods cannot.
10426
*
10427
* @param codePoint the character (Unicode code point) to be converted.
10428
* @return the lowercase equivalent of the character (Unicode code
10429
* point), if any; otherwise, the character itself.
10430
* @see Character#isLowerCase(int)
10431
* @see String#toLowerCase()
10432
*
10433
* @since 1.5
10434
*/
10435
public static int toLowerCase(int codePoint) {
10436
return CharacterData.of(codePoint).toLowerCase(codePoint);
10437
}
10438
10439
/**
10440
* Converts the character argument to uppercase using case mapping
10441
* information from the UnicodeData file.
10442
* <p>
10443
* Note that
10444
* {@code Character.isUpperCase(Character.toUpperCase(ch))}
10445
* does not always return {@code true} for some ranges of
10446
* characters, particularly those that are symbols or ideographs.
10447
*
10448
* <p>In general, {@link String#toUpperCase()} should be used to map
10449
* characters to uppercase. {@code String} case mapping methods
10450
* have several benefits over {@code Character} case mapping methods.
10451
* {@code String} case mapping methods can perform locale-sensitive
10452
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
10453
* the {@code Character} case mapping methods cannot.
10454
*
10455
* <p><b>Note:</b> This method cannot handle <a
10456
* href="#supplementary"> supplementary characters</a>. To support
10457
* all Unicode characters, including supplementary characters, use
10458
* the {@link #toUpperCase(int)} method.
10459
*
10460
* @param ch the character to be converted.
10461
* @return the uppercase equivalent of the character, if any;
10462
* otherwise, the character itself.
10463
* @see Character#isUpperCase(char)
10464
* @see String#toUpperCase()
10465
*/
10466
public static char toUpperCase(char ch) {
10467
return (char)toUpperCase((int)ch);
10468
}
10469
10470
/**
10471
* Converts the character (Unicode code point) argument to
10472
* uppercase using case mapping information from the UnicodeData
10473
* file.
10474
*
10475
* <p>Note that
10476
* {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
10477
* does not always return {@code true} for some ranges of
10478
* characters, particularly those that are symbols or ideographs.
10479
*
10480
* <p>In general, {@link String#toUpperCase()} should be used to map
10481
* characters to uppercase. {@code String} case mapping methods
10482
* have several benefits over {@code Character} case mapping methods.
10483
* {@code String} case mapping methods can perform locale-sensitive
10484
* mappings, context-sensitive mappings, and 1:M character mappings, whereas
10485
* the {@code Character} case mapping methods cannot.
10486
*
10487
* @param codePoint the character (Unicode code point) to be converted.
10488
* @return the uppercase equivalent of the character, if any;
10489
* otherwise, the character itself.
10490
* @see Character#isUpperCase(int)
10491
* @see String#toUpperCase()
10492
*
10493
* @since 1.5
10494
*/
10495
public static int toUpperCase(int codePoint) {
10496
return CharacterData.of(codePoint).toUpperCase(codePoint);
10497
}
10498
10499
/**
10500
* Converts the character argument to titlecase using case mapping
10501
* information from the UnicodeData file. If a character has no
10502
* explicit titlecase mapping and is not itself a titlecase char
10503
* according to UnicodeData, then the uppercase mapping is
10504
* returned as an equivalent titlecase mapping. If the
10505
* {@code char} argument is already a titlecase
10506
* {@code char}, the same {@code char} value will be
10507
* returned.
10508
* <p>
10509
* Note that
10510
* {@code Character.isTitleCase(Character.toTitleCase(ch))}
10511
* does not always return {@code true} for some ranges of
10512
* characters.
10513
*
10514
* <p><b>Note:</b> This method cannot handle <a
10515
* href="#supplementary"> supplementary characters</a>. To support
10516
* all Unicode characters, including supplementary characters, use
10517
* the {@link #toTitleCase(int)} method.
10518
*
10519
* @param ch the character to be converted.
10520
* @return the titlecase equivalent of the character, if any;
10521
* otherwise, the character itself.
10522
* @see Character#isTitleCase(char)
10523
* @see Character#toLowerCase(char)
10524
* @see Character#toUpperCase(char)
10525
* @since 1.0.2
10526
*/
10527
public static char toTitleCase(char ch) {
10528
return (char)toTitleCase((int)ch);
10529
}
10530
10531
/**
10532
* Converts the character (Unicode code point) argument to titlecase using case mapping
10533
* information from the UnicodeData file. If a character has no
10534
* explicit titlecase mapping and is not itself a titlecase char
10535
* according to UnicodeData, then the uppercase mapping is
10536
* returned as an equivalent titlecase mapping. If the
10537
* character argument is already a titlecase
10538
* character, the same character value will be
10539
* returned.
10540
*
10541
* <p>Note that
10542
* {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
10543
* does not always return {@code true} for some ranges of
10544
* characters.
10545
*
10546
* @param codePoint the character (Unicode code point) to be converted.
10547
* @return the titlecase equivalent of the character, if any;
10548
* otherwise, the character itself.
10549
* @see Character#isTitleCase(int)
10550
* @see Character#toLowerCase(int)
10551
* @see Character#toUpperCase(int)
10552
* @since 1.5
10553
*/
10554
public static int toTitleCase(int codePoint) {
10555
return CharacterData.of(codePoint).toTitleCase(codePoint);
10556
}
10557
10558
/**
10559
* Returns the numeric value of the character {@code ch} in the
10560
* specified radix.
10561
* <p>
10562
* If the radix is not in the range {@code MIN_RADIX} &le;
10563
* {@code radix} &le; {@code MAX_RADIX} or if the
10564
* value of {@code ch} is not a valid digit in the specified
10565
* radix, {@code -1} is returned. A character is a valid digit
10566
* if at least one of the following is true:
10567
* <ul>
10568
* <li>The method {@code isDigit} is {@code true} of the character
10569
* and the Unicode decimal digit value of the character (or its
10570
* single-character decomposition) is less than the specified radix.
10571
* In this case the decimal digit value is returned.
10572
* <li>The character is one of the uppercase Latin letters
10573
* {@code 'A'} through {@code 'Z'} and its code is less than
10574
* {@code radix + 'A' - 10}.
10575
* In this case, {@code ch - 'A' + 10}
10576
* is returned.
10577
* <li>The character is one of the lowercase Latin letters
10578
* {@code 'a'} through {@code 'z'} and its code is less than
10579
* {@code radix + 'a' - 10}.
10580
* In this case, {@code ch - 'a' + 10}
10581
* is returned.
10582
* <li>The character is one of the fullwidth uppercase Latin letters A
10583
* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10584
* and its code is less than
10585
* {@code radix + '\u005CuFF21' - 10}.
10586
* In this case, {@code ch - '\u005CuFF21' + 10}
10587
* is returned.
10588
* <li>The character is one of the fullwidth lowercase Latin letters a
10589
* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10590
* and its code is less than
10591
* {@code radix + '\u005CuFF41' - 10}.
10592
* In this case, {@code ch - '\u005CuFF41' + 10}
10593
* is returned.
10594
* </ul>
10595
*
10596
* <p><b>Note:</b> This method cannot handle <a
10597
* href="#supplementary"> supplementary characters</a>. To support
10598
* all Unicode characters, including supplementary characters, use
10599
* the {@link #digit(int, int)} method.
10600
*
10601
* @param ch the character to be converted.
10602
* @param radix the radix.
10603
* @return the numeric value represented by the character in the
10604
* specified radix.
10605
* @see Character#forDigit(int, int)
10606
* @see Character#isDigit(char)
10607
*/
10608
public static int digit(char ch, int radix) {
10609
return digit((int)ch, radix);
10610
}
10611
10612
/**
10613
* Returns the numeric value of the specified character (Unicode
10614
* code point) in the specified radix.
10615
*
10616
* <p>If the radix is not in the range {@code MIN_RADIX} &le;
10617
* {@code radix} &le; {@code MAX_RADIX} or if the
10618
* character is not a valid digit in the specified
10619
* radix, {@code -1} is returned. A character is a valid digit
10620
* if at least one of the following is true:
10621
* <ul>
10622
* <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
10623
* and the Unicode decimal digit value of the character (or its
10624
* single-character decomposition) is less than the specified radix.
10625
* In this case the decimal digit value is returned.
10626
* <li>The character is one of the uppercase Latin letters
10627
* {@code 'A'} through {@code 'Z'} and its code is less than
10628
* {@code radix + 'A' - 10}.
10629
* In this case, {@code codePoint - 'A' + 10}
10630
* is returned.
10631
* <li>The character is one of the lowercase Latin letters
10632
* {@code 'a'} through {@code 'z'} and its code is less than
10633
* {@code radix + 'a' - 10}.
10634
* In this case, {@code codePoint - 'a' + 10}
10635
* is returned.
10636
* <li>The character is one of the fullwidth uppercase Latin letters A
10637
* ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10638
* and its code is less than
10639
* {@code radix + '\u005CuFF21' - 10}.
10640
* In this case,
10641
* {@code codePoint - '\u005CuFF21' + 10}
10642
* is returned.
10643
* <li>The character is one of the fullwidth lowercase Latin letters a
10644
* ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10645
* and its code is less than
10646
* {@code radix + '\u005CuFF41'- 10}.
10647
* In this case,
10648
* {@code codePoint - '\u005CuFF41' + 10}
10649
* is returned.
10650
* </ul>
10651
*
10652
* @param codePoint the character (Unicode code point) to be converted.
10653
* @param radix the radix.
10654
* @return the numeric value represented by the character in the
10655
* specified radix.
10656
* @see Character#forDigit(int, int)
10657
* @see Character#isDigit(int)
10658
* @since 1.5
10659
*/
10660
public static int digit(int codePoint, int radix) {
10661
return CharacterData.of(codePoint).digit(codePoint, radix);
10662
}
10663
10664
/**
10665
* Returns the {@code int} value that the specified Unicode
10666
* character represents. For example, the character
10667
* {@code '\u005Cu216C'} (the roman numeral fifty) will return
10668
* an int with a value of 50.
10669
* <p>
10670
* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10671
* {@code '\u005Cu005A'}), lowercase
10672
* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10673
* full width variant ({@code '\u005CuFF21'} through
10674
* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10675
* {@code '\u005CuFF5A'}) forms have numeric values from 10
10676
* through 35. This is independent of the Unicode specification,
10677
* which does not assign numeric values to these {@code char}
10678
* values.
10679
* <p>
10680
* If the character does not have a numeric value, then -1 is returned.
10681
* If the character has a numeric value that cannot be represented as a
10682
* nonnegative integer (for example, a fractional value), then -2
10683
* is returned.
10684
*
10685
* <p><b>Note:</b> This method cannot handle <a
10686
* href="#supplementary"> supplementary characters</a>. To support
10687
* all Unicode characters, including supplementary characters, use
10688
* the {@link #getNumericValue(int)} method.
10689
*
10690
* @param ch the character to be converted.
10691
* @return the numeric value of the character, as a nonnegative {@code int}
10692
* value; -2 if the character has a numeric value but the value
10693
* can not be represented as a nonnegative {@code int} value;
10694
* -1 if the character has no numeric value.
10695
* @see Character#forDigit(int, int)
10696
* @see Character#isDigit(char)
10697
* @since 1.1
10698
*/
10699
public static int getNumericValue(char ch) {
10700
return getNumericValue((int)ch);
10701
}
10702
10703
/**
10704
* Returns the {@code int} value that the specified
10705
* character (Unicode code point) represents. For example, the character
10706
* {@code '\u005Cu216C'} (the Roman numeral fifty) will return
10707
* an {@code int} with a value of 50.
10708
* <p>
10709
* The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10710
* {@code '\u005Cu005A'}), lowercase
10711
* ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10712
* full width variant ({@code '\u005CuFF21'} through
10713
* {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10714
* {@code '\u005CuFF5A'}) forms have numeric values from 10
10715
* through 35. This is independent of the Unicode specification,
10716
* which does not assign numeric values to these {@code char}
10717
* values.
10718
* <p>
10719
* If the character does not have a numeric value, then -1 is returned.
10720
* If the character has a numeric value that cannot be represented as a
10721
* nonnegative integer (for example, a fractional value), then -2
10722
* is returned.
10723
*
10724
* @param codePoint the character (Unicode code point) to be converted.
10725
* @return the numeric value of the character, as a nonnegative {@code int}
10726
* value; -2 if the character has a numeric value but the value
10727
* can not be represented as a nonnegative {@code int} value;
10728
* -1 if the character has no numeric value.
10729
* @see Character#forDigit(int, int)
10730
* @see Character#isDigit(int)
10731
* @since 1.5
10732
*/
10733
public static int getNumericValue(int codePoint) {
10734
return CharacterData.of(codePoint).getNumericValue(codePoint);
10735
}
10736
10737
/**
10738
* Determines if the specified character is ISO-LATIN-1 white space.
10739
* This method returns {@code true} for the following five
10740
* characters only:
10741
* <table class="striped">
10742
* <caption style="display:none">truechars</caption>
10743
* <thead>
10744
* <tr><th scope="col">Character
10745
* <th scope="col">Code
10746
* <th scope="col">Name
10747
* </thead>
10748
* <tbody>
10749
* <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td>
10750
* <td>{@code HORIZONTAL TABULATION}</td></tr>
10751
* <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td>
10752
* <td>{@code NEW LINE}</td></tr>
10753
* <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td>
10754
* <td>{@code FORM FEED}</td></tr>
10755
* <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td>
10756
* <td>{@code CARRIAGE RETURN}</td></tr>
10757
* <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td>
10758
* <td>{@code SPACE}</td></tr>
10759
* </tbody>
10760
* </table>
10761
*
10762
* @param ch the character to be tested.
10763
* @return {@code true} if the character is ISO-LATIN-1 white
10764
* space; {@code false} otherwise.
10765
* @see Character#isSpaceChar(char)
10766
* @see Character#isWhitespace(char)
10767
* @deprecated Replaced by isWhitespace(char).
10768
*/
10769
@Deprecated(since="1.1")
10770
public static boolean isSpace(char ch) {
10771
return (ch <= 0x0020) &&
10772
(((((1L << 0x0009) |
10773
(1L << 0x000A) |
10774
(1L << 0x000C) |
10775
(1L << 0x000D) |
10776
(1L << 0x0020)) >> ch) & 1L) != 0);
10777
}
10778
10779
10780
/**
10781
* Determines if the specified character is a Unicode space character.
10782
* A character is considered to be a space character if and only if
10783
* it is specified to be a space character by the Unicode Standard. This
10784
* method returns true if the character's general category type is any of
10785
* the following:
10786
* <ul>
10787
* <li> {@code SPACE_SEPARATOR}
10788
* <li> {@code LINE_SEPARATOR}
10789
* <li> {@code PARAGRAPH_SEPARATOR}
10790
* </ul>
10791
*
10792
* <p><b>Note:</b> This method cannot handle <a
10793
* href="#supplementary"> supplementary characters</a>. To support
10794
* all Unicode characters, including supplementary characters, use
10795
* the {@link #isSpaceChar(int)} method.
10796
*
10797
* @param ch the character to be tested.
10798
* @return {@code true} if the character is a space character;
10799
* {@code false} otherwise.
10800
* @see Character#isWhitespace(char)
10801
* @since 1.1
10802
*/
10803
public static boolean isSpaceChar(char ch) {
10804
return isSpaceChar((int)ch);
10805
}
10806
10807
/**
10808
* Determines if the specified character (Unicode code point) is a
10809
* Unicode space character. A character is considered to be a
10810
* space character if and only if it is specified to be a space
10811
* character by the Unicode Standard. This method returns true if
10812
* the character's general category type is any of the following:
10813
*
10814
* <ul>
10815
* <li> {@link #SPACE_SEPARATOR}
10816
* <li> {@link #LINE_SEPARATOR}
10817
* <li> {@link #PARAGRAPH_SEPARATOR}
10818
* </ul>
10819
*
10820
* @param codePoint the character (Unicode code point) to be tested.
10821
* @return {@code true} if the character is a space character;
10822
* {@code false} otherwise.
10823
* @see Character#isWhitespace(int)
10824
* @since 1.5
10825
*/
10826
public static boolean isSpaceChar(int codePoint) {
10827
return ((((1 << Character.SPACE_SEPARATOR) |
10828
(1 << Character.LINE_SEPARATOR) |
10829
(1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
10830
!= 0;
10831
}
10832
10833
/**
10834
* Determines if the specified character is white space according to Java.
10835
* A character is a Java whitespace character if and only if it satisfies
10836
* one of the following criteria:
10837
* <ul>
10838
* <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
10839
* {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
10840
* but is not also a non-breaking space ({@code '\u005Cu00A0'},
10841
* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10842
* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10843
* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10844
* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10845
* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10846
* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10847
* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10848
* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10849
* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10850
* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10851
* </ul>
10852
*
10853
* <p><b>Note:</b> This method cannot handle <a
10854
* href="#supplementary"> supplementary characters</a>. To support
10855
* all Unicode characters, including supplementary characters, use
10856
* the {@link #isWhitespace(int)} method.
10857
*
10858
* @param ch the character to be tested.
10859
* @return {@code true} if the character is a Java whitespace
10860
* character; {@code false} otherwise.
10861
* @see Character#isSpaceChar(char)
10862
* @since 1.1
10863
*/
10864
public static boolean isWhitespace(char ch) {
10865
return isWhitespace((int)ch);
10866
}
10867
10868
/**
10869
* Determines if the specified character (Unicode code point) is
10870
* white space according to Java. A character is a Java
10871
* whitespace character if and only if it satisfies one of the
10872
* following criteria:
10873
* <ul>
10874
* <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
10875
* {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
10876
* but is not also a non-breaking space ({@code '\u005Cu00A0'},
10877
* {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10878
* <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10879
* <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10880
* <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10881
* <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10882
* <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10883
* <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10884
* <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10885
* <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10886
* <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10887
* </ul>
10888
*
10889
* @param codePoint the character (Unicode code point) to be tested.
10890
* @return {@code true} if the character is a Java whitespace
10891
* character; {@code false} otherwise.
10892
* @see Character#isSpaceChar(int)
10893
* @since 1.5
10894
*/
10895
public static boolean isWhitespace(int codePoint) {
10896
return CharacterData.of(codePoint).isWhitespace(codePoint);
10897
}
10898
10899
/**
10900
* Determines if the specified character is an ISO control
10901
* character. A character is considered to be an ISO control
10902
* character if its code is in the range {@code '\u005Cu0000'}
10903
* through {@code '\u005Cu001F'} or in the range
10904
* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10905
*
10906
* <p><b>Note:</b> This method cannot handle <a
10907
* href="#supplementary"> supplementary characters</a>. To support
10908
* all Unicode characters, including supplementary characters, use
10909
* the {@link #isISOControl(int)} method.
10910
*
10911
* @param ch the character to be tested.
10912
* @return {@code true} if the character is an ISO control character;
10913
* {@code false} otherwise.
10914
*
10915
* @see Character#isSpaceChar(char)
10916
* @see Character#isWhitespace(char)
10917
* @since 1.1
10918
*/
10919
public static boolean isISOControl(char ch) {
10920
return isISOControl((int)ch);
10921
}
10922
10923
/**
10924
* Determines if the referenced character (Unicode code point) is an ISO control
10925
* character. A character is considered to be an ISO control
10926
* character if its code is in the range {@code '\u005Cu0000'}
10927
* through {@code '\u005Cu001F'} or in the range
10928
* {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10929
*
10930
* @param codePoint the character (Unicode code point) to be tested.
10931
* @return {@code true} if the character is an ISO control character;
10932
* {@code false} otherwise.
10933
* @see Character#isSpaceChar(int)
10934
* @see Character#isWhitespace(int)
10935
* @since 1.5
10936
*/
10937
public static boolean isISOControl(int codePoint) {
10938
// Optimized form of:
10939
// (codePoint >= 0x00 && codePoint <= 0x1F) ||
10940
// (codePoint >= 0x7F && codePoint <= 0x9F);
10941
return codePoint <= 0x9F &&
10942
(codePoint >= 0x7F || (codePoint >>> 5 == 0));
10943
}
10944
10945
/**
10946
* Returns a value indicating a character's general category.
10947
*
10948
* <p><b>Note:</b> This method cannot handle <a
10949
* href="#supplementary"> supplementary characters</a>. To support
10950
* all Unicode characters, including supplementary characters, use
10951
* the {@link #getType(int)} method.
10952
*
10953
* @param ch the character to be tested.
10954
* @return a value of type {@code int} representing the
10955
* character's general category.
10956
* @see Character#COMBINING_SPACING_MARK
10957
* @see Character#CONNECTOR_PUNCTUATION
10958
* @see Character#CONTROL
10959
* @see Character#CURRENCY_SYMBOL
10960
* @see Character#DASH_PUNCTUATION
10961
* @see Character#DECIMAL_DIGIT_NUMBER
10962
* @see Character#ENCLOSING_MARK
10963
* @see Character#END_PUNCTUATION
10964
* @see Character#FINAL_QUOTE_PUNCTUATION
10965
* @see Character#FORMAT
10966
* @see Character#INITIAL_QUOTE_PUNCTUATION
10967
* @see Character#LETTER_NUMBER
10968
* @see Character#LINE_SEPARATOR
10969
* @see Character#LOWERCASE_LETTER
10970
* @see Character#MATH_SYMBOL
10971
* @see Character#MODIFIER_LETTER
10972
* @see Character#MODIFIER_SYMBOL
10973
* @see Character#NON_SPACING_MARK
10974
* @see Character#OTHER_LETTER
10975
* @see Character#OTHER_NUMBER
10976
* @see Character#OTHER_PUNCTUATION
10977
* @see Character#OTHER_SYMBOL
10978
* @see Character#PARAGRAPH_SEPARATOR
10979
* @see Character#PRIVATE_USE
10980
* @see Character#SPACE_SEPARATOR
10981
* @see Character#START_PUNCTUATION
10982
* @see Character#SURROGATE
10983
* @see Character#TITLECASE_LETTER
10984
* @see Character#UNASSIGNED
10985
* @see Character#UPPERCASE_LETTER
10986
* @since 1.1
10987
*/
10988
public static int getType(char ch) {
10989
return getType((int)ch);
10990
}
10991
10992
/**
10993
* Returns a value indicating a character's general category.
10994
*
10995
* @param codePoint the character (Unicode code point) to be tested.
10996
* @return a value of type {@code int} representing the
10997
* character's general category.
10998
* @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
10999
* @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
11000
* @see Character#CONTROL CONTROL
11001
* @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
11002
* @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
11003
* @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
11004
* @see Character#ENCLOSING_MARK ENCLOSING_MARK
11005
* @see Character#END_PUNCTUATION END_PUNCTUATION
11006
* @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
11007
* @see Character#FORMAT FORMAT
11008
* @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
11009
* @see Character#LETTER_NUMBER LETTER_NUMBER
11010
* @see Character#LINE_SEPARATOR LINE_SEPARATOR
11011
* @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
11012
* @see Character#MATH_SYMBOL MATH_SYMBOL
11013
* @see Character#MODIFIER_LETTER MODIFIER_LETTER
11014
* @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
11015
* @see Character#NON_SPACING_MARK NON_SPACING_MARK
11016
* @see Character#OTHER_LETTER OTHER_LETTER
11017
* @see Character#OTHER_NUMBER OTHER_NUMBER
11018
* @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
11019
* @see Character#OTHER_SYMBOL OTHER_SYMBOL
11020
* @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
11021
* @see Character#PRIVATE_USE PRIVATE_USE
11022
* @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
11023
* @see Character#START_PUNCTUATION START_PUNCTUATION
11024
* @see Character#SURROGATE SURROGATE
11025
* @see Character#TITLECASE_LETTER TITLECASE_LETTER
11026
* @see Character#UNASSIGNED UNASSIGNED
11027
* @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
11028
* @since 1.5
11029
*/
11030
public static int getType(int codePoint) {
11031
return CharacterData.of(codePoint).getType(codePoint);
11032
}
11033
11034
/**
11035
* Determines the character representation for a specific digit in
11036
* the specified radix. If the value of {@code radix} is not a
11037
* valid radix, or the value of {@code digit} is not a valid
11038
* digit in the specified radix, the null character
11039
* ({@code '\u005Cu0000'}) is returned.
11040
* <p>
11041
* The {@code radix} argument is valid if it is greater than or
11042
* equal to {@code MIN_RADIX} and less than or equal to
11043
* {@code MAX_RADIX}. The {@code digit} argument is valid if
11044
* {@code 0 <= digit < radix}.
11045
* <p>
11046
* If the digit is less than 10, then
11047
* {@code '0' + digit} is returned. Otherwise, the value
11048
* {@code 'a' + digit - 10} is returned.
11049
*
11050
* @param digit the number to convert to a character.
11051
* @param radix the radix.
11052
* @return the {@code char} representation of the specified digit
11053
* in the specified radix.
11054
* @see Character#MIN_RADIX
11055
* @see Character#MAX_RADIX
11056
* @see Character#digit(char, int)
11057
*/
11058
public static char forDigit(int digit, int radix) {
11059
if ((digit >= radix) || (digit < 0)) {
11060
return '\0';
11061
}
11062
if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
11063
return '\0';
11064
}
11065
if (digit < 10) {
11066
return (char)('0' + digit);
11067
}
11068
return (char)('a' - 10 + digit);
11069
}
11070
11071
/**
11072
* Returns the Unicode directionality property for the given
11073
* character. Character directionality is used to calculate the
11074
* visual ordering of text. The directionality value of undefined
11075
* {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
11076
*
11077
* <p><b>Note:</b> This method cannot handle <a
11078
* href="#supplementary"> supplementary characters</a>. To support
11079
* all Unicode characters, including supplementary characters, use
11080
* the {@link #getDirectionality(int)} method.
11081
*
11082
* @param ch {@code char} for which the directionality property
11083
* is requested.
11084
* @return the directionality property of the {@code char} value.
11085
*
11086
* @see Character#DIRECTIONALITY_UNDEFINED
11087
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
11088
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
11089
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11090
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
11091
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11092
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11093
* @see Character#DIRECTIONALITY_ARABIC_NUMBER
11094
* @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11095
* @see Character#DIRECTIONALITY_NONSPACING_MARK
11096
* @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
11097
* @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
11098
* @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
11099
* @see Character#DIRECTIONALITY_WHITESPACE
11100
* @see Character#DIRECTIONALITY_OTHER_NEUTRALS
11101
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11102
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11103
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11104
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11105
* @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11106
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11107
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11108
* @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
11109
* @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11110
* @since 1.4
11111
*/
11112
public static byte getDirectionality(char ch) {
11113
return getDirectionality((int)ch);
11114
}
11115
11116
/**
11117
* Returns the Unicode directionality property for the given
11118
* character (Unicode code point). Character directionality is
11119
* used to calculate the visual ordering of text. The
11120
* directionality value of an undefined character is {@link
11121
* #DIRECTIONALITY_UNDEFINED}.
11122
*
11123
* @param codePoint the character (Unicode code point) for which
11124
* the directionality property is requested.
11125
* @return the directionality property of the character.
11126
*
11127
* @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
11128
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
11129
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
11130
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11131
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
11132
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11133
* @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11134
* @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
11135
* @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11136
* @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
11137
* @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
11138
* @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
11139
* @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
11140
* @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
11141
* @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
11142
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11143
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11144
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11145
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11146
* @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11147
* @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11148
* @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11149
* @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
11150
* @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11151
* @since 1.5
11152
*/
11153
public static byte getDirectionality(int codePoint) {
11154
return CharacterData.of(codePoint).getDirectionality(codePoint);
11155
}
11156
11157
/**
11158
* Determines whether the character is mirrored according to the
11159
* Unicode specification. Mirrored characters should have their
11160
* glyphs horizontally mirrored when displayed in text that is
11161
* right-to-left. For example, {@code '\u005Cu0028'} LEFT
11162
* PARENTHESIS is semantically defined to be an <i>opening
11163
* parenthesis</i>. This will appear as a "(" in text that is
11164
* left-to-right but as a ")" in text that is right-to-left.
11165
*
11166
* <p><b>Note:</b> This method cannot handle <a
11167
* href="#supplementary"> supplementary characters</a>. To support
11168
* all Unicode characters, including supplementary characters, use
11169
* the {@link #isMirrored(int)} method.
11170
*
11171
* @param ch {@code char} for which the mirrored property is requested
11172
* @return {@code true} if the char is mirrored, {@code false}
11173
* if the {@code char} is not mirrored or is not defined.
11174
* @since 1.4
11175
*/
11176
public static boolean isMirrored(char ch) {
11177
return isMirrored((int)ch);
11178
}
11179
11180
/**
11181
* Determines whether the specified character (Unicode code point)
11182
* is mirrored according to the Unicode specification. Mirrored
11183
* characters should have their glyphs horizontally mirrored when
11184
* displayed in text that is right-to-left. For example,
11185
* {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
11186
* defined to be an <i>opening parenthesis</i>. This will appear
11187
* as a "(" in text that is left-to-right but as a ")" in text
11188
* that is right-to-left.
11189
*
11190
* @param codePoint the character (Unicode code point) to be tested.
11191
* @return {@code true} if the character is mirrored, {@code false}
11192
* if the character is not mirrored or is not defined.
11193
* @since 1.5
11194
*/
11195
public static boolean isMirrored(int codePoint) {
11196
return CharacterData.of(codePoint).isMirrored(codePoint);
11197
}
11198
11199
/**
11200
* Compares two {@code Character} objects numerically.
11201
*
11202
* @param anotherCharacter the {@code Character} to be compared.
11203
* @return the value {@code 0} if the argument {@code Character}
11204
* is equal to this {@code Character}; a value less than
11205
* {@code 0} if this {@code Character} is numerically less
11206
* than the {@code Character} argument; and a value greater than
11207
* {@code 0} if this {@code Character} is numerically greater
11208
* than the {@code Character} argument (unsigned comparison).
11209
* Note that this is strictly a numerical comparison; it is not
11210
* locale-dependent.
11211
* @since 1.2
11212
*/
11213
public int compareTo(Character anotherCharacter) {
11214
return compare(this.value, anotherCharacter.value);
11215
}
11216
11217
/**
11218
* Compares two {@code char} values numerically.
11219
* The value returned is identical to what would be returned by:
11220
* <pre>
11221
* Character.valueOf(x).compareTo(Character.valueOf(y))
11222
* </pre>
11223
*
11224
* @param x the first {@code char} to compare
11225
* @param y the second {@code char} to compare
11226
* @return the value {@code 0} if {@code x == y};
11227
* a value less than {@code 0} if {@code x < y}; and
11228
* a value greater than {@code 0} if {@code x > y}
11229
* @since 1.7
11230
*/
11231
public static int compare(char x, char y) {
11232
return x - y;
11233
}
11234
11235
/**
11236
* Converts the character (Unicode code point) argument to uppercase using
11237
* information from the UnicodeData file.
11238
*
11239
* @param codePoint the character (Unicode code point) to be converted.
11240
* @return either the uppercase equivalent of the character, if
11241
* any, or an error flag ({@code Character.ERROR})
11242
* that indicates that a 1:M {@code char} mapping exists.
11243
* @see Character#isLowerCase(char)
11244
* @see Character#isUpperCase(char)
11245
* @see Character#toLowerCase(char)
11246
* @see Character#toTitleCase(char)
11247
* @since 1.4
11248
*/
11249
static int toUpperCaseEx(int codePoint) {
11250
assert isValidCodePoint(codePoint);
11251
return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
11252
}
11253
11254
/**
11255
* Converts the character (Unicode code point) argument to uppercase using case
11256
* mapping information from the SpecialCasing file in the Unicode
11257
* specification. If a character has no explicit uppercase
11258
* mapping, then the {@code char} itself is returned in the
11259
* {@code char[]}.
11260
*
11261
* @param codePoint the character (Unicode code point) to be converted.
11262
* @return a {@code char[]} with the uppercased character.
11263
* @since 1.4
11264
*/
11265
static char[] toUpperCaseCharArray(int codePoint) {
11266
// As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
11267
assert isBmpCodePoint(codePoint);
11268
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
11269
}
11270
11271
/**
11272
* The number of bits used to represent a {@code char} value in unsigned
11273
* binary form, constant {@code 16}.
11274
*
11275
* @since 1.5
11276
*/
11277
public static final int SIZE = 16; // width of a char in bits; BYTES is derived from this
11278
11279
/**
11280
* The number of bytes used to represent a {@code char} value in unsigned
11281
* binary form.
11282
*
11283
* @since 1.8
11284
*/
11285
public static final int BYTES = SIZE / Byte.SIZE; // bits per char divided by bits per byte
11286
11287
/**
11288
* Returns the value obtained by reversing the order of the bytes in the
11289
* specified {@code char} value.
11290
*
11291
* @param ch The {@code char} of which to reverse the byte order.
11292
* @return the value obtained by reversing (or, equivalently, swapping)
11293
* the bytes in the specified {@code char} value.
11294
* @since 1.5
11295
*/
11296
@IntrinsicCandidate
11297
public static char reverseBytes(char ch) {
11298
return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
11299
}
11300
11301
/**
11302
* Returns the Unicode name of the specified character
11303
* {@code codePoint}, or null if the code point is
11304
* {@link #UNASSIGNED unassigned}.
11305
* <p>
11306
* Note: if the specified character is not assigned a name by
11307
* the <i>UnicodeData</i> file (part of the Unicode Character
11308
* Database maintained by the Unicode Consortium), the returned
11309
* name is the same as the result of expression:
11310
*
11311
* <blockquote>{@code
11312
* Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11313
* + " "
11314
* + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11315
*
11316
* }</blockquote>
11317
*
11318
* @param codePoint the character (Unicode code point)
11319
*
11320
* @return the Unicode name of the specified character, or null if
11321
* the code point is unassigned.
11322
*
11323
* @throws IllegalArgumentException if the specified
11324
* {@code codePoint} is not a valid Unicode
11325
* code point.
11326
*
11327
* @since 1.7
11328
*/
11329
public static String getName(int codePoint) {
11330
if (!isValidCodePoint(codePoint)) {
11331
throw new IllegalArgumentException(
11332
String.format("Not a valid Unicode code point: 0x%X", codePoint));
11333
}
11334
String name = CharacterName.getInstance().getName(codePoint);
11335
if (name != null)
11336
return name;
11337
if (getType(codePoint) == UNASSIGNED)
11338
return null;
11339
UnicodeBlock block = UnicodeBlock.of(codePoint);
11340
if (block != null)
11341
return block.toString().replace('_', ' ') + " "
11342
+ Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11343
// should never come here
11344
return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11345
}
11346
11347
/**
11348
* Returns the code point value of the Unicode character specified by
11349
* the given Unicode character name.
11350
* <p>
11351
* Note: if a character is not assigned a name by the <i>UnicodeData</i>
11352
* file (part of the Unicode Character Database maintained by the Unicode
11353
* Consortium), its name is defined as the result of expression:
11354
*
11355
* <blockquote>{@code
11356
* Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11357
* + " "
11358
* + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11359
*
11360
* }</blockquote>
11361
* <p>
11362
* The {@code name} matching is case insensitive, with any leading and
11363
* trailing whitespace characters removed.
11364
*
11365
* @param name the Unicode character name
11366
*
11367
* @return the code point value of the character specified by its name.
11368
*
11369
* @throws IllegalArgumentException if the specified {@code name}
11370
* is not a valid Unicode character name.
11371
* @throws NullPointerException if {@code name} is {@code null}
11372
*
11373
* @since 9
11374
*/
11375
public static int codePointOf(String name) {
11376
name = name.trim().toUpperCase(Locale.ROOT);
11377
int cp = CharacterName.getInstance().getCodePoint(name);
11378
if (cp != -1)
11379
return cp;
11380
try {
11381
int off = name.lastIndexOf(' ');
11382
if (off != -1) {
11383
cp = Integer.parseInt(name, off + 1, name.length(), 16);
11384
if (isValidCodePoint(cp) && name.equals(getName(cp)))
11385
return cp;
11386
}
11387
} catch (Exception x) {}
11388
throw new IllegalArgumentException("Unrecognized character name :" + name);
11389
}
11390
}
11391
11392