CoCalc -- UnicodeSpec.java

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/test/jdk/java/lang/Character/UnicodeSpec.java
⁴¹¹⁴⁹ views
1
/*
2
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26

27
import java.io.BufferedReader;
28
import java.io.FileReader;
29
import java.io.FileNotFoundException;
30
import java.io.IOException;
31
import java.io.File;
32
import java.util.regex.Pattern;
33
import java.util.ArrayList;
34

35
/**
36
 * The UnicodeSpec class provides a way to read in Unicode character
37
 * properties from a Unicode data file.  One instance of class UnicodeSpec
38
 * holds a decoded version of one line of the data file.  The file may
39
 * be obtained from www.unicode.org.  The method readSpecFile returns an array
40
 * of UnicodeSpec objects.
41
 *
42
 * @author      Guy Steele
43
 * @author  John O'Conner
44
 */
45

46
public class UnicodeSpec {
47

48
    public UnicodeSpec() {
49
        this(0xffff);
50
    }
51

52
    public UnicodeSpec(int codePoint) {
53
        this.codePoint = codePoint;
54
        generalCategory = UNASSIGNED;
55
        bidiCategory = DIRECTIONALITY_UNDEFINED;
56
        mirrored = false;
57
        titleMap = 0xFFFF;
58
        upperMap = 0xFFFF;
59
        lowerMap = 0xFFFF;
60
        decimalValue = -1;
61
        digitValue = -1;
62
        numericValue = "";
63
                oldName = null;
64
                comment = null;
65
                name = null;
66
    }
67

68
    public String toString() {
69
        StringBuffer result = new StringBuffer(hex6(codePoint));
70
        if (getUpperMap() != 0xffff) {
71
            result.append(", upper=").append(hex6(upperMap));
72
        }
73
        if (getLowerMap() != 0xffff) {
74
            result.append(", lower=").append(hex6(lowerMap));
75
        }
76
        if (getTitleMap() != 0xffff) {
77
            result.append(", title=").append(hex6(titleMap));
78
        }
79
        return result.toString();
80
    }
81

82
    static String hex4(int n) {
83
        String q = Long.toHexString(n & 0xFFFF).toUpperCase();
84
        return "0000".substring(Math.min(4, q.length())) + q;
85
    }
86

87
        static String hex6(int n) {
88
                String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
89
                return "000000".substring(Math.min(6, str.length())) + str;
90

91
        }
92

93

94
    /**
95
    * Given one line of a Unicode data file as a String, parse the line
96
    * and return a UnicodeSpec object that contains the same character information.
97
    *
98
    * @param s a line of the Unicode data file to be parsed
99
    * @return a UnicodeSpec object, or null if the parsing process failed for some reason
100
    */
101
    public static UnicodeSpec parse(String s) {
102
        UnicodeSpec spec = null;
103
        String[] tokens = null;
104

105
        try {
106
                        tokens = tokenSeparator.split(s, REQUIRED_FIELDS);
107
            spec = new UnicodeSpec();
108
            spec.setCodePoint(parseCodePoint(tokens[FIELD_VALUE]));
109
            spec.setName(parseName(tokens[FIELD_NAME]));
110
            spec.setGeneralCategory(parseGeneralCategory(tokens[FIELD_CATEGORY]));
111
            spec.setBidiCategory(parseBidiCategory(tokens[FIELD_BIDI]));
112
            spec.setCombiningClass(parseCombiningClass(tokens[FIELD_CLASS]));
113
            spec.setDecomposition(parseDecomposition(tokens[FIELD_DECOMPOSITION]));
114
            spec.setDecimalValue(parseDecimalValue(tokens[FIELD_DECIMAL]));
115
            spec.setDigitValue(parseDigitValue(tokens[FIELD_DIGIT]));
116
            spec.setNumericValue(parseNumericValue(tokens[FIELD_NUMERIC]));
117
            spec.setMirrored(parseMirrored(tokens[FIELD_MIRRORED]));
118
            spec.setOldName(parseOldName(tokens[FIELD_OLDNAME]));
119
            spec.setComment(parseComment(tokens[FIELD_COMMENT]));
120
            spec.setUpperMap(parseUpperMap(tokens[FIELD_UPPERCASE]));
121
            spec.setLowerMap(parseLowerMap(tokens[FIELD_LOWERCASE]));
122
            spec.setTitleMap(parseTitleMap(tokens[FIELD_TITLECASE]));
123
        }
124
        catch(Exception e) {
125
            spec = null;
126
            System.out.println("Error parsing spec line.");
127
        }
128
        return spec;
129
    }
130

131
    /**
132
    * Parse the codePoint attribute for a Unicode character.  If the parse succeeds,
133
    * the codePoint field of this UnicodeSpec object is updated and false is returned.
134
    *
135
    * The codePoint attribute should be a four-digit hexadecimal integer.
136
    *
137
    * @param s   the codePoint attribute extracted from a line of the Unicode data file
138
    * @return   code point if successful
139
    * @exception NumberFormatException if unable to parse argument
140
    */
141
    public static int parseCodePoint(String s) throws NumberFormatException {
142
        return Integer.parseInt(s, 16);
143
    }
144

145
    public static String parseName(String s) throws Exception {
146
        if (s==null) throw new Exception("Cannot parse name.");
147
        return s;
148
    }
149

150
    public static byte parseGeneralCategory(String s) throws Exception {
151
        byte category = GENERAL_CATEGORY_COUNT;
152

153
        for (byte x=0; x<generalCategoryList.length; x++) {
154
            if (s.equals(generalCategoryList[x][SHORT])) {
155
                category = x;
156
                break;
157
            }
158
        }
159
        if (category >= GENERAL_CATEGORY_COUNT) {
160
            throw new Exception("Could not parse general category.");
161
        }
162
        return category;
163
    }
164

165
    public static byte parseBidiCategory(String s) throws Exception {
166
        byte category = DIRECTIONALITY_CATEGORY_COUNT;
167

168
        for (byte x=0; x<bidiCategoryList.length; x++) {
169
            if (s.equals(bidiCategoryList[x][SHORT])) {
170
                category = x;
171
                break;
172
            }
173
        }
174
        if (category >= DIRECTIONALITY_CATEGORY_COUNT) {
175
            throw new Exception("Could not parse bidi category.");
176
        }
177
        return category;
178
    }
179

180

181
    /**
182
    * Parse the combining attribute for a Unicode character.  If there is a combining
183
    * attribute and the parse succeeds, then the hasCombining field is set to true,
184
    * the combining field of this UnicodeSpec object is updated, and false is returned.
185
    * If the combining attribute is an empty string, the parse succeeds but the
186
    * hasCombining field is set to false. (and false is returned).
187
    *
188
    * The combining attribute, if any, should be a nonnegative decimal integer.
189
    *
190
    * @param s   the combining attribute extracted from a line of the Unicode data file
191
    * @return   the combining class value if any, -1 if property not defined
192
    * @exception Exception if can't parse the combining class
193
    */
194

195
    public static int parseCombiningClass(String s) throws Exception {
196
        int combining = -1;
197
        if (s.length()>0) {
198
            combining = Integer.parseInt(s, 10);
199
        }
200
        return combining;
201
    }
202

203
    /**
204
    * Parse the decomposition attribute for a Unicode character.  If the parse succeeds,
205
    * the decomposition field of this UnicodeSpec object is updated and false is returned.
206
    *
207
    * The decomposition attribute is complicated; for now, it is treated as a string.
208
    *
209
    * @param s   the decomposition attribute extracted from a line of the Unicode data file
210
    * @return   true if the parse failed; otherwise false
211
    */
212

213
    public static String parseDecomposition(String s) throws Exception {
214
        if (s==null) throw new Exception("Cannot parse decomposition.");
215
        return s;
216
    }
217

218

219
    /**
220
    * Parse the decimal value attribute for a Unicode character.  If there is a decimal value
221
    * attribute and the parse succeeds, then the hasDecimalValue field is set to true,
222
    * the decimalValue field of this UnicodeSpec object is updated, and false is returned.
223
    * If the decimal value attribute is an empty string, the parse succeeds but the
224
    * hasDecimalValue field is set to false. (and false is returned).
225
    *
226
    * The decimal value attribute, if any, should be a nonnegative decimal integer.
227
    *
228
    * @param s   the decimal value attribute extracted from a line of the Unicode data file
229
    * @return   the decimal value as an int, -1 if no decimal value defined
230
    * @exception NumberFormatException if the parse fails
231
    */
232
    public static int parseDecimalValue(String s) throws NumberFormatException {
233
        int value = -1;
234

235
        if (s.length() > 0) {
236
            value = Integer.parseInt(s, 10);
237
        }
238
        return value;
239
    }
240

241
    /**
242
    * Parse the digit value attribute for a Unicode character.  If there is a digit value
243
    * attribute and the parse succeeds, then the hasDigitValue field is set to true,
244
    * the digitValue field of this UnicodeSpec object is updated, and false is returned.
245
    * If the digit value attribute is an empty string, the parse succeeds but the
246
    * hasDigitValue field is set to false. (and false is returned).
247
    *
248
    * The digit value attribute, if any, should be a nonnegative decimal integer.
249
    *
250
    * @param s   the digit value attribute extracted from a line of the Unicode data file
251
    * @return   the digit value as an non-negative int, or -1 if no digit property defined
252
    * @exception NumberFormatException if the parse fails
253
    */
254
    public static int parseDigitValue(String s) throws NumberFormatException {
255
        int value = -1;
256

257
        if (s.length() > 0) {
258
            value = Integer.parseInt(s, 10);
259
        }
260
        return value;
261
    }
262

263
    public static String parseNumericValue(String s) throws Exception {
264
        if (s == null) throw new Exception("Cannot parse numeric value.");
265
        return s;
266
    }
267

268
    public static String parseComment(String s) throws Exception {
269
        if (s == null) throw new Exception("Cannot parse comment.");
270
        return s;
271
    }
272

273
    public static boolean parseMirrored(String s) throws Exception {
274
        boolean mirrored;
275
        if (s.length() == 1) {
276
            if (s.charAt(0) == 'Y') {mirrored = true;}
277
            else if (s.charAt(0) == 'N') {mirrored = false;}
278
            else {throw new Exception("Cannot parse mirrored property.");}
279
        }
280
        else { throw new Exception("Cannot parse mirrored property.");}
281
        return mirrored;
282
    }
283

284
    public static String parseOldName(String s) throws Exception {
285
        if (s == null) throw new Exception("Cannot parse old name");
286
        return s;
287
    }
288

289
    /**
290
    * Parse the uppercase mapping attribute for a Unicode character.  If there is a uppercase
291
    * mapping attribute and the parse succeeds, then the hasUpperMap field is set to true,
292
    * the upperMap field of this UnicodeSpec object is updated, and false is returned.
293
    * If the uppercase mapping attribute is an empty string, the parse succeeds but the
294
    * hasUpperMap field is set to false. (and false is returned).
295
    *
296
    * The uppercase mapping attribute should be a four-digit hexadecimal integer.
297
    *
298
    * @param s   the uppercase mapping attribute extracted from a line of the Unicode data file
299
    * @return   uppercase char if defined, \uffff otherwise
300
    * @exception NumberFormatException if parse fails
301
    */
302
    public static int parseUpperMap(String s) throws NumberFormatException {
303
        int upperCase = 0xFFFF;
304

305
        if (s.length() >= 4) {
306
            upperCase = Integer.parseInt(s, 16);
307
        }
308
        else if (s.length() != 0) {
309
            throw new NumberFormatException();
310
        }
311
        return upperCase;
312
    }
313

314
    /**
315
    * Parse the lowercase mapping attribute for a Unicode character.  If there is a lowercase
316
    * mapping attribute and the parse succeeds, then the hasLowerMap field is set to true,
317
    * the lowerMap field of this UnicodeSpec object is updated, and false is returned.
318
    * If the lowercase mapping attribute is an empty string, the parse succeeds but the
319
     * hasLowerMap field is set to false. (and false is returned).
320
    *
321
    * The lowercase mapping attribute should be a four-digit hexadecimal integer.
322
    *
323
    * @param s   the lowercase mapping attribute extracted from a line of the Unicode data file
324
    * @return   lowercase char mapping if defined, \uFFFF otherwise
325
    * @exception NumberFormatException if parse fails
326
    */
327
    public static int parseLowerMap(String s) throws NumberFormatException {
328
        int lowerCase = 0xFFFF;
329

330
        if (s.length() >= 4) {
331
            lowerCase = Integer.parseInt(s, 16);
332
        }
333
        else if (s.length() != 0) {
334
            throw new NumberFormatException();
335
        }
336
        return lowerCase;
337
    }
338

339
    /**
340
    * Parse the titlecase mapping attribute for a Unicode character.  If there is a titlecase
341
    * mapping attribute and the parse succeeds, then the hasTitleMap field is set to true,
342
    * the titleMap field of this UnicodeSpec object is updated, and false is returned.
343
    * If the titlecase mapping attribute is an empty string, the parse succeeds but the
344
    * hasTitleMap field is set to false. (and false is returned).
345
    *
346
    * The titlecase mapping attribute should be a four-digit hexadecimal integer.
347
    *
348
    * @param s   the titlecase mapping attribute extracted from a line of the Unicode data file
349
    * @return   title case char mapping if defined, \uFFFF otherwise
350
    * @exception NumberFormatException if parse fails
351
    */
352
    public static int parseTitleMap(String s) throws NumberFormatException {
353
        int titleCase = 0xFFFF;
354

355
        if (s.length() >= 4) {
356
            titleCase = Integer.parseInt(s, 16);
357
        }
358
        else if (s.length() != 0) {
359
            throw new NumberFormatException();
360
        }
361
        return titleCase;
362
    }
363

364
    /**
365
    * Read and parse a Unicode data file.
366
    *
367
    * @param file   a file specifying the Unicode data file to be read
368
    * @return   an array of UnicodeSpec objects, one for each line of the
369
    *           Unicode data file that could be successfully parsed as
370
    *           specifying Unicode character attributes
371
    */
372

373
    public static UnicodeSpec[] readSpecFile(File file, int plane) throws FileNotFoundException {
374
                ArrayList<UnicodeSpec> list = new ArrayList<>(3000);
375
        UnicodeSpec[] result = null;
376
        int count = 0;
377
        BufferedReader f = new BufferedReader(new FileReader(file));
378
                String line = null;
379
        loop:
380
        while(true) {
381
            try {
382
                line = f.readLine();
383
            }
384
            catch (IOException e) {
385
                                break loop;
386
                        }
387
            if (line == null) break loop;
388
            UnicodeSpec item = parse(line.trim());
389
                        int specPlane = item.getCodePoint() >>> 16;
390
                        if (specPlane < plane) continue;
391
                        if (specPlane > plane) break;
392

393
            if (item != null) {
394
                                list.add(item);
395
            }
396
        }
397
                result = new UnicodeSpec[list.size()];
398
                list.toArray(result);
399
        return result;
400
    }
401

402
    void setCodePoint(int value) {
403
        codePoint = value;
404
    }
405

406
    /**
407
     * Return the code point in this Unicode specification
408
     * @return the char code point representing by the specification
409
     */
410
    public int getCodePoint() {
411
        return codePoint;
412
    }
413

414
    void setName(String name) {
415
        this.name = name;
416
    }
417

418
    public String getName() {
419
        return name;
420
    }
421

422
    void setGeneralCategory(byte category) {
423
        generalCategory = category;
424
    }
425

426
    public byte getGeneralCategory() {
427
        return generalCategory;
428
    }
429

430
    void setBidiCategory(byte category) {
431
        bidiCategory = category;
432
    }
433

434
    public byte getBidiCategory() {
435
        return bidiCategory;
436
    }
437

438
    void setCombiningClass(int combiningClass) {
439
        this.combiningClass = combiningClass;
440
    }
441

442
    public int getCombiningClass() {
443
        return combiningClass;
444
    }
445

446
    void setDecomposition(String decomposition) {
447
        this.decomposition = decomposition;
448
    }
449

450
    public String getDecomposition() {
451
         return decomposition;
452
    }
453

454
    void setDecimalValue(int value) {
455
        decimalValue = value;
456
    }
457

458
    public int getDecimalValue() {
459
        return decimalValue;
460
    }
461

462
    public boolean isDecimalValue() {
463
        return decimalValue != -1;
464
    }
465

466
    void setDigitValue(int value) {
467
        digitValue = value;
468
    }
469

470
    public int getDigitValue() {
471
        return digitValue;
472
    }
473

474
    public boolean isDigitValue() {
475
        return digitValue != -1;
476
    }
477

478
    void setNumericValue(String value) {
479
        numericValue = value;
480
    }
481

482
    public String getNumericValue() {
483
        return numericValue;
484
    }
485

486
    public boolean isNumericValue() {
487
        return numericValue.length() > 0;
488
    }
489

490
    void setMirrored(boolean value) {
491
        mirrored = value;
492
    }
493

494
    public boolean isMirrored() {
495
        return mirrored;
496
    }
497

498
    void setOldName(String name) {
499
        oldName = name;
500
    }
501

502
    public String getOldName() {
503
        return oldName;
504
    }
505

506
    void setComment(String comment) {
507
        this.comment = comment;
508
    }
509

510
    public String getComment() {
511
        return comment;
512
    }
513

514
    void setUpperMap(int ch) {
515
        upperMap = ch;
516
    };
517

518
    public int getUpperMap() {
519
        return upperMap;
520
    }
521

522
    public boolean hasUpperMap() {
523
        return upperMap != 0xffff;
524
    }
525

526
    void setLowerMap(int ch) {
527
        lowerMap = ch;
528
    }
529

530
    public int getLowerMap() {
531
        return lowerMap;
532
    }
533

534
    public boolean hasLowerMap() {
535
        return lowerMap != 0xffff;
536
    }
537

538
    void setTitleMap(int ch) {
539
        titleMap = ch;
540
    }
541

542
    public int getTitleMap() {
543
        return titleMap;
544
    }
545

546
    public boolean hasTitleMap() {
547
        return titleMap != 0xffff;
548
    }
549

550
    int codePoint;         // the characters UTF-32 code value
551
    String name;            // the ASCII name
552
    byte generalCategory;   // general category, available via Characte.getType()
553
    byte bidiCategory;      // available via Character.getBidiType()
554
    int combiningClass;     // not used in Character
555
    String decomposition;   // not used in Character
556
    int decimalValue;       // decimal digit value
557
    int digitValue;         // not all digits are decimal
558
    String numericValue;    // numeric value if digit or non-digit
559
    boolean mirrored;       //
560
    String oldName;
561
    String comment;
562
    int upperMap;
563
    int lowerMap;
564
    int titleMap;
565

566
    // this is the number of fields in one line of the UnicodeData.txt file
567
    // each field is separated by a semicolon (a token)
568
    static final int REQUIRED_FIELDS = 15;
569

570
    /**
571
     * General category types
572
     * To preserve compatibility, these values cannot be changed
573
     */
574
    public static final byte
575
        UNASSIGNED                  =  0, // Cn normative
576
        UPPERCASE_LETTER            =  1, // Lu normative
577
        LOWERCASE_LETTER            =  2, // Ll normative
578
        TITLECASE_LETTER            =  3, // Lt normative
579
        MODIFIER_LETTER             =  4, // Lm normative
580
        OTHER_LETTER                =  5, // Lo normative
581
        NON_SPACING_MARK            =  6, // Mn informative
582
        ENCLOSING_MARK              =  7, // Me informative
583
        COMBINING_SPACING_MARK      =  8, // Mc normative
584
        DECIMAL_DIGIT_NUMBER        =  9, // Nd normative
585
        LETTER_NUMBER               = 10, // Nl normative
586
        OTHER_NUMBER                = 11, // No normative
587
        SPACE_SEPARATOR             = 12, // Zs normative
588
        LINE_SEPARATOR              = 13, // Zl normative
589
        PARAGRAPH_SEPARATOR         = 14, // Zp normative
590
        CONTROL                     = 15, // Cc normative
591
        FORMAT                      = 16, // Cf normative
592
        // 17 is unused for no apparent reason,
593
        // but must preserve forward compatibility
594
        PRIVATE_USE                 = 18, // Co normative
595
        SURROGATE                   = 19, // Cs normative
596
        DASH_PUNCTUATION            = 20, // Pd informative
597
        START_PUNCTUATION           = 21, // Ps informative
598
        END_PUNCTUATION             = 22, // Pe informative
599
        CONNECTOR_PUNCTUATION       = 23, // Pc informative
600
        OTHER_PUNCTUATION           = 24, // Po informative
601
        MATH_SYMBOL                 = 25, // Sm informative
602
        CURRENCY_SYMBOL             = 26, // Sc informative
603
        MODIFIER_SYMBOL             = 27, // Sk informative
604
        OTHER_SYMBOL                = 28, // So informative
605
        INITIAL_QUOTE_PUNCTUATION   = 29, // Pi informative
606
        FINAL_QUOTE_PUNCTUATION     = 30, // Pf informative
607

608
        // this value is only used in the character generation tool
609
        // it can change to accommodate the addition of new categories.
610
        GENERAL_CATEGORY_COUNT      = 31; // sentinel value
611

612
    static final byte SHORT = 0, LONG = 1;
613
    // general category type strings
614
    // NOTE: The order of this category array is dependent on the assignment of
615
    // category constants above. We want to access this array using constants above.
616
    // [][SHORT] is the SHORT name, [][LONG] is the LONG name
617
    static final String[][] generalCategoryList = {
618
        {"Cn", "UNASSIGNED"},
619
        {"Lu", "UPPERCASE_LETTER"},
620
        {"Ll", "LOWERCASE_LETTER"},
621
        {"Lt", "TITLECASE_LETTER"},
622
        {"Lm", "MODIFIER_LETTER"},
623
        {"Lo", "OTHER_LETTER"},
624
        {"Mn", "NON_SPACING_MARK"},
625
        {"Me", "ENCLOSING_MARK"},
626
        {"Mc", "COMBINING_SPACING_MARK"},
627
        {"Nd", "DECIMAL_DIGIT_NUMBER"},
628
        {"Nl", "LETTER_NUMBER"},
629
        {"No", "OTHER_NUMBER"},
630
        {"Zs", "SPACE_SEPARATOR"},
631
        {"Zl", "LINE_SEPARATOR"},
632
        {"Zp", "PARAGRAPH_SEPARATOR"},
633
        {"Cc", "CONTROL"},
634
        {"Cf", "FORMAT"},
635
        {"xx", "unused"},
636
        {"Co", "PRIVATE_USE"},
637
        {"Cs", "SURROGATE"},
638
        {"Pd", "DASH_PUNCTUATION"},
639
        {"Ps", "START_PUNCTUATION"},
640
        {"Pe", "END_PUNCTUATION"},
641
        {"Pc", "CONNECTOR_PUNCTUATION"},
642
        {"Po", "OTHER_PUNCTUATION"},
643
        {"Sm", "MATH_SYMBOL"},
644
        {"Sc", "CURRENCY_SYMBOL"},
645
        {"Sk", "MODIFIER_SYMBOL"},
646
        {"So", "OTHER_SYMBOL"},
647
        {"Pi", "INITIAL_QUOTE_PUNCTUATION"},
648
        {"Pf", "FINAL_QUOTE_PUNCTUATION"}
649
    };
650

651
    /**
652
     * Bidirectional categories
653
     */
654
    public static final byte
655
                DIRECTIONALITY_UNDEFINED                  = -1,
656
        // Strong category
657
        DIRECTIONALITY_LEFT_TO_RIGHT              =  0, // L
658
        DIRECTIONALITY_RIGHT_TO_LEFT              =  1, // R
659
        DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC       =  2, // AL
660
        // Weak category
661
        DIRECTIONALITY_EUROPEAN_NUMBER            =  3, // EN
662
        DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR  =  4, // ES
663
        DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR =  5, // ET
664
        DIRECTIONALITY_ARABIC_NUMBER              =  6, // AN
665
        DIRECTIONALITY_COMMON_NUMBER_SEPARATOR    =  7, // CS
666
        DIRECTIONALITY_NONSPACING_MARK            =  8, // NSM
667
        DIRECTIONALITY_BOUNDARY_NEUTRAL           =  9, // BN
668
        // Neutral category
669
        DIRECTIONALITY_PARAGRAPH_SEPARATOR        = 10, // B
670
        DIRECTIONALITY_SEGMENT_SEPARATOR          = 11, // S
671
        DIRECTIONALITY_WHITESPACE                 = 12, // WS
672
        DIRECTIONALITY_OTHER_NEUTRALS              = 13, // ON
673

674
        DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING    = 14, // LRE
675
        DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE     = 15, // LRO
676
        DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING    = 16, // RLE
677
        DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE     = 17, // RLO
678
        DIRECTIONALITY_POP_DIRECTIONAL_FORMAT     = 18, // PDF
679

680
        DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE      = 19, // LRI
681
        DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE      = 20, // RLI
682
        DIRECTIONALITY_FIRST_STRONG_ISOLATE       = 21, // FSI
683
        DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE    = 22, // PDI
684

685
        DIRECTIONALITY_CATEGORY_COUNT             = 23; // sentinel value
686

687
    // If changes are made to the above bidi category assignments, this
688
    // list of bidi category names must be changed to keep their order in synch.
689
    // Access this list using the bidi category constants above.
690
    static final String[][] bidiCategoryList = {
691
        {"L", "DIRECTIONALITY_LEFT_TO_RIGHT"},
692
        {"R", "DIRECTIONALITY_RIGHT_TO_LEFT"},
693
        {"AL", "DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC"},
694
        {"EN", "DIRECTIONALITY_EUROPEAN_NUMBER"},
695
        {"ES", "DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR"},
696
        {"ET", "DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR"},
697
        {"AN", "DIRECTIONALITY_ARABIC_NUMBER"},
698
        {"CS", "DIRECTIONALITY_COMMON_NUMBER_SEPARATOR"},
699
        {"NSM", "DIRECTIONALITY_NONSPACING_MARK"},
700
        {"BN", "DIRECTIONALITY_BOUNDARY_NEUTRAL"},
701
        {"B", "DIRECTIONALITY_PARAGRAPH_SEPARATOR"},
702
        {"S", "DIRECTIONALITY_SEGMENT_SEPARATOR"},
703
        {"WS", "DIRECTIONALITY_WHITESPACE"},
704
        {"ON", "DIRECTIONALITY_OTHER_NEUTRALS"},
705
        {"LRE", "DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING"},
706
        {"LRO", "DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE"},
707
        {"RLE", "DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING"},
708
        {"RLO", "DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE"},
709
        {"PDF", "DIRECTIONALITY_POP_DIRECTIONAL_FORMAT"},
710
        {"LRI", "DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE"},
711
        {"RLI", "DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE"},
712
        {"FSI", "DIRECTIONALITY_FIRST_STRONG_ISOLATE"},
713
        {"PDI", "DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE"},
714

715
    };
716

717
    // Unicode specification lines have fields in this order.
718
    static final byte
719
        FIELD_VALUE         = 0,
720
        FIELD_NAME          = 1,
721
        FIELD_CATEGORY      = 2,
722
        FIELD_CLASS         = 3,
723
        FIELD_BIDI          = 4,
724
        FIELD_DECOMPOSITION = 5,
725
        FIELD_DECIMAL       = 6,
726
        FIELD_DIGIT         = 7,
727
        FIELD_NUMERIC       = 8,
728
        FIELD_MIRRORED      = 9,
729
        FIELD_OLDNAME       = 10,
730
        FIELD_COMMENT       = 11,
731
        FIELD_UPPERCASE     = 12,
732
        FIELD_LOWERCASE     = 13,
733
        FIELD_TITLECASE     = 14;
734

735
        static final Pattern tokenSeparator = Pattern.compile(";");
736

737
        public static void main(String[] args) {
738
                UnicodeSpec[] spec = null;
739
                if (args.length == 2 ) {
740
                        try {
741
                                File file = new File(args[0]);
742
                                int plane = Integer.parseInt(args[1]);
743
                                spec = UnicodeSpec.readSpecFile(file, plane);
744
                                System.out.println("UnicodeSpec[" + spec.length + "]:");
745
                                for (int x=0; x<spec.length; x++) {
746
                                        System.out.println(spec[x].toString());
747
                                }
748
                        }
749
                        catch(Exception e) {
750
                                e.printStackTrace();
751
                        }
752
                }
753

754
        }
755

756
}
757

758
Product

Resources

Company