// Source: GitHub repository PojavLauncherTeam/mobile, blob/master/src/java.base/share/classes/jdk/internal/icu/text/BidiBase.java
/*
2
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
/*
27
*******************************************************************************
28
* Copyright (C) 2001-2014, International Business Machines
29
* Corporation and others. All Rights Reserved.
30
*******************************************************************************
31
*/
32
33
/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
34
* algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
35
* concept of RUNS_ONLY which is a double operation.
36
* It could be advantageous to divide this into 3 concepts:
37
* a) Operation: direct / inverse / RUNS_ONLY
38
* b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
39
* c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
40
* This would allow combinations not possible today like RUNS_ONLY with
41
* NUMBERS_SPECIAL.
42
* Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
43
* REMOVE_CONTROLS for the inverse step.
44
* Not all combinations would be supported, and probably not all do make sense.
45
* This would need to document which ones are supported and what are the
46
* fallbacks for unsupported combinations.
47
*/
48
49
package jdk.internal.icu.text;
50
51
import java.lang.reflect.Array;
52
import java.text.AttributedCharacterIterator;
53
import java.text.Bidi;
54
import java.util.Arrays;
55
import jdk.internal.access.JavaAWTFontAccess;
56
import jdk.internal.access.SharedSecrets;
57
import jdk.internal.icu.lang.UCharacter;
58
import jdk.internal.icu.impl.UBiDiProps;
59
60
/**
61
*
62
* <h2>Bidi algorithm for ICU</h2>
63
*
64
* This is an implementation of the Unicode Bidirectional Algorithm. The
65
* algorithm is defined in the
66
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
67
* Unicode Bidirectional Algorithm</a>.
68
* <p>
69
*
70
* Note: Libraries that perform a bidirectional algorithm and reorder strings
71
* accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
72
* shaping (ArabicShaping) classes can be used at the core of such "Storage
73
* Layout Engines".
74
*
75
* <h3>General remarks about the API:</h3>
76
*
77
* The "limit" of a sequence of characters is the position just after
78
* their last character, i.e., one more than that position.
79
* <p>
80
*
81
* Some of the API methods provide access to "runs". Such a
82
* "run" is defined as a sequence of characters that are at the same
83
* embedding level after performing the Bidi algorithm.
84
*
85
* <h3>Basic concept: paragraph</h3>
86
* A piece of text can be divided into several paragraphs by characters
87
* with the Bidi class <code>Block Separator</code>. For handling of
88
* paragraphs, see:
89
* <ul>
90
* <li>{@link #countParagraphs}
91
* <li>{@link #getParaLevel}
92
* <li>{@link #getParagraph}
93
* <li>{@link #getParagraphByIndex}
94
* </ul>
95
*
96
* <h3>Basic concept: text direction</h3>
97
* The direction of a piece of text may be:
98
* <ul>
99
* <li>{@link #LTR}
100
* <li>{@link #RTL}
101
* <li>{@link #MIXED}
102
* <li>{@link #NEUTRAL}
103
* </ul>
104
*
105
* <h3>Basic concept: levels</h3>
106
*
107
* Levels in this API represent embedding levels according to the Unicode
108
* Bidirectional Algorithm.
109
* Their low-order bit (even/odd value) indicates the visual direction.<p>
110
*
111
* Levels can be abstract values when used for the
112
* <code>paraLevel</code> and <code>embeddingLevels</code>
113
* arguments of <code>setPara()</code>; there:
114
* <ul>
115
* <li>the high-order bit of an <code>embeddingLevels[]</code>
116
* value indicates whether the using application is
117
* specifying the level of a character to <i>override</i> whatever the
118
* Bidi implementation would resolve it to.</li>
119
* <li><code>paraLevel</code> can be set to the
120
* pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
121
* and <code>LEVEL_DEFAULT_RTL</code>.</li>
122
* </ul>
123
*
124
* <p>The related constants are not real, valid level values.
125
* <code>DEFAULT_XXX</code> can be used to specify
126
* a default for the paragraph level for
127
* when the <code>setPara()</code> method
128
* shall determine it but there is no
129
* strongly typed character in the input.<p>
130
*
131
* Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
132
* and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
133
* just like with normal LTR and RTL level values -
134
* these special values are designed that way. Also, the implementation
135
* assumes that MAX_EXPLICIT_LEVEL is odd.
136
*
137
* <p><b>See Also:</b>
138
* <ul>
139
* <li>{@link #LEVEL_DEFAULT_LTR}
140
* <li>{@link #LEVEL_DEFAULT_RTL}
141
* <li>{@link #LEVEL_OVERRIDE}
142
* <li>{@link #MAX_EXPLICIT_LEVEL}
143
* <li>{@link #setPara}
144
* </ul>
145
*
146
* <h3>Basic concept: Reordering Mode</h3>
147
* Reordering mode values indicate which variant of the Bidi algorithm to
148
* use.
149
*
150
* <p><b>See Also:</b>
151
* <ul>
152
* <li>{@link #setReorderingMode}
153
* <li>{@link #REORDER_DEFAULT}
154
* <li>{@link #REORDER_NUMBERS_SPECIAL}
155
* <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
156
* <li>{@link #REORDER_RUNS_ONLY}
157
* <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
158
* <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
159
* <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
160
* </ul>
161
*
162
* <h3>Basic concept: Reordering Options</h3>
163
* Reordering options can be applied during Bidi text transformations.
164
*
165
* <p><b>See Also:</b>
166
* <ul>
167
* <li>{@link #setReorderingOptions}
168
* <li>{@link #OPTION_DEFAULT}
169
* <li>{@link #OPTION_INSERT_MARKS}
170
* <li>{@link #OPTION_REMOVE_CONTROLS}
171
* <li>{@link #OPTION_STREAMING}
172
* </ul>
173
*
174
*
175
* @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
176
* @stable ICU 3.8
177
*
178
*
179
* <h4> Sample code for the ICU Bidi API </h4>
180
*
181
* <h5>Rendering a paragraph with the ICU Bidi API</h5>
182
*
183
* This is (hypothetical) sample code that illustrates how the ICU Bidi API
184
* could be used to render a paragraph of text. Rendering code depends highly on
185
* the graphics system, therefore this sample code must make a lot of
186
* assumptions, which may or may not match any existing graphics system's
187
* properties.
188
*
189
* <p>
190
* The basic assumptions are:
191
* </p>
192
* <ul>
193
* <li>Rendering is done from left to right on a horizontal line.</li>
194
* <li>A run of single-style, unidirectional text can be rendered at once.
195
* </li>
196
* <li>Such a run of text is passed to the graphics system with characters
197
* (code units) in logical order.</li>
198
* <li>The line-breaking algorithm is very complicated and Locale-dependent -
199
* and therefore its implementation omitted from this sample code.</li>
200
* </ul>
201
*
202
* <pre>{@code
203
*
204
* package com.ibm.icu.dev.test.bidi;
205
*
206
* import com.ibm.icu.text.Bidi;
207
* import com.ibm.icu.text.BidiRun;
208
*
209
* public class Sample {
210
*
211
* static final int styleNormal = 0;
212
* static final int styleSelected = 1;
213
* static final int styleBold = 2;
214
* static final int styleItalics = 4;
215
* static final int styleSuper=8;
216
* static final int styleSub = 16;
217
*
218
* static class StyleRun {
219
* int limit;
220
* int style;
221
*
222
* public StyleRun(int limit, int style) {
223
* this.limit = limit;
224
* this.style = style;
225
* }
226
* }
227
*
228
* static class Bounds {
229
* int start;
230
* int limit;
231
*
232
* public Bounds(int start, int limit) {
233
* this.start = start;
234
* this.limit = limit;
235
* }
236
* }
237
*
238
* static int getTextWidth(String text, int start, int limit,
239
* StyleRun[] styleRuns, int styleRunCount) {
240
* // simplistic way to compute the width
241
* return limit - start;
242
* }
243
*
244
* // set limit and StyleRun limit for a line
245
* // from text[start] and from styleRuns[styleRunStart]
246
* // using Bidi.getLogicalRun(...)
247
* // returns line width
248
* static int getLineBreak(String text, Bounds line, Bidi para,
249
* StyleRun styleRuns[], Bounds styleRun) {
250
* // dummy return
251
* return 0;
252
* }
253
*
254
* // render runs on a line sequentially, always from left to right
255
*
256
* // prepare rendering a new line
257
* static void startLine(byte textDirection, int lineWidth) {
258
* System.out.println();
259
* }
260
*
261
* // render a run of text and advance to the right by the run width
262
* // the text[start..limit-1] is always in logical order
263
* static void renderRun(String text, int start, int limit,
264
* byte textDirection, int style) {
265
* }
266
*
267
* // We could compute a cross-product
268
* // from the style runs with the directional runs
269
* // and then reorder it.
270
* // Instead, here we iterate over each run type
271
* // and render the intersections -
272
* // with shortcuts in simple (and common) cases.
273
* // renderParagraph() is the main function.
274
*
275
* // render a directional run with
276
* // (possibly) multiple style runs intersecting with it
277
* static void renderDirectionalRun(String text, int start, int limit,
278
* byte direction, StyleRun styleRuns[],
279
* int styleRunCount) {
280
* int i;
281
*
282
* // iterate over style runs
283
* if (direction == Bidi.LTR) {
284
* int styleLimit;
285
* for (i = 0; i < styleRunCount; ++i) {
286
* styleLimit = styleRuns[i].limit;
287
* if (start < styleLimit) {
288
* if (styleLimit > limit) {
289
* styleLimit = limit;
290
* }
291
* renderRun(text, start, styleLimit,
292
* direction, styleRuns[i].style);
293
* if (styleLimit == limit) {
294
* break;
295
* }
296
* start = styleLimit;
297
* }
298
* }
299
* } else {
300
* int styleStart;
301
*
302
* for (i = styleRunCount-1; i >= 0; --i) {
303
* if (i > 0) {
304
* styleStart = styleRuns[i-1].limit;
305
* } else {
306
* styleStart = 0;
307
* }
308
* if (limit >= styleStart) {
309
* if (styleStart < start) {
310
* styleStart = start;
311
* }
312
* renderRun(text, styleStart, limit, direction,
313
* styleRuns[i].style);
314
* if (styleStart == start) {
315
* break;
316
* }
317
* limit = styleStart;
318
* }
319
* }
320
* }
321
* }
322
*
323
* // the line object represents text[start..limit-1]
324
* static void renderLine(Bidi line, String text, int start, int limit,
325
* StyleRun styleRuns[], int styleRunCount) {
326
* byte direction = line.getDirection();
327
* if (direction != Bidi.MIXED) {
328
* // unidirectional
329
* if (styleRunCount <= 1) {
330
* renderRun(text, start, limit, direction, styleRuns[0].style);
331
* } else {
332
* renderDirectionalRun(text, start, limit, direction,
333
* styleRuns, styleRunCount);
334
* }
335
* } else {
336
* // mixed-directional
337
* int count, i;
338
* BidiRun run;
339
*
340
* try {
341
* count = line.countRuns();
342
* } catch (IllegalStateException e) {
343
* e.printStackTrace();
344
* return;
345
* }
346
* if (styleRunCount <= 1) {
347
* int style = styleRuns[0].style;
348
*
349
* // iterate over directional runs
350
* for (i = 0; i < count; ++i) {
351
* run = line.getVisualRun(i);
352
* renderRun(text, run.getStart(), run.getLimit(),
353
* run.getDirection(), style);
354
* }
355
* } else {
356
* // iterate over both directional and style runs
357
* for (i = 0; i < count; ++i) {
358
* run = line.getVisualRun(i);
359
* renderDirectionalRun(text, run.getStart(),
360
* run.getLimit(), run.getDirection(),
361
* styleRuns, styleRunCount);
362
* }
363
* }
364
* }
365
* }
366
*
367
* static void renderParagraph(String text, byte textDirection,
368
* StyleRun styleRuns[], int styleRunCount,
369
* int lineWidth) {
370
* int length = text.length();
371
* Bidi para = new Bidi();
372
* try {
373
* para.setPara(text,
374
* textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
375
* : Bidi.LEVEL_DEFAULT_LTR,
376
* null);
377
* } catch (Exception e) {
378
* e.printStackTrace();
379
* return;
380
* }
381
* byte paraLevel = (byte)(1 & para.getParaLevel());
382
* StyleRun styleRun = new StyleRun(length, styleNormal);
383
*
384
* if (styleRuns == null || styleRunCount <= 0) {
385
* styleRuns = new StyleRun[1];
386
* styleRunCount = 1;
387
* styleRuns[0] = styleRun;
388
* }
389
* // assume styleRuns[styleRunCount-1].limit>=length
390
*
391
* int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
392
* if (width <= lineWidth) {
393
* // everything fits onto one line
394
*
395
* // prepare rendering a new line from either left or right
396
* startLine(paraLevel, width);
397
*
398
* renderLine(para, text, 0, length, styleRuns, styleRunCount);
399
* } else {
400
* // we need to render several lines
401
* Bidi line = new Bidi(length, 0);
402
* int start = 0, limit;
403
* int styleRunStart = 0, styleRunLimit;
404
*
405
* for (;;) {
406
* limit = length;
407
* styleRunLimit = styleRunCount;
408
* width = getLineBreak(text, new Bounds(start, limit),
409
* para, styleRuns,
410
* new Bounds(styleRunStart, styleRunLimit));
411
* try {
412
* line = para.setLine(start, limit);
413
* } catch (Exception e) {
414
* e.printStackTrace();
415
* return;
416
* }
417
* // prepare rendering a new line
418
* // from either left or right
419
* startLine(paraLevel, width);
420
*
421
* if (styleRunStart > 0) {
422
* int newRunCount = styleRuns.length - styleRunStart;
423
* StyleRun[] newRuns = new StyleRun[newRunCount];
424
* System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
425
* newRunCount);
426
* renderLine(line, text, start, limit, newRuns,
427
* styleRunLimit - styleRunStart);
428
* } else {
429
* renderLine(line, text, start, limit, styleRuns,
430
* styleRunLimit - styleRunStart);
431
* }
432
* if (limit == length) {
433
* break;
434
* }
435
* start = limit;
436
* styleRunStart = styleRunLimit - 1;
437
* if (start >= styleRuns[styleRunStart].limit) {
438
* ++styleRunStart;
439
* }
440
* }
441
* }
442
* }
443
*
444
* public static void main(String[] args)
445
* {
446
* renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
447
* renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
448
* }
449
* }
450
*
451
* }</pre>
452
*/
453
454
/*
455
* General implementation notes:
456
*
457
* Throughout the implementation, there are comments like (W2) that refer to
458
* rules of the BiDi algorithm, in this example to the second rule of the
459
* resolution of weak types.
460
*
461
* For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
462
* character according to UTF-16, the second UChar gets the directional property of
463
* the entire character assigned, while the first one gets a BN, a boundary
464
* neutral, type, which is ignored by most of the algorithm according to
465
* rule (X9) and the implementation suggestions of the BiDi algorithm.
466
*
467
* Later, adjustWSLevels() will set the level for each BN to that of the
468
* following character (UChar), which results in surrogate pairs getting the
469
* same level on each of their surrogates.
470
*
471
* In a UTF-8 implementation, the same thing could be done: the last byte of
472
* a multi-byte sequence would get the "real" property, while all previous
473
* bytes of that sequence would get BN.
474
*
475
* It is not possible to assign all those parts of a character the same real
476
* property because this would fail in the resolution of weak types with rules
477
* that look at immediately surrounding types.
478
*
479
* As a related topic, this implementation does not remove Boundary Neutral
480
* types from the input, but ignores them wherever this is relevant.
481
* For example, the loop for the resolution of the weak types reads
482
* types until it finds a non-BN.
483
* Also, explicit embedding codes are neither changed into BN nor removed.
484
* They are only treated the same way real BNs are.
485
* As stated before, adjustWSLevels() takes care of them at the end.
486
* For the purpose of conformance, the levels of all these codes
487
* do not matter.
488
*
489
* Note that this implementation modifies the dirProps
490
* after the initial setup, when applying X5c (replace FSI by LRI or RLI),
491
* X6, N0 (replace paired brackets by L or R).
492
*
493
* In this implementation, the resolution of weak types (W1 to W6),
494
* neutrals (N1 and N2), and the assignment of the resolved level (In)
495
* are all done in one single loop, in resolveImplicitLevels().
496
* Changes of dirProp values are done on the fly, without writing
497
* them back to the dirProps array.
498
*
499
*
500
* This implementation contains code that allows to bypass steps of the
501
* algorithm that are not needed on the specific paragraph
502
* in order to speed up the most common cases considerably,
503
* like text that is entirely LTR, or RTL text without numbers.
504
*
505
* Most of this is done by setting a bit for each directional property
506
* in a flags variable and later checking for whether there are
507
* any LTR characters or any RTL characters, or both, whether
508
* there are any explicit embedding codes, etc.
509
*
510
* If the (Xn) steps are performed, then the flags are re-evaluated,
511
* because they will then not contain the embedding codes any more
512
* and will be adjusted for override codes, so that subsequently
513
* more bypassing may be possible than what the initial flags suggested.
514
*
515
* If the text is not mixed-directional, then the
516
* algorithm steps for the weak type resolution are not performed,
517
* and all levels are set to the paragraph level.
518
*
519
* If there are no explicit embedding codes, then the (Xn) steps
520
* are not performed.
521
*
522
* If embedding levels are supplied as a parameter, then all
523
* explicit embedding codes are ignored, and the (Xn) steps
524
* are not performed.
525
*
526
* White Space types could get the level of the run they belong to,
527
* and are checked with a test of (flags&MASK_EMBEDDING) to
528
* consider if the paragraph direction should be considered in
529
* the flags variable.
530
*
531
* If there are no White Space types in the paragraph, then
532
* (L1) is not necessary in adjustWSLevels().
533
*/
534
535
// Original filename in ICU4J: Bidi.java
536
public class BidiBase {
537
538
static class Point {
539
int pos; /* position in text */
540
int flag; /* flag for LRM/RLM, before/after */
541
}
542
543
static class InsertPoints {
544
int size;
545
int confirmed;
546
Point[] points = new Point[0];
547
}
548
549
static class Opening {
550
int position; /* position of opening bracket */
551
int match; /* matching char or -position of closing bracket */
552
int contextPos; /* position of last strong char found before opening */
553
short flags; /* bits for L or R/AL found within the pair */
554
byte contextDir; /* L or R according to last strong char before opening */
555
}
556
557
static class IsoRun {
558
int contextPos; /* position of char determining context */
559
short start; /* index of first opening entry for this run */
560
short limit; /* index after last opening entry for this run */
561
byte level; /* level of this run */
562
byte lastStrong; /* bidi class of last strong char found in this run */
563
byte lastBase; /* bidi class of last base char found in this run */
564
byte contextDir; /* L or R to use as context for following openings */
565
}
566
567
static class BracketData {
568
Opening[] openings = new Opening[SIMPLE_PARAS_COUNT];
569
int isoRunLast; /* index of last used entry */
570
/* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL
571
+ 1 for index 0, + 1 for before the first isolated sequence */
572
IsoRun[] isoRuns = new IsoRun[MAX_EXPLICIT_LEVEL+2];
573
boolean isNumbersSpecial; /*reordering mode for NUMBERS_SPECIAL */
574
}
575
576
static class Isolate {
577
int startON;
578
int start1;
579
short stateImp;
580
short state;
581
}
582
583
/** Paragraph level setting<p>
584
*
585
* Constant indicating that the base direction depends on the first strong
586
* directional character in the text according to the Unicode Bidirectional
587
* Algorithm. If no strong directional character is present,
588
* then set the paragraph level to 0 (left-to-right).<p>
589
*
590
* If this value is used in conjunction with reordering modes
591
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
592
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
593
* is assumed to be visual LTR, and the text after reordering is required
594
* to be the corresponding logical string with appropriate contextual
595
* direction. The direction of the result string will be RTL if either
596
* the rightmost or leftmost strong character of the source text is RTL
597
* or Arabic Letter, the direction will be LTR otherwise.<p>
598
*
599
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
600
* be added at the beginning of the result string to ensure round trip
601
* (that the result string, when reordered back to visual, will produce
602
* the original source text).
603
* @see #REORDER_INVERSE_LIKE_DIRECT
604
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
605
* @stable ICU 3.8
606
*/
607
public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e;
608
609
/** Paragraph level setting<p>
610
*
611
* Constant indicating that the base direction depends on the first strong
612
* directional character in the text according to the Unicode Bidirectional
613
* Algorithm. If no strong directional character is present,
614
* then set the paragraph level to 1 (right-to-left).<p>
615
*
616
* If this value is used in conjunction with reordering modes
617
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
618
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
619
* is assumed to be visual LTR, and the text after reordering is required
620
* to be the corresponding logical string with appropriate contextual
621
* direction. The direction of the result string will be RTL if either
622
* the rightmost or leftmost strong character of the source text is RTL
623
* or Arabic Letter, or if the text contains no strong character;
624
* the direction will be LTR otherwise.<p>
625
*
626
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
627
* be added at the beginning of the result string to ensure round trip
628
* (that the result string, when reordered back to visual, will produce
629
* the original source text).
630
* @see #REORDER_INVERSE_LIKE_DIRECT
631
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
632
* @stable ICU 3.8
633
*/
634
public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f;
635
636
/**
637
* Maximum explicit embedding level.
638
* (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
639
* @stable ICU 3.8
640
*/
641
public static final byte MAX_EXPLICIT_LEVEL = 125;
642
643
/**
644
* Bit flag for level input.
645
* Overrides directional properties.
646
* @stable ICU 3.8
647
*/
648
public static final byte LEVEL_OVERRIDE = (byte)0x80;
649
650
/**
651
* Special value which can be returned by the mapping methods when a
652
* logical index has no corresponding visual index or vice-versa. This may
653
* happen for the logical-to-visual mapping of a Bidi control when option
654
* <code>OPTION_REMOVE_CONTROLS</code> is
655
* specified. This can also happen for the visual-to-logical mapping of a
656
* Bidi mark (LRM or RLM) inserted by option
657
* <code>OPTION_INSERT_MARKS</code>.
658
* @see #getVisualIndex
659
* @see #getVisualMap
660
* @see #getLogicalIndex
661
* @see #getLogicalMap
662
* @see #OPTION_INSERT_MARKS
663
* @see #OPTION_REMOVE_CONTROLS
664
* @stable ICU 3.8
665
*/
666
public static final int MAP_NOWHERE = -1;
667
668
/**
669
* Left-to-right text.
670
* <ul>
671
* <li>As return value for <code>getDirection()</code>, it means
672
* that the source string contains no right-to-left characters, or
673
* that the source string is empty and the paragraph level is even.
674
* <li>As return value for <code>getBaseDirection()</code>, it
675
* means that the first strong character of the source string has
676
* a left-to-right direction.
677
* </ul>
678
* @stable ICU 3.8
679
*/
680
public static final byte LTR = 0;
681
682
/**
683
* Right-to-left text.
684
* <ul>
685
* <li>As return value for <code>getDirection()</code>, it means
686
* that the source string contains no left-to-right characters, or
687
* that the source string is empty and the paragraph level is odd.
688
* <li>As return value for <code>getBaseDirection()</code>, it
689
* means that the first strong character of the source string has
690
* a right-to-left direction.
691
* </ul>
692
* @stable ICU 3.8
693
*/
694
public static final byte RTL = 1;
695
696
/**
697
* Mixed-directional text.
698
* <p>As return value for <code>getDirection()</code>, it means
699
* that the source string contains both left-to-right and
700
* right-to-left characters.
701
* @stable ICU 3.8
702
*/
703
public static final byte MIXED = 2;
704
705
/**
706
* option bit for writeReordered():
707
* keep combining characters after their base characters in RTL runs
708
*
709
* @see #writeReordered
710
* @stable ICU 3.8
711
*/
712
public static final short KEEP_BASE_COMBINING = 1;
713
714
/**
715
* option bit for writeReordered():
716
* replace characters with the "mirrored" property in RTL runs
717
* by their mirror-image mappings
718
*
719
* @see #writeReordered
720
* @stable ICU 3.8
721
*/
722
public static final short DO_MIRRORING = 2;
723
724
/**
725
* option bit for writeReordered():
726
* surround the run with LRMs if necessary;
727
* this is part of the approximate "inverse Bidi" algorithm
728
*
729
* <p>This option does not imply corresponding adjustment of the index
730
* mappings.</p>
731
*
732
* @see #setInverse
733
* @see #writeReordered
734
* @stable ICU 3.8
735
*/
736
public static final short INSERT_LRM_FOR_NUMERIC = 4;
737
738
/**
739
* option bit for writeReordered():
740
* remove Bidi control characters
741
* (this does not affect INSERT_LRM_FOR_NUMERIC)
742
*
743
* <p>This option does not imply corresponding adjustment of the index
744
* mappings.</p>
745
*
746
* @see #writeReordered
747
* @see #INSERT_LRM_FOR_NUMERIC
748
* @stable ICU 3.8
749
*/
750
public static final short REMOVE_BIDI_CONTROLS = 8;
751
752
/**
753
* option bit for writeReordered():
754
* write the output in reverse order
755
*
756
* <p>This has the same effect as calling <code>writeReordered()</code>
757
* first without this option, and then calling
758
* <code>writeReverse()</code> without mirroring.
759
* Doing this in the same step is faster and avoids a temporary buffer.
760
* An example for using this option is output to a character terminal that
761
* is designed for RTL scripts and stores text in reverse order.</p>
762
*
763
* @see #writeReordered
764
* @stable ICU 3.8
765
*/
766
public static final short OUTPUT_REVERSE = 16;
767
768
/** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
769
* @see #setReorderingMode
770
* @stable ICU 3.8
771
*/
772
private static final short REORDER_DEFAULT = 0;
773
774
/** Reordering mode: Logical to Visual algorithm which handles numbers in
775
* a way which mimicks the behavior of Windows XP.
776
* @see #setReorderingMode
777
* @stable ICU 3.8
778
*/
779
private static final short REORDER_NUMBERS_SPECIAL = 1;
780
781
/** Reordering mode: Logical to Visual algorithm grouping numbers with
782
* adjacent R characters (reversible algorithm).
783
* @see #setReorderingMode
784
* @stable ICU 3.8
785
*/
786
private static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
787
788
/** Reordering mode: Reorder runs only to transform a Logical LTR string
789
* to the logical RTL string with the same display, or vice-versa.<br>
790
* If this mode is set together with option
791
* <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
792
* text may be removed and other controls may be added to produce the
793
* minimum combination which has the required display.
794
* @see #OPTION_INSERT_MARKS
795
* @see #setReorderingMode
796
* @stable ICU 3.8
797
*/
798
static final short REORDER_RUNS_ONLY = 3;
799
800
/** Reordering mode: Visual to Logical algorithm which handles numbers
801
* like L (same algorithm as selected by <code>setInverse(true)</code>.
802
* @see #setInverse
803
* @see #setReorderingMode
804
* @stable ICU 3.8
805
*/
806
static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
807
808
/** Reordering mode: Visual to Logical algorithm equivalent to the regular
809
* Logical to Visual algorithm.
810
* @see #setReorderingMode
811
* @stable ICU 3.8
812
*/
813
static final short REORDER_INVERSE_LIKE_DIRECT = 5;
814
815
/** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
816
* <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
817
* @see #setReorderingMode
818
* @stable ICU 3.8
819
*/
820
static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
821
822
/* Reordering mode values must be ordered so that all the regular logical to
823
* visual modes come first, and all inverse Bidi modes come last.
824
*/
825
private static final short REORDER_LAST_LOGICAL_TO_VISUAL =
826
REORDER_NUMBERS_SPECIAL;
827
828
/**
829
* Option bit for <code>setReorderingOptions</code>:
830
* insert Bidi marks (LRM or RLM) when needed to ensure correct result of
831
* a reordering to a Logical order
832
*
833
* <p>This option must be set or reset before calling
834
* <code>setPara</code>.</p>
835
*
836
* <p>This option is significant only with reordering modes which generate
837
* a result with Logical order, specifically.</p>
838
* <ul>
839
* <li><code>REORDER_RUNS_ONLY</code></li>
840
* <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
841
* <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
842
* <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
843
* </ul>
844
*
845
* <p>If this option is set in conjunction with reordering mode
846
* <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
847
* <code>setInverse(true)</code>, it implies option
848
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
849
* <code>writeReordered()</code>.</p>
850
*
851
* <p>For other reordering modes, a minimum number of LRM or RLM characters
852
* will be added to the source text after reordering it so as to ensure
853
* round trip, i.e. when applying the inverse reordering mode on the
854
* resulting logical text with removal of Bidi marks
855
* (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
856
* <code>setPara()</code> or option
857
* <code>REMOVE_BIDI_CONTROLS</code> in
858
* <code>writeReordered</code>), the result will be identical to the
859
* source text in the first transformation.
860
*
861
* <p>This option will be ignored if specified together with option
862
* <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
863
* <code>REMOVE_BIDI_CONTROLS</code> in calls to method
864
* <code>writeReordered()</code> and it implies option
865
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
866
* <code>writeReordered()</code> if the reordering mode is
867
* <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
868
*
869
* @see #setReorderingMode
870
* @see #setReorderingOptions
871
* @see #INSERT_LRM_FOR_NUMERIC
872
* @see #REMOVE_BIDI_CONTROLS
873
* @see #OPTION_REMOVE_CONTROLS
874
* @see #REORDER_RUNS_ONLY
875
* @see #REORDER_INVERSE_NUMBERS_AS_L
876
* @see #REORDER_INVERSE_LIKE_DIRECT
877
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
878
* @stable ICU 3.8
879
*/
880
static final int OPTION_INSERT_MARKS = 1;
881
882
/**
883
* Option bit for <code>setReorderingOptions</code>:
884
* remove Bidi control characters
885
*
886
* <p>This option must be set or reset before calling
887
* <code>setPara</code>.</p>
888
*
889
* <p>This option nullifies option
890
* <code>OPTION_INSERT_MARKS</code>. It inhibits option
891
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
892
* <code>writeReordered()</code> and it implies option
893
* <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p>
894
*
895
* @see #setReorderingMode
896
* @see #setReorderingOptions
897
* @see #OPTION_INSERT_MARKS
898
* @see #INSERT_LRM_FOR_NUMERIC
899
* @see #REMOVE_BIDI_CONTROLS
900
* @stable ICU 3.8
901
*/
902
static final int OPTION_REMOVE_CONTROLS = 2;
903
904
/**
905
* Option bit for <code>setReorderingOptions</code>:
906
* process the output as part of a stream to be continued
907
*
908
* <p>This option must be set or reset before calling
909
* <code>setPara</code>.</p>
910
*
911
* <p>This option specifies that the caller is interested in processing
912
* a large text object in parts. The results of the successive calls are
913
* expected to be concatenated by the caller. Only the call for the last
914
* part will have this option bit off.</p>
915
*
916
* <p>When this option bit is on, <code>setPara()</code> may process
917
* less than the full source text in order to truncate the text at a
918
* meaningful boundary. The caller should call
919
* <code>getProcessedLength()</code> immediately after calling
920
* <code>setPara()</code> in order to determine how much of the source
921
* text has been processed. Source text beyond that length should be
922
* resubmitted in following calls to <code>setPara</code>. The
923
* processed length may be less than the length of the source text if a
924
* character preceding the last character of the source text constitutes a
925
* reasonable boundary (like a block separator) for text to be continued.<br>
926
* If the last character of the source text constitutes a reasonable
927
* boundary, the whole text will be processed at once.<br>
928
* If nowhere in the source text there exists
929
* such a reasonable boundary, the processed length will be zero.<br>
930
* The caller should check for such an occurrence and do one of the following:
931
* <ul><li>submit a larger amount of text with a better chance to include
932
* a reasonable boundary.</li>
933
* <li>resubmit the same text after turning off option
934
* <code>OPTION_STREAMING</code>.</li></ul>
935
* In all cases, this option should be turned off before processing the last
936
* part of the text.</p>
937
*
938
* <p>When the <code>OPTION_STREAMING</code> option is used, it is
939
* recommended to call <code>orderParagraphsLTR(true)</code> before calling
940
* <code>setPara()</code> so that later paragraphs may be concatenated to
941
* previous paragraphs on the right.
942
* </p>
943
*
944
* @see #setReorderingMode
945
* @see #setReorderingOptions
946
* @see #getProcessedLength
947
* @stable ICU 3.8
948
*/
949
private static final int OPTION_STREAMING = 4;
950
951
/*
952
* Comparing the description of the Bidi algorithm with this implementation
953
* is easier with the same names for the Bidi types in the code as there.
954
* See UCharacterDirection
955
*/
956
/* private */ static final byte L = 0;
957
private static final byte R = 1;
958
private static final byte EN = 2;
959
private static final byte ES = 3;
960
private static final byte ET = 4;
961
private static final byte AN = 5;
962
private static final byte CS = 6;
963
static final byte B = 7;
964
private static final byte S = 8;
965
private static final byte WS = 9;
966
private static final byte ON = 10;
967
private static final byte LRE = 11;
968
private static final byte LRO = 12;
969
private static final byte AL = 13;
970
private static final byte RLE = 14;
971
private static final byte RLO = 15;
972
private static final byte PDF = 16;
973
private static final byte NSM = 17;
974
private static final byte BN = 18;
975
private static final byte FSI = 19;
976
private static final byte LRI = 20;
977
private static final byte RLI = 21;
978
private static final byte PDI = 22;
979
private static final byte ENL = PDI + 1; /* EN after W7 */
980
private static final byte ENR = ENL + 1; /* EN not subject to W7 */
981
982
// Number of directional types
983
private static final int CHAR_DIRECTION_COUNT = 23;
984
985
/**
986
* Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
987
* Used in
988
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
989
* Unicode Bidirectional Algorithm</a>.
990
* Returns UCharacter.BidiPairedBracketType values.
991
* @stable ICU 52
992
*/
993
public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;
994
995
/**
 * Holder for the Bidi Paired Bracket Type constants.
 *
 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
 * @stable ICU 52
 */
public interface BidiPairedBracketType {
    /**
     * The character is not a paired bracket.
     * @stable ICU 52
     */
    int NONE = 0;

    /**
     * The character is an opening paired bracket.
     * @stable ICU 52
     */
    int OPEN = 1;

    /**
     * The character is a closing paired bracket.
     * @stable ICU 52
     */
    int CLOSE = 2;

    /**
     * One more than the highest value declared above.
     * @stable ICU 52
     */
    int COUNT = 3;
}
1022
1023
/* number of paras entries allocated initially */
1024
static final int SIMPLE_PARAS_COUNT = 10;
1025
1026
private static final char CR = '\r';
1027
private static final char LF = '\n';
1028
1029
static final int LRM_BEFORE = 1;
1030
static final int LRM_AFTER = 2;
1031
static final int RLM_BEFORE = 4;
1032
static final int RLM_AFTER = 8;
1033
1034
/* flags for Opening.flags */
1035
static final byte FOUND_L = (byte)DirPropFlag(L);
1036
static final byte FOUND_R = (byte)DirPropFlag(R);
1037
1038
/*
1039
* The following bit is used for the directional isolate status.
1040
* Stack entries corresponding to isolate sequences are greater than ISOLATE.
1041
*/
1042
static final int ISOLATE = 0x0100;
1043
1044
/*
1045
* reference to parent paragraph object (reference to self if this object is
1046
* a paragraph object); set to null in a newly opened object; set to a
1047
* real value after a successful execution of setPara or setLine
1048
*/
1049
BidiBase paraBidi;
1050
1051
final UBiDiProps bdp;
1052
1053
/* character array representing the current text */
1054
char[] text;
1055
1056
/* length of the current text */
1057
int originalLength;
1058
1059
/* if the option OPTION_STREAMING is set, this is the length of
1060
* text actually processed by <code>setPara</code>, which may be shorter
1061
* than the original length. Otherwise, it is identical to the original
1062
* length.
1063
*/
1064
public int length;
1065
1066
/* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi
1067
* marks are allowed to be inserted in one of the reordering modes, the
1068
* length of the result string may be different from the processed length.
1069
*/
1070
int resultLength;
1071
1072
/* indicators for whether memory may be allocated after construction */
1073
boolean mayAllocateText;
1074
boolean mayAllocateRuns;
1075
1076
/* arrays with one value per text-character */
1077
byte[] dirPropsMemory = new byte[1];
1078
byte[] levelsMemory = new byte[1];
1079
byte[] dirProps;
1080
byte[] levels;
1081
1082
/* are we performing an approximation of the "inverse Bidi" algorithm? */
1083
boolean isInverse;
1084
1085
/* are we using the basic algorithm or its variation? */
1086
int reorderingMode;
1087
1088
/* bitmask for reordering options */
1089
int reorderingOptions;
1090
1091
/* must block separators receive level 0? */
1092
boolean orderParagraphsLTR;
1093
1094
/* the paragraph level */
1095
byte paraLevel;
1096
1097
/* original paraLevel when contextual */
1098
/* must be one of DEFAULT_xxx or 0 if not contextual */
1099
byte defaultParaLevel;
1100
1101
/* the following is set in setPara, used in processPropertySeq */
1102
1103
ImpTabPair impTabPair; /* reference to levels state table pair */
1104
1105
/* the overall paragraph or line directionality*/
1106
byte direction;
1107
1108
/* flags is a bit set for which directional properties are in the text */
1109
int flags;
1110
1111
/* lastArabicPos is index to the last AL in the text, -1 if none */
1112
int lastArabicPos;
1113
1114
/* characters after trailingWSStart are WS and are */
1115
/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
1116
int trailingWSStart;
1117
1118
/* fields for paragraph handling, set in getDirProps() */
1119
int paraCount;
1120
int[] paras_limit = new int[SIMPLE_PARAS_COUNT];
1121
byte[] paras_level = new byte[SIMPLE_PARAS_COUNT];
1122
1123
/* fields for line reordering */
1124
int runCount; /* ==-1: runs not set up yet */
1125
BidiRun[] runsMemory = new BidiRun[0];
1126
BidiRun[] runs;
1127
1128
/* for non-mixed text, we only need a tiny array of runs (no allocation) */
1129
BidiRun[] simpleRuns = {new BidiRun()};
1130
1131
/* fields for managing isolate sequences */
1132
Isolate[] isolates;
1133
1134
/* maximum or current nesting depth of isolate sequences */
1135
/* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
1136
nesting encountered.
1137
Within resolveImplicitLevels(), this is the index of the current isolates
1138
stack entry. */
1139
int isolateCount;
1140
1141
/* mapping of runs in logical order to visual order */
1142
int[] logicalToVisualRunsMap;
1143
/* flag to indicate that the map has been updated */
1144
boolean isGoodLogicalToVisualRunsMap;
1145
1146
/* for inverse Bidi with insertion of directional marks */
1147
InsertPoints insertPoints = new InsertPoints();
1148
1149
/* for option OPTION_REMOVE_CONTROLS */
1150
int controlCount;
1151
1152
/*
1153
* Sometimes, bit values are more appropriate
1154
* to deal with directionality properties.
1155
* Abbreviations in these method names refer to names
1156
* used in the Bidi algorithm.
1157
*/
1158
static int DirPropFlag(byte dir) {
1159
return (1 << dir);
1160
}
1161
1162
boolean testDirPropFlagAt(int flag, int index) {
1163
return ((DirPropFlag(dirProps[index]) & flag) != 0);
1164
}
1165
1166
static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
1167
1168
/* to avoid some conditional statements, use tiny constant arrays */
1169
static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
1170
static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
1171
static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
1172
1173
static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
1174
static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; }
1175
static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; }
1176
static final byte DirFromStrong(byte strong) { return strong == L ? L : R; }
1177
static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); }
1178
1179
/* are there any characters that are LTR or RTL? */
1180
static final int MASK_LTR =
1181
DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
1182
static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
1183
1184
static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL);
1185
1186
/* explicit embedding codes */
1187
private static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
1188
private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
1189
1190
/* explicit isolate codes */
1191
private static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);
1192
1193
/* paragraph and segment separators */
1194
private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
1195
1196
/* all types that are counted as White Space or Neutral in some steps */
1197
static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT|MASK_ISO;
1198
1199
/* types that are neutrals or could becomes neutrals in (Wn) */
1200
private static final int MASK_POSSIBLE_N = DirPropFlag(ON)|DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_WS;
1201
1202
/*
1203
* These types may be changed to "e",
1204
* the embedding type (L or R) of the run,
1205
* in the Bidi algorithm (N2)
1206
*/
1207
private static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
1208
1209
/*
1210
* the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
1211
*/
1212
private static byte GetLRFromLevel(byte level)
1213
{
1214
return (byte)(level & 1);
1215
}
1216
1217
private static boolean IsDefaultLevel(byte level)
1218
{
1219
return ((level & LEVEL_DEFAULT_LTR) == LEVEL_DEFAULT_LTR);
1220
}
1221
1222
static boolean IsBidiControlChar(int c)
1223
{
1224
/* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
1225
0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
1226
return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))
1227
|| ((c >= 0x2066) && (c <= 0x2069)));
1228
}
1229
1230
void verifyValidPara()
1231
{
1232
if (!(this == this.paraBidi)) {
1233
throw new IllegalStateException();
1234
}
1235
}
1236
1237
void verifyValidParaOrLine()
1238
{
1239
BidiBase para = this.paraBidi;
1240
/* verify Para */
1241
if (this == para) {
1242
return;
1243
}
1244
/* verify Line */
1245
if ((para == null) || (para != para.paraBidi)) {
1246
throw new IllegalStateException();
1247
}
1248
}
1249
1250
void verifyRange(int index, int start, int limit)
1251
{
1252
if (index < start || index >= limit) {
1253
throw new IllegalArgumentException("Value " + index +
1254
" is out of range " + start + " to " + limit);
1255
}
1256
}
1257
1258
/**
 * Allocate a <code>Bidi</code> object with preallocated memory
 * for internal structures.
 * This constructor yields a <code>Bidi</code> object like the default
 * constructor does, and additionally preallocates memory for internal
 * structures according to the sizings supplied by the caller.<p>
 * Passing 0 for a value skips the corresponding preallocation. For example,
 * if <code>maxRunCount</code> cannot be reasonably predetermined and should
 * not be set to <code>maxLength</code> (the only failproof value) to avoid
 * wasting memory, it can be set to 0; the internal structures associated
 * with it are then allocated on demand, just like with the default
 * constructor.
 *
 * @param maxLength is the maximum text or line length that internal memory
 *        will be preallocated for. An attempt to associate this object with a
 *        longer text will fail, unless this value is 0, which leaves the
 *        allocation up to the implementation.
 *
 * @param maxRunCount is the maximum anticipated number of same-level runs
 *        that internal memory will be preallocated for. An attempt to access
 *        visual runs on an object that was not preallocated for as many runs
 *        as the text was actually resolved to will fail, unless this value
 *        is 0, which leaves the allocation up to the implementation.<br><br>
 *        The number of runs depends on the actual text and may be anywhere
 *        between 1 and <code>maxLength</code>. It is typically small.
 *
 * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
 * @stable ICU 3.8
 */
public BidiBase(int maxLength, int maxRunCount)
{
    /* reject negative sizings */
    if (maxLength < 0 || maxRunCount < 0) {
        throw new IllegalArgumentException();
    }

    /* Nothing needs to be reset explicitly: the fields without an
       initializer already hold null, false or 0 in a new instance. */

    /* get Bidi properties */
    bdp = UBiDiProps.INSTANCE;

    /* text-sized arrays: preallocate now, or allow allocation later */
    if (maxLength == 0) {
        mayAllocateText = true;
    } else {
        getInitialDirPropsMemory(maxLength);
        getInitialLevelsMemory(maxLength);
    }

    /* runs array: a single run is served by simpleRuns[], so only a
       request for more than one run preallocates anything */
    if (maxRunCount == 0) {
        mayAllocateRuns = true;
    } else if (maxRunCount > 1) {
        getInitialRunsMemory(maxRunCount);
    }
}
1332
1333
/*
1334
* We are allowed to allocate memory if object==null or
1335
* mayAllocate==true for each array that we need.
1336
*
1337
* Assume sizeNeeded>0.
1338
* If object != null, then assume size > 0.
1339
*/
1340
private Object getMemory(String label, Object array, Class<?> arrayClass,
1341
boolean mayAllocate, int sizeNeeded)
1342
{
1343
int len = Array.getLength(array);
1344
1345
/* we have at least enough memory and must not allocate */
1346
if (sizeNeeded == len) {
1347
return array;
1348
}
1349
if (!mayAllocate) {
1350
/* we must not allocate */
1351
if (sizeNeeded <= len) {
1352
return array;
1353
}
1354
throw new OutOfMemoryError("Failed to allocate memory for "
1355
+ label);
1356
}
1357
/* we may try to grow or shrink */
1358
/* FOOD FOR THOUGHT: when shrinking it should be possible to avoid
1359
the allocation altogether and rely on this.length */
1360
try {
1361
return Array.newInstance(arrayClass, sizeNeeded);
1362
} catch (Exception e) {
1363
throw new OutOfMemoryError("Failed to allocate memory for "
1364
+ label);
1365
}
1366
}
1367
1368
/* helper methods for each allocated array */
1369
private void getDirPropsMemory(boolean mayAllocate, int len)
1370
{
1371
Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);
1372
dirPropsMemory = (byte[]) array;
1373
}
1374
1375
void getDirPropsMemory(int len)
1376
{
1377
getDirPropsMemory(mayAllocateText, len);
1378
}
1379
1380
private void getLevelsMemory(boolean mayAllocate, int len)
1381
{
1382
Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);
1383
levelsMemory = (byte[]) array;
1384
}
1385
1386
void getLevelsMemory(int len)
1387
{
1388
getLevelsMemory(mayAllocateText, len);
1389
}
1390
1391
private void getRunsMemory(boolean mayAllocate, int len)
1392
{
1393
Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);
1394
runsMemory = (BidiRun[]) array;
1395
}
1396
1397
void getRunsMemory(int len)
1398
{
1399
getRunsMemory(mayAllocateRuns, len);
1400
}
1401
1402
/* additional methods used by constructor - always allow allocation */
1403
private void getInitialDirPropsMemory(int len)
1404
{
1405
getDirPropsMemory(true, len);
1406
}
1407
1408
private void getInitialLevelsMemory(int len)
1409
{
1410
getLevelsMemory(true, len);
1411
}
1412
1413
private void getInitialRunsMemory(int len)
1414
{
1415
getRunsMemory(true, len);
1416
}
1417
1418
/**
1419
* Is this <code>Bidi</code> object set to perform the inverse Bidi
1420
* algorithm?
1421
* <p>Note: calling this method after setting the reordering mode with
1422
* <code>setReorderingMode</code> will return <code>true</code> if the
1423
* reordering mode was set to
1424
* <code>REORDER_INVERSE_NUMBERS_AS_L</code>, <code>false</code>
1425
* for all other values.</p>
1426
*
1427
* @return <code>true</code> if the <code>Bidi</code> object is set to
1428
* perform the inverse Bidi algorithm by handling numbers as L.
1429
*
1430
* @see #setInverse
1431
* @see #setReorderingMode
1432
* @see #REORDER_INVERSE_NUMBERS_AS_L
1433
* @stable ICU 3.8
1434
*/
1435
public boolean isInverse() {
1436
return isInverse;
1437
}
1438
1439
/* perform (P2)..(P3) ------------------------------------------------------- */
1440
1441
/*
1442
* Check that there are enough entries in the arrays paras_limit and paras_level
1443
*/
1444
private void checkParaCount() {
1445
int[] saveLimits;
1446
byte[] saveLevels;
1447
int count = paraCount;
1448
if (count <= paras_level.length)
1449
return;
1450
int oldLength = paras_level.length;
1451
saveLimits = paras_limit;
1452
saveLevels = paras_level;
1453
try {
1454
paras_limit = new int[count * 2];
1455
paras_level = new byte[count * 2];
1456
} catch (Exception e) {
1457
throw new OutOfMemoryError("Failed to allocate memory for paras");
1458
}
1459
System.arraycopy(saveLimits, 0, paras_limit, 0, oldLength);
1460
System.arraycopy(saveLevels, 0, paras_level, 0, oldLength);
1461
}
1462
1463
/*
1464
* Get the directional properties for the text, calculate the flags bit-set, and
1465
* determine the paragraph level if necessary (in paras_level[i]).
1466
* FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
1467
* When encountering an FSI, it is initially replaced with an LRI, which is the
1468
* default. Only if a strong R or AL is found within its scope will the LRI be
1469
* replaced by an RLI.
1470
*/
1471
static final int NOT_SEEKING_STRONG = 0; /* 0: not contextual paraLevel, not after FSI */
1472
static final int SEEKING_STRONG_FOR_PARA = 1; /* 1: looking for first strong char in para */
1473
static final int SEEKING_STRONG_FOR_FSI = 2; /* 2: looking for first strong after FSI */
1474
static final int LOOKING_FOR_PDI = 3; /* 3: found strong after FSI, looking for PDI */
1475
1476
private void getDirProps()
1477
{
1478
int i = 0, i0, i1;
1479
flags = 0; /* collect all directionalities in the text */
1480
int uchar;
1481
byte dirProp;
1482
byte defaultParaLevel = 0; /* initialize to avoid compiler warnings */
1483
boolean isDefaultLevel = IsDefaultLevel(paraLevel);
1484
/* for inverse Bidi, the default para level is set to RTL if there is a
1485
strong R or AL character at either end of the text */
1486
boolean isDefaultLevelInverse=isDefaultLevel &&
1487
(reorderingMode == REORDER_INVERSE_LIKE_DIRECT ||
1488
reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
1489
lastArabicPos = -1;
1490
int controlCount = 0;
1491
boolean removeBidiControls = (reorderingOptions & OPTION_REMOVE_CONTROLS) != 0;
1492
1493
byte state;
1494
byte lastStrong = ON; /* for default level & inverse Bidi */
1495
/* The following stacks are used to manage isolate sequences. Those
1496
sequences may be nested, but obviously never more deeply than the
1497
maximum explicit embedding level.
1498
lastStack is the index of the last used entry in the stack. A value of -1
1499
means that there is no open isolate sequence.
1500
lastStack is reset to -1 on paragraph boundaries. */
1501
/* The following stack contains the position of the initiator of
1502
each open isolate sequence */
1503
int[] isolateStartStack= new int[MAX_EXPLICIT_LEVEL+1];
1504
/* The following stack contains the last known state before
1505
encountering the initiator of an isolate sequence */
1506
byte[] previousStateStack = new byte[MAX_EXPLICIT_LEVEL+1];
1507
int stackLast=-1;
1508
1509
if ((reorderingOptions & OPTION_STREAMING) != 0)
1510
length = 0;
1511
defaultParaLevel = (byte)(paraLevel & 1);
1512
1513
if (isDefaultLevel) {
1514
paras_level[0] = defaultParaLevel;
1515
lastStrong = defaultParaLevel;
1516
state = SEEKING_STRONG_FOR_PARA;
1517
} else {
1518
paras_level[0] = paraLevel;
1519
state = NOT_SEEKING_STRONG;
1520
}
1521
/* count paragraphs and determine the paragraph level (P2..P3) */
1522
/*
1523
* see comment on constant fields:
1524
* the LEVEL_DEFAULT_XXX values are designed so that
1525
* their low-order bit alone yields the intended default
1526
*/
1527
1528
for (i = 0; i < originalLength; /* i is incremented in the loop */) {
1529
i0 = i; /* index of first code unit */
1530
uchar = UTF16.charAt(text, 0, originalLength, i);
1531
i += UTF16.getCharCount(uchar);
1532
i1 = i - 1; /* index of last code unit, gets the directional property */
1533
1534
dirProp = (byte)getCustomizedClass(uchar);
1535
flags |= DirPropFlag(dirProp);
1536
dirProps[i1] = dirProp;
1537
if (i1 > i0) { /* set previous code units' properties to BN */
1538
flags |= DirPropFlag(BN);
1539
do {
1540
dirProps[--i1] = BN;
1541
} while (i1 > i0);
1542
}
1543
if (removeBidiControls && IsBidiControlChar(uchar)) {
1544
controlCount++;
1545
}
1546
if (dirProp == L) {
1547
if (state == SEEKING_STRONG_FOR_PARA) {
1548
paras_level[paraCount - 1] = 0;
1549
state = NOT_SEEKING_STRONG;
1550
}
1551
else if (state == SEEKING_STRONG_FOR_FSI) {
1552
if (stackLast <= MAX_EXPLICIT_LEVEL) {
1553
/* no need for next statement, already set by default */
1554
/* dirProps[isolateStartStack[stackLast]] = LRI; */
1555
flags |= DirPropFlag(LRI);
1556
}
1557
state = LOOKING_FOR_PDI;
1558
}
1559
lastStrong = L;
1560
continue;
1561
}
1562
if (dirProp == R || dirProp == AL) {
1563
if (state == SEEKING_STRONG_FOR_PARA) {
1564
paras_level[paraCount - 1] = 1;
1565
state = NOT_SEEKING_STRONG;
1566
}
1567
else if (state == SEEKING_STRONG_FOR_FSI) {
1568
if (stackLast <= MAX_EXPLICIT_LEVEL) {
1569
dirProps[isolateStartStack[stackLast]] = RLI;
1570
flags |= DirPropFlag(RLI);
1571
}
1572
state = LOOKING_FOR_PDI;
1573
}
1574
lastStrong = R;
1575
if (dirProp == AL)
1576
lastArabicPos = i - 1;
1577
continue;
1578
}
1579
if (dirProp >= FSI && dirProp <= RLI) { /* FSI, LRI or RLI */
1580
stackLast++;
1581
if (stackLast <= MAX_EXPLICIT_LEVEL) {
1582
isolateStartStack[stackLast] = i - 1;
1583
previousStateStack[stackLast] = state;
1584
}
1585
if (dirProp == FSI) {
1586
dirProps[i-1] = LRI; /* default if no strong char */
1587
state = SEEKING_STRONG_FOR_FSI;
1588
}
1589
else
1590
state = LOOKING_FOR_PDI;
1591
continue;
1592
}
1593
if (dirProp == PDI) {
1594
if (state == SEEKING_STRONG_FOR_FSI) {
1595
if (stackLast <= MAX_EXPLICIT_LEVEL) {
1596
/* no need for next statement, already set by default */
1597
/* dirProps[isolateStartStack[stackLast]] = LRI; */
1598
flags |= DirPropFlag(LRI);
1599
}
1600
}
1601
if (stackLast >= 0) {
1602
if (stackLast <= MAX_EXPLICIT_LEVEL)
1603
state = previousStateStack[stackLast];
1604
stackLast--;
1605
}
1606
continue;
1607
}
1608
if (dirProp == B) {
1609
if (i < originalLength && uchar == CR && text[i] == LF) /* do nothing on the CR */
1610
continue;
1611
paras_limit[paraCount - 1] = i;
1612
if (isDefaultLevelInverse && lastStrong == R)
1613
paras_level[paraCount - 1] = 1;
1614
if ((reorderingOptions & OPTION_STREAMING) != 0) {
1615
/* When streaming, we only process whole paragraphs
1616
thus some updates are only done on paragraph boundaries */
1617
length = i; /* i is index to next character */
1618
this.controlCount = controlCount;
1619
}
1620
if (i < originalLength) { /* B not last char in text */
1621
paraCount++;
1622
checkParaCount(); /* check that there is enough memory for a new para entry */
1623
if (isDefaultLevel) {
1624
paras_level[paraCount - 1] = defaultParaLevel;
1625
state = SEEKING_STRONG_FOR_PARA;
1626
lastStrong = defaultParaLevel;
1627
} else {
1628
paras_level[paraCount - 1] = paraLevel;
1629
state = NOT_SEEKING_STRONG;
1630
}
1631
stackLast = -1;
1632
}
1633
continue;
1634
}
1635
}
1636
/* +Ignore still open isolate sequences with overflow */
1637
if (stackLast > MAX_EXPLICIT_LEVEL) {
1638
stackLast = MAX_EXPLICIT_LEVEL;
1639
state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
1640
}
1641
/* Resolve direction of still unresolved open FSI sequences */
1642
while (stackLast >= 0) {
1643
if (state == SEEKING_STRONG_FOR_FSI) {
1644
/* no need for next statement, already set by default */
1645
/* dirProps[isolateStartStack[stackLast]] = LRI; */
1646
flags |= DirPropFlag(LRI);
1647
break;
1648
}
1649
state = previousStateStack[stackLast];
1650
stackLast--;
1651
}
1652
/* When streaming, ignore text after the last paragraph separator */
1653
if ((reorderingOptions & OPTION_STREAMING) != 0) {
1654
if (length < originalLength)
1655
paraCount--;
1656
} else {
1657
paras_limit[paraCount - 1] = originalLength;
1658
this.controlCount = controlCount;
1659
}
1660
/* For inverse bidi, default para direction is RTL if there is
1661
a strong R or AL at either end of the paragraph */
1662
if (isDefaultLevelInverse && lastStrong == R) {
1663
paras_level[paraCount - 1] = 1;
1664
}
1665
if (isDefaultLevel) {
1666
paraLevel = paras_level[0];
1667
}
1668
/* The following is needed to resolve the text direction for default level
1669
paragraphs containing no strong character */
1670
for (i = 0; i < paraCount; i++)
1671
flags |= DirPropFlagLR(paras_level[i]);
1672
1673
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1674
flags |= DirPropFlag(L);
1675
}
1676
}
1677
1678
/* determine the paragraph level at position index */
1679
byte GetParaLevelAt(int pindex)
1680
{
1681
if (defaultParaLevel == 0 || pindex < paras_limit[0])
1682
return paraLevel;
1683
int i;
1684
for (i = 1; i < paraCount; i++)
1685
if (pindex < paras_limit[i])
1686
break;
1687
if (i >= paraCount)
1688
i = paraCount - 1;
1689
return paras_level[i];
1690
}
1691
1692
/* Functions for handling paired brackets ----------------------------------- */
1693
1694
/* In the isoRuns array, the first entry is used for text outside of any
1695
isolate sequence. Higher entries are used for each more deeply nested
1696
isolate sequence. isoRunLast is the index of the last used entry. The
1697
openings array is used to note the data of opening brackets not yet
1698
matched by a closing bracket, or matched but still susceptible to change
1699
level.
1700
Each isoRun entry contains the index of the first and
1701
one-after-last openings entries for pending opening brackets it
1702
contains. The next openings entry to use is the one-after-last of the
1703
most deeply nested isoRun entry.
1704
isoRun entries also contain their current embedding level and the last
1705
encountered strong character, since these will be needed to resolve
1706
the level of paired brackets. */
1707
1708
private void bracketInit(BracketData bd) {
1709
bd.isoRunLast = 0;
1710
bd.isoRuns[0] = new IsoRun();
1711
bd.isoRuns[0].start = 0;
1712
bd.isoRuns[0].limit = 0;
1713
bd.isoRuns[0].level = GetParaLevelAt(0);
1714
bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(GetParaLevelAt(0) & 1);
1715
bd.isoRuns[0].contextPos = 0;
1716
bd.openings = new Opening[SIMPLE_PARAS_COUNT];
1717
bd.isNumbersSpecial = reorderingMode == REORDER_NUMBERS_SPECIAL ||
1718
reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
1719
}
1720
1721
/* paragraph boundary */
1722
private void bracketProcessB(BracketData bd, byte level) {
1723
bd.isoRunLast = 0;
1724
bd.isoRuns[0].limit = 0;
1725
bd.isoRuns[0].level = level;
1726
bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(level & 1);
1727
bd.isoRuns[0].contextPos = 0;
1728
}
1729
1730
/* LRE, LRO, RLE, RLO, PDF */
1731
private void bracketProcessBoundary(BracketData bd, int lastCcPos,
1732
byte contextLevel, byte embeddingLevel) {
1733
IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1734
if ((DirPropFlag(dirProps[lastCcPos]) & MASK_ISO) != 0) /* after an isolate */
1735
return;
1736
if (NoOverride(embeddingLevel) > NoOverride(contextLevel)) /* not a PDF */
1737
contextLevel = embeddingLevel;
1738
pLastIsoRun.limit = pLastIsoRun.start;
1739
pLastIsoRun.level = embeddingLevel;
1740
pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(contextLevel & 1);
1741
pLastIsoRun.contextPos = lastCcPos;
1742
}
1743
1744
/* LRI or RLI */
1745
private void bracketProcessLRI_RLI(BracketData bd, byte level) {
1746
IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1747
short lastLimit;
1748
pLastIsoRun.lastBase = ON;
1749
lastLimit = pLastIsoRun.limit;
1750
bd.isoRunLast++;
1751
pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1752
if (pLastIsoRun == null)
1753
pLastIsoRun = bd.isoRuns[bd.isoRunLast] = new IsoRun();
1754
pLastIsoRun.start = pLastIsoRun.limit = lastLimit;
1755
pLastIsoRun.level = level;
1756
pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(level & 1);
1757
pLastIsoRun.contextPos = 0;
1758
}
1759
1760
/* PDI */
1761
private void bracketProcessPDI(BracketData bd) {
1762
IsoRun pLastIsoRun;
1763
bd.isoRunLast--;
1764
pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1765
pLastIsoRun.lastBase = ON;
1766
}
1767
1768
/* newly found opening bracket: create an openings entry */
1769
private void bracketAddOpening(BracketData bd, char match, int position) {
1770
IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1771
Opening pOpening;
1772
if (pLastIsoRun.limit >= bd.openings.length) { /* no available new entry */
1773
Opening[] saveOpenings = bd.openings;
1774
int count;
1775
try {
1776
count = bd.openings.length;
1777
bd.openings = new Opening[count * 2];
1778
} catch (Exception e) {
1779
throw new OutOfMemoryError("Failed to allocate memory for openings");
1780
}
1781
System.arraycopy(saveOpenings, 0, bd.openings, 0, count);
1782
}
1783
pOpening = bd.openings[pLastIsoRun.limit];
1784
if (pOpening == null)
1785
pOpening = bd.openings[pLastIsoRun.limit]= new Opening();
1786
pOpening.position = position;
1787
pOpening.match = match;
1788
pOpening.contextDir = pLastIsoRun.contextDir;
1789
pOpening.contextPos = pLastIsoRun.contextPos;
1790
pOpening.flags = 0;
1791
pLastIsoRun.limit++;
1792
}
1793
1794
/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
1795
private void fixN0c(BracketData bd, int openingIndex, int newPropPosition, byte newProp) {
1796
/* This function calls itself recursively */
1797
IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1798
Opening qOpening;
1799
int k, openingPosition, closingPosition;
1800
for (k = openingIndex+1; k < pLastIsoRun.limit; k++) {
1801
qOpening = bd.openings[k];
1802
if (qOpening.match >= 0) /* not an N0c match */
1803
continue;
1804
if (newPropPosition < qOpening.contextPos)
1805
break;
1806
if (newPropPosition >= qOpening.position)
1807
continue;
1808
if (newProp == qOpening.contextDir)
1809
break;
1810
openingPosition = qOpening.position;
1811
dirProps[openingPosition] = newProp;
1812
closingPosition = -(qOpening.match);
1813
dirProps[closingPosition] = newProp;
1814
qOpening.match = 0; /* prevent further changes */
1815
fixN0c(bd, k, openingPosition, newProp);
1816
fixN0c(bd, k, closingPosition, newProp);
1817
}
1818
}
1819
1820
/* process closing bracket; return L or R if N0b or N0c, ON if N0d */
1821
private byte bracketProcessClosing(BracketData bd, int openIdx, int position) {
1822
IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1823
Opening pOpening, qOpening;
1824
byte direction;
1825
boolean stable;
1826
byte newProp;
1827
pOpening = bd.openings[openIdx];
1828
direction = (byte)(pLastIsoRun.level & 1);
1829
stable = true; /* assume stable until proved otherwise */
1830
1831
/* The stable flag is set when brackets are paired and their
1832
level is resolved and cannot be changed by what will be
1833
found later in the source string.
1834
An unstable match can occur only when applying N0c, where
1835
the resolved level depends on the preceding context, and
1836
this context may be affected by text occurring later.
1837
Example: RTL paragraph containing: abc[(latin) HEBREW]
1838
When the closing parenthesis is encountered, it appears
1839
that N0c1 must be applied since 'abc' sets an opposite
1840
direction context and both parentheses receive level 2.
1841
However, when the closing square bracket is processed,
1842
N0b applies because of 'HEBREW' being included within the
1843
brackets, thus the square brackets are treated like R and
1844
receive level 1. However, this changes the preceding
1845
context of the opening parenthesis, and it now appears
1846
that N0c2 must be applied to the parentheses rather than
1847
N0c1. */
1848
1849
if ((direction == 0 && (pOpening.flags & FOUND_L) > 0) ||
1850
(direction == 1 && (pOpening.flags & FOUND_R) > 0)) { /* N0b */
1851
newProp = direction;
1852
}
1853
else if ((pOpening.flags & (FOUND_L | FOUND_R)) != 0) { /* N0c */
1854
/* it is stable if there is no preceding text or in
1855
conditions too complicated and not worth checking */
1856
stable = (openIdx == pLastIsoRun.start);
1857
if (direction != pOpening.contextDir)
1858
newProp = pOpening.contextDir; /* N0c1 */
1859
else
1860
newProp = direction; /* N0c2 */
1861
} else {
1862
/* forget this and any brackets nested within this pair */
1863
pLastIsoRun.limit = (short)openIdx;
1864
return ON; /* N0d */
1865
}
1866
dirProps[pOpening.position] = newProp;
1867
dirProps[position] = newProp;
1868
/* Update nested N0c pairs that may be affected */
1869
fixN0c(bd, openIdx, pOpening.position, newProp);
1870
if (stable) {
1871
pLastIsoRun.limit = (short)openIdx; /* forget any brackets nested within this pair */
1872
/* remove lower located synonyms if any */
1873
while (pLastIsoRun.limit > pLastIsoRun.start &&
1874
bd.openings[pLastIsoRun.limit - 1].position == pOpening.position)
1875
pLastIsoRun.limit--;
1876
} else {
1877
int k;
1878
pOpening.match = -position;
1879
/* neutralize lower located synonyms if any */
1880
k = openIdx - 1;
1881
while (k >= pLastIsoRun.start &&
1882
bd.openings[k].position == pOpening.position)
1883
bd.openings[k--].match = 0;
1884
/* neutralize any unmatched opening between the current pair;
1885
this will also neutralize higher located synonyms if any */
1886
for (k = openIdx + 1; k < pLastIsoRun.limit; k++) {
1887
qOpening =bd.openings[k];
1888
if (qOpening.position >= position)
1889
break;
1890
if (qOpening.match > 0)
1891
qOpening.match = 0;
1892
}
1893
}
1894
return newProp;
1895
}
1896
1897
/* handle strong characters, digits and candidates for closing brackets */
1898
private void bracketProcessChar(BracketData bd, int position) {
1899
IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
1900
byte dirProp, newProp;
1901
byte level;
1902
dirProp = dirProps[position];
1903
if (dirProp == ON) {
1904
char c, match;
1905
int idx;
1906
/* First see if it is a matching closing bracket. Hopefully, this is
1907
more efficient than checking if it is a closing bracket at all */
1908
c = text[position];
1909
for (idx = pLastIsoRun.limit - 1; idx >= pLastIsoRun.start; idx--) {
1910
if (bd.openings[idx].match != c)
1911
continue;
1912
/* We have a match */
1913
newProp = bracketProcessClosing(bd, idx, position);
1914
if(newProp == ON) { /* N0d */
1915
c = 0; /* prevent handling as an opening */
1916
break;
1917
}
1918
pLastIsoRun.lastBase = ON;
1919
pLastIsoRun.contextDir = newProp;
1920
pLastIsoRun.contextPos = position;
1921
level = levels[position];
1922
if ((level & LEVEL_OVERRIDE) != 0) { /* X4, X5 */
1923
short flag;
1924
int i;
1925
newProp = (byte)(level & 1);
1926
pLastIsoRun.lastStrong = newProp;
1927
flag = (short)DirPropFlag(newProp);
1928
for (i = pLastIsoRun.start; i < idx; i++)
1929
bd.openings[i].flags |= flag;
1930
/* matching brackets are not overridden by LRO/RLO */
1931
levels[position] &= ~LEVEL_OVERRIDE;
1932
}
1933
/* matching brackets are not overridden by LRO/RLO */
1934
levels[bd.openings[idx].position] &= ~LEVEL_OVERRIDE;
1935
return;
1936
}
1937
/* We get here only if the ON character is not a matching closing
1938
bracket or it is a case of N0d */
1939
/* Now see if it is an opening bracket */
1940
if (c != 0) {
1941
match = (char)UCharacter.getBidiPairedBracket(c); /* get the matching char */
1942
} else {
1943
match = 0;
1944
}
1945
if (match != c && /* has a matching char */
1946
UCharacter.getIntPropertyValue(c, BIDI_PAIRED_BRACKET_TYPE) ==
1947
/* opening bracket */ BidiPairedBracketType.OPEN) {
1948
/* special case: process synonyms
1949
create an opening entry for each synonym */
1950
if (match == 0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
1951
bracketAddOpening(bd, (char)0x3009, position);
1952
}
1953
else if (match == 0x3009) { /* RIGHT ANGLE BRACKET */
1954
bracketAddOpening(bd, (char)0x232A, position);
1955
}
1956
bracketAddOpening(bd, match, position);
1957
}
1958
}
1959
level = levels[position];
1960
if ((level & LEVEL_OVERRIDE) != 0) { /* X4, X5 */
1961
newProp = (byte)(level & 1);
1962
if (dirProp != S && dirProp != WS && dirProp != ON)
1963
dirProps[position] = newProp;
1964
pLastIsoRun.lastBase = newProp;
1965
pLastIsoRun.lastStrong = newProp;
1966
pLastIsoRun.contextDir = newProp;
1967
pLastIsoRun.contextPos = position;
1968
}
1969
else if (dirProp <= R || dirProp == AL) {
1970
newProp = DirFromStrong(dirProp);
1971
pLastIsoRun.lastBase = dirProp;
1972
pLastIsoRun.lastStrong = dirProp;
1973
pLastIsoRun.contextDir = newProp;
1974
pLastIsoRun.contextPos = position;
1975
}
1976
else if(dirProp == EN) {
1977
pLastIsoRun.lastBase = EN;
1978
if (pLastIsoRun.lastStrong == L) {
1979
newProp = L; /* W7 */
1980
if (!bd.isNumbersSpecial)
1981
dirProps[position] = ENL;
1982
pLastIsoRun.contextDir = L;
1983
pLastIsoRun.contextPos = position;
1984
}
1985
else {
1986
newProp = R; /* N0 */
1987
if (pLastIsoRun.lastStrong == AL)
1988
dirProps[position] = AN; /* W2 */
1989
else
1990
dirProps[position] = ENR;
1991
pLastIsoRun.contextDir = R;
1992
pLastIsoRun.contextPos = position;
1993
}
1994
}
1995
else if (dirProp == AN) {
1996
newProp = R; /* N0 */
1997
pLastIsoRun.lastBase = AN;
1998
pLastIsoRun.contextDir = R;
1999
pLastIsoRun.contextPos = position;
2000
}
2001
else if (dirProp == NSM) {
2002
/* if the last real char was ON, change NSM to ON so that it
2003
will stay ON even if the last real char is a bracket which
2004
may be changed to L or R */
2005
newProp = pLastIsoRun.lastBase;
2006
if (newProp == ON)
2007
dirProps[position] = newProp;
2008
}
2009
else {
2010
newProp = dirProp;
2011
pLastIsoRun.lastBase = dirProp;
2012
}
2013
if (newProp <= R || newProp == AL) {
2014
int i;
2015
short flag = (short)DirPropFlag(DirFromStrong(newProp));
2016
for (i = pLastIsoRun.start; i < pLastIsoRun.limit; i++)
2017
if (position > bd.openings[i].position)
2018
bd.openings[i].flags |= flag;
2019
}
2020
}
2021
2022
/* perform (X1)..(X9) ------------------------------------------------------- */
2023
2024
/* determine if the text is mixed-directional or single-directional */
2025
private byte directionFromFlags() {
2026
2027
/* if the text contains AN and neutrals, then some neutrals may become RTL */
2028
if (!((flags & MASK_RTL) != 0 ||
2029
((flags & DirPropFlag(AN)) != 0 &&
2030
(flags & MASK_POSSIBLE_N) != 0))) {
2031
return LTR;
2032
} else if ((flags & MASK_LTR) == 0) {
2033
return RTL;
2034
} else {
2035
return MIXED;
2036
}
2037
}
2038
2039
/*
2040
* Resolve the explicit levels as specified by explicit embedding codes.
2041
* Recalculate the flags to have them reflect the real properties
2042
* after taking the explicit embeddings into account.
2043
*
2044
* The BiDi algorithm is designed to result in the same behavior whether embedding
2045
* levels are externally specified (from "styled text", supposedly the preferred
2046
* method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
2047
* That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
2048
* However, in a real implementation, the removal of these codes and their index
2049
* positions in the plain text is undesirable since it would result in
2050
* reallocated, reindexed text.
2051
* Instead, this implementation leaves the codes in there and just ignores them
2052
* in the subsequent processing.
2053
* In order to get the same reordering behavior, positions with a BN or a not-isolate
2054
* explicit embedding code just get the same level assigned as the last "real"
2055
* character.
2056
*
2057
* Some implementations, not this one, then overwrite some of these
2058
* directionality properties at "real" same-level-run boundaries by
2059
* L or R codes so that the resolution of weak types can be performed on the
2060
* entire paragraph at once instead of having to parse it once more and
2061
* perform that resolution on same-level-runs.
2062
* This limits the scope of the implicit rules in effectively
2063
* the same way as the run limits.
2064
*
2065
* Instead, this implementation does not modify these codes, except for
2066
* paired brackets whose properties (ON) may be replaced by L or R.
2067
* On one hand, the paragraph has to be scanned for same-level-runs, but
2068
* on the other hand, this saves another loop to reset these codes,
2069
* or saves making and modifying a copy of dirProps[].
2070
*
2071
*
2072
* Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
2073
*
2074
*
2075
* Handling the stack of explicit levels (Xn):
2076
*
2077
* With the BiDi stack of explicit levels, as pushed with each
2078
* LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
2079
* the explicit level must never exceed MAX_EXPLICIT_LEVEL.
2080
*
2081
* In order to have a correct push-pop semantics even in the case of overflows,
2082
* overflow counters and a valid isolate counter are used as described in UAX#9
2083
* section 3.3.2 "Explicit Levels and Directions".
2084
*
2085
* This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
2086
*
2087
* Returns the direction
2088
*
2089
*/
2090
private byte resolveExplicitLevels() {
2091
int i = 0;
2092
byte dirProp;
2093
byte level = GetParaLevelAt(0);
2094
byte dirct;
2095
isolateCount = 0;
2096
2097
/* determine if the text is mixed-directional or single-directional */
2098
dirct = directionFromFlags();
2099
2100
/* we may not need to resolve any explicit levels */
2101
if (dirct != MIXED) {
2102
/* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
2103
return dirct;
2104
}
2105
2106
if (reorderingMode > REORDER_LAST_LOGICAL_TO_VISUAL) {
2107
/* inverse BiDi: mixed, but all characters are at the same embedding level */
2108
/* set all levels to the paragraph level */
2109
int paraIndex, start, limit;
2110
for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
2111
if (paraIndex == 0)
2112
start = 0;
2113
else
2114
start = paras_limit[paraIndex - 1];
2115
limit = paras_limit[paraIndex];
2116
level = paras_level[paraIndex];
2117
for (i = start; i < limit; i++)
2118
levels[i] =level;
2119
}
2120
return dirct; /* no bracket matching for inverse BiDi */
2121
}
2122
if ((flags & (MASK_EXPLICIT | MASK_ISO)) == 0) {
2123
/* no embeddings, set all levels to the paragraph level */
2124
/* we still have to perform bracket matching */
2125
int paraIndex, start, limit;
2126
BracketData bracketData = new BracketData();
2127
bracketInit(bracketData);
2128
for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
2129
if (paraIndex == 0)
2130
start = 0;
2131
else
2132
start = paras_limit[paraIndex-1];
2133
limit = paras_limit[paraIndex];
2134
level = paras_level[paraIndex];
2135
for (i = start; i < limit; i++) {
2136
levels[i] = level;
2137
dirProp = dirProps[i];
2138
if (dirProp == BN)
2139
continue;
2140
if (dirProp == B) {
2141
if ((i + 1) < length) {
2142
if (text[i] == CR && text[i + 1] == LF)
2143
continue; /* skip CR when followed by LF */
2144
bracketProcessB(bracketData, level);
2145
}
2146
continue;
2147
}
2148
bracketProcessChar(bracketData, i);
2149
}
2150
}
2151
return dirct;
2152
}
2153
/* continue to perform (Xn) */
2154
2155
/* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
2156
/* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
2157
byte embeddingLevel = level, newLevel;
2158
byte previousLevel = level; /* previous level for regular (not CC) characters */
2159
int lastCcPos = 0; /* index of last effective LRx,RLx, PDx */
2160
2161
/* The following stack remembers the embedding level and the ISOLATE flag of level runs.
2162
stackLast points to its current entry. */
2163
short[] stack = new short[MAX_EXPLICIT_LEVEL + 2]; /* we never push anything >= MAX_EXPLICIT_LEVEL
2164
but we need one more entry as base */
2165
int stackLast = 0;
2166
int overflowIsolateCount = 0;
2167
int overflowEmbeddingCount = 0;
2168
int validIsolateCount = 0;
2169
BracketData bracketData = new BracketData();
2170
bracketInit(bracketData);
2171
stack[0] = level; /* initialize base entry to para level, no override, no isolate */
2172
2173
/* recalculate the flags */
2174
flags = 0;
2175
2176
for (i = 0; i < length; i++) {
2177
dirProp = dirProps[i];
2178
switch (dirProp) {
2179
case LRE:
2180
case RLE:
2181
case LRO:
2182
case RLO:
2183
/* (X2, X3, X4, X5) */
2184
flags |= DirPropFlag(BN);
2185
levels[i] = previousLevel;
2186
if (dirProp == LRE || dirProp == LRO) {
2187
/* least greater even level */
2188
newLevel = (byte)((embeddingLevel+2) & ~(LEVEL_OVERRIDE | 1));
2189
} else {
2190
/* least greater odd level */
2191
newLevel = (byte)((NoOverride(embeddingLevel) + 1) | 1);
2192
}
2193
if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 &&
2194
overflowEmbeddingCount == 0) {
2195
lastCcPos = i;
2196
embeddingLevel = newLevel;
2197
if (dirProp == LRO || dirProp == RLO)
2198
embeddingLevel |= LEVEL_OVERRIDE;
2199
stackLast++;
2200
stack[stackLast] = embeddingLevel;
2201
/* we don't need to set LEVEL_OVERRIDE off for LRE and RLE
2202
since this has already been done for newLevel which is
2203
the source for embeddingLevel.
2204
*/
2205
} else {
2206
if (overflowIsolateCount == 0)
2207
overflowEmbeddingCount++;
2208
}
2209
break;
2210
case PDF:
2211
/* (X7) */
2212
flags |= DirPropFlag(BN);
2213
levels[i] = previousLevel;
2214
/* handle all the overflow cases first */
2215
if (overflowIsolateCount > 0) {
2216
break;
2217
}
2218
if (overflowEmbeddingCount > 0) {
2219
overflowEmbeddingCount--;
2220
break;
2221
}
2222
if (stackLast > 0 && stack[stackLast] < ISOLATE) { /* not an isolate entry */
2223
lastCcPos = i;
2224
stackLast--;
2225
embeddingLevel = (byte)stack[stackLast];
2226
}
2227
break;
2228
case LRI:
2229
case RLI:
2230
flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
2231
levels[i] = NoOverride(embeddingLevel);
2232
if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
2233
bracketProcessBoundary(bracketData, lastCcPos,
2234
previousLevel, embeddingLevel);
2235
flags |= DirPropFlagMultiRuns;
2236
}
2237
previousLevel = embeddingLevel;
2238
/* (X5a, X5b) */
2239
if (dirProp == LRI)
2240
/* least greater even level */
2241
newLevel=(byte)((embeddingLevel+2)&~(LEVEL_OVERRIDE|1));
2242
else
2243
/* least greater odd level */
2244
newLevel=(byte)((NoOverride(embeddingLevel)+1)|1);
2245
if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0
2246
&& overflowEmbeddingCount == 0) {
2247
flags |= DirPropFlag(dirProp);
2248
lastCcPos = i;
2249
validIsolateCount++;
2250
if (validIsolateCount > isolateCount)
2251
isolateCount = validIsolateCount;
2252
embeddingLevel = newLevel;
2253
/* we can increment stackLast without checking because newLevel
2254
will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
2255
stackLast++;
2256
stack[stackLast] = (short)(embeddingLevel + ISOLATE);
2257
bracketProcessLRI_RLI(bracketData, embeddingLevel);
2258
} else {
2259
/* make it WS so that it is handled by adjustWSLevels() */
2260
dirProps[i] = WS;
2261
overflowIsolateCount++;
2262
}
2263
break;
2264
case PDI:
2265
if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
2266
bracketProcessBoundary(bracketData, lastCcPos,
2267
previousLevel, embeddingLevel);
2268
flags |= DirPropFlagMultiRuns;
2269
}
2270
/* (X6a) */
2271
if (overflowIsolateCount > 0) {
2272
overflowIsolateCount--;
2273
/* make it WS so that it is handled by adjustWSLevels() */
2274
dirProps[i] = WS;
2275
}
2276
else if (validIsolateCount > 0) {
2277
flags |= DirPropFlag(PDI);
2278
lastCcPos = i;
2279
overflowEmbeddingCount = 0;
2280
while (stack[stackLast] < ISOLATE) /* pop embedding entries */
2281
stackLast--; /* until the last isolate entry */
2282
stackLast--; /* pop also the last isolate entry */
2283
validIsolateCount--;
2284
bracketProcessPDI(bracketData);
2285
} else
2286
/* make it WS so that it is handled by adjustWSLevels() */
2287
dirProps[i] = WS;
2288
embeddingLevel = (byte)(stack[stackLast] & ~ISOLATE);
2289
flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
2290
previousLevel = embeddingLevel;
2291
levels[i] = NoOverride(embeddingLevel);
2292
break;
2293
case B:
2294
flags |= DirPropFlag(B);
2295
levels[i] = GetParaLevelAt(i);
2296
if ((i + 1) < length) {
2297
if (text[i] == CR && text[i + 1] == LF)
2298
break; /* skip CR when followed by LF */
2299
overflowEmbeddingCount = overflowIsolateCount = 0;
2300
validIsolateCount = 0;
2301
stackLast = 0;
2302
previousLevel = embeddingLevel = GetParaLevelAt(i + 1);
2303
stack[0] = embeddingLevel; /* initialize base entry to para level, no override, no isolate */
2304
bracketProcessB(bracketData, embeddingLevel);
2305
}
2306
break;
2307
case BN:
2308
/* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
2309
/* they will get their levels set correctly in adjustWSLevels() */
2310
levels[i] = previousLevel;
2311
flags |= DirPropFlag(BN);
2312
break;
2313
default:
2314
/* all other types are normal characters and get the "real" level */
2315
if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
2316
bracketProcessBoundary(bracketData, lastCcPos,
2317
previousLevel, embeddingLevel);
2318
flags |= DirPropFlagMultiRuns;
2319
if ((embeddingLevel & LEVEL_OVERRIDE) != 0)
2320
flags |= DirPropFlagO(embeddingLevel);
2321
else
2322
flags |= DirPropFlagE(embeddingLevel);
2323
}
2324
previousLevel = embeddingLevel;
2325
levels[i] = embeddingLevel;
2326
bracketProcessChar(bracketData, i);
2327
/* the dirProp may have been changed in bracketProcessChar() */
2328
flags |= DirPropFlag(dirProps[i]);
2329
break;
2330
}
2331
}
2332
if ((flags & MASK_EMBEDDING) != 0) {
2333
flags |= DirPropFlagLR(paraLevel);
2334
}
2335
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
2336
flags |= DirPropFlag(L);
2337
}
2338
/* again, determine if the text is mixed-directional or single-directional */
2339
dirct = directionFromFlags();
2340
2341
return dirct;
2342
}
2343
2344
/*
2345
* Use a pre-specified embedding levels array:
2346
*
2347
* Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
2348
* ignore all explicit codes (X9),
2349
* and check all the preset levels.
2350
*
2351
* Recalculate the flags to have them reflect the real properties
2352
* after taking the explicit embeddings into account.
2353
*/
2354
private byte checkExplicitLevels() {
2355
byte dirProp;
2356
int i;
2357
int isolateCount = 0;
2358
2359
this.flags = 0; /* collect all directionalities in the text */
2360
byte level;
2361
this.isolateCount = 0;
2362
2363
for (i = 0; i < length; ++i) {
2364
if (levels[i] == 0) {
2365
levels[i] = paraLevel;
2366
}
2367
2368
// for backward compatibility
2369
if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) {
2370
if ((levels[i] & LEVEL_OVERRIDE) != 0) {
2371
levels[i] = (byte)(paraLevel|LEVEL_OVERRIDE);
2372
} else {
2373
levels[i] = paraLevel;
2374
}
2375
}
2376
2377
level = levels[i];
2378
dirProp = dirProps[i];
2379
if (dirProp == LRI || dirProp == RLI) {
2380
isolateCount++;
2381
if (isolateCount > this.isolateCount)
2382
this.isolateCount = isolateCount;
2383
}
2384
else if (dirProp == PDI) {
2385
isolateCount--;
2386
} else if (dirProp == B) {
2387
isolateCount = 0;
2388
}
2389
if ((level & LEVEL_OVERRIDE) != 0) {
2390
/* keep the override flag in levels[i] but adjust the flags */
2391
level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */
2392
flags |= DirPropFlagO(level);
2393
} else {
2394
/* set the flags */
2395
flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
2396
}
2397
if ((level < GetParaLevelAt(i) &&
2398
!((0 == level) && (dirProp == B))) ||
2399
(MAX_EXPLICIT_LEVEL < level)) {
2400
/* level out of bounds */
2401
throw new IllegalArgumentException("level " + level +
2402
" out of bounds at " + i);
2403
}
2404
}
2405
if ((flags & MASK_EMBEDDING) != 0) {
2406
flags |= DirPropFlagLR(paraLevel);
2407
}
2408
/* determine if the text is mixed-directional or single-directional */
2409
return directionFromFlags();
2410
}
2411
2412
/*********************************************************************/
2413
/* The Properties state machine table */
2414
/*********************************************************************/
2415
/* */
2416
/* All table cells are 8 bits: */
2417
/* bits 0..4: next state */
2418
/* bits 5..7: action to perform (if > 0) */
2419
/* */
2420
/* Cells may be of format "n" where n represents the next state */
2421
/* (except for the rightmost column). */
2422
/* Cells may also be of format "_(x,y)" where x represents an action */
2423
/* to perform and y represents the next state. */
2424
/* */
2425
/*********************************************************************/
2426
/* Definitions and type for properties state tables */
2427
/*********************************************************************/
2428
private static final int IMPTABPROPS_COLUMNS = 16;
2429
private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
2430
private static short GetStateProps(short cell) {
2431
return (short)(cell & 0x1f);
2432
}
2433
private static short GetActionProps(short cell) {
2434
return (short)(cell >> 5);
2435
}
2436
2437
private static final short groupProp[] = /* dirProp regrouped */
2438
{
2439
/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
2440
0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
2441
};
2442
private static final short _L = 0;
2443
private static final short _R = 1;
2444
private static final short _EN = 2;
2445
private static final short _AN = 3;
2446
private static final short _ON = 4;
2447
private static final short _S = 5;
2448
private static final short _B = 6; /* reduced dirProp */
2449
2450
/*********************************************************************/
2451
/* */
2452
/* PROPERTIES STATE TABLE */
2453
/* */
2454
/* In table impTabProps, */
2455
/* - the ON column regroups ON and WS, FSI, RLI, LRI and PDI */
2456
/* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */
2457
/* - the Res column is the reduced property assigned to a run */
2458
/* */
2459
/* Action 1: process current run1, init new run1 */
2460
/* 2: init new run2 */
2461
/* 3: process run1, process run2, init new run1 */
2462
/* 4: process run1, set run1=run2, init new run2 */
2463
/* */
2464
/* Notes: */
2465
/* 1) This table is used in resolveImplicitLevels(). */
2466
/* 2) This table triggers actions when there is a change in the Bidi*/
2467
/* property of incoming characters (action 1). */
2468
/* 3) Most such property sequences are processed immediately (in */
2469
/* fact, passed to processPropertySeq()). */
2470
/* 4) However, numbers are assembled as one sequence. This means */
2471
/* that undefined situations (like CS following digits, until */
2472
/* it is known if the next char will be a digit) are held until */
2473
/* following chars define them. */
2474
/* Example: digits followed by CS, then comes another CS or ON; */
2475
/* the digits will be processed, then the CS assigned */
2476
/* as the start of an ON sequence (action 3). */
2477
/* 5) There are cases where more than one sequence must be */
2478
/* processed, for instance digits followed by CS followed by L: */
2479
/* the digits must be processed as one sequence, and the CS */
2480
/* must be processed as an ON sequence, all this before starting */
2481
/* assembling chars for the opening L sequence. */
2482
/* */
2483
/* */
2484
private static final short impTabProps[][] =
2485
{
2486
/* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, ENL, ENR, Res */
2487
/* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, 18, 21, _ON },
2488
/* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, 32+18, 32+21, _L },
2489
/* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, 32+18, 32+21, _R },
2490
/* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, 32+18, 32+21, _R },
2491
/* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, 18, 21, _EN },
2492
/* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, 32+18, 32+21, _AN },
2493
/* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, 18, 21, _AN },
2494
/* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, 32+18, 32+21, _ON },
2495
/* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, 32+18, 32+21, _ON },
2496
/* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, 18, 21, _ON },
2497
/*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, 18, 21, _EN },
2498
/*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, 18, 21, _EN },
2499
/*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, 96+18, 96+21, _AN },
2500
/*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, 18, 21, _AN },
2501
/*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3,128+18,128+21, _ON },
2502
/*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, 32+18, 32+21, _S },
2503
/*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, 32+18, 32+21, _S },
2504
/*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, 32+18, 32+21, _B },
2505
/*18 ENL */ { 32+1, 32+2, 18, 32+5, 32+7, 32+15, 32+17, 64+19, 20, 64+19, 18, 18, 32+3, 18, 21, _L },
2506
/*19 ENL+ES/CS */ { 96+1, 96+2, 18, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 19, 128+7, 96+3, 18, 21, _L },
2507
/*20 ENL+ET */ { 32+1, 32+2, 18, 32+5, 32+7, 32+15, 32+17, 32+7, 20, 32+7, 20, 20, 32+3, 18, 21, _L },
2508
/*21 ENR */ { 32+1, 32+2, 21, 32+5, 32+7, 32+15, 32+17, 64+22, 23, 64+22, 21, 21, 32+3, 18, 21, _AN },
2509
/*22 ENR+ES/CS */ { 96+1, 96+2, 21, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 22, 128+7, 96+3, 18, 21, _AN },
2510
/*23 ENR+ET */ { 32+1, 32+2, 21, 32+5, 32+7, 32+15, 32+17, 32+7, 23, 32+7, 23, 23, 32+3, 18, 21, _AN }
2511
};
2512
2513
/*********************************************************************/
2514
/* The levels state machine tables */
2515
/*********************************************************************/
2516
/* */
2517
/* All table cells are 8 bits: */
2518
/* bits 0..3: next state */
2519
/* bits 4..7: action to perform (if > 0) */
2520
/* */
2521
/* Cells may be of format "n" where n represents the next state */
2522
/* (except for the rightmost column). */
2523
/* Cells may also be of format "_(x,y)" where x represents an action */
2524
/* to perform and y represents the next state. */
2525
/* */
2526
/* This format limits each table to 16 states each and to 15 actions.*/
2527
/* */
2528
/*********************************************************************/
2529
/* Definitions and type for levels state tables */
2530
/*********************************************************************/
2531
private static final int IMPTABLEVELS_COLUMNS = _B + 2;
2532
private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;
2533
private static short GetState(byte cell) { return (short)(cell & 0x0f); }
2534
private static short GetAction(byte cell) { return (short)(cell >> 4); }
2535
2536
private static class ImpTabPair {
2537
byte[][][] imptab;
2538
short[][] impact;
2539
2540
ImpTabPair(byte[][] table1, byte[][] table2,
2541
short[] act1, short[] act2) {
2542
imptab = new byte[][][] {table1, table2};
2543
impact = new short[][] {act1, act2};
2544
}
2545
}
2546
2547
/*********************************************************************/
2548
/* */
2549
/* LEVELS STATE TABLES */
2550
/* */
2551
/* In all levels state tables, */
2552
/* - state 0 is the initial state */
2553
/* - the Res column is the increment to add to the text level */
2554
/* for this property sequence. */
2555
/* */
2556
/* The impact arrays for each table of a pair map the local action */
2557
/* numbers of the table to the total list of actions. For instance, */
2558
/* action 2 in a given table corresponds to the action number which */
2559
/* appears in entry [2] of the impact array for that table. */
2560
/* The first entry of all impact arrays must be 0. */
2561
/* */
2562
/* Action 1: init conditional sequence */
2563
/* 2: prepend conditional sequence to current sequence */
2564
/* 3: set ON sequence to new level - 1 */
2565
/* 4: init EN/AN/ON sequence */
2566
/* 5: fix EN/AN/ON sequence followed by R */
2567
/* 6: set previous level sequence to level 2 */
2568
/* */
2569
/* Notes: */
2570
/* 1) These tables are used in processPropertySeq(). The input */
2571
/* is property sequences as determined by resolveImplicitLevels. */
2572
/* 2) Most such property sequences are processed immediately */
2573
/* (levels are assigned). */
2574
/* 3) However, some sequences cannot be assigned a final level till */
2575
/* one or more following sequences are received. For instance, */
2576
/* ON following an R sequence within an even-level paragraph. */
2577
/* If the following sequence is R, the ON sequence will be */
2578
/* assigned basic run level+1, and so will the R sequence. */
2579
/* 4) S is generally handled like ON, since its level will be fixed */
2580
/* to paragraph level in adjustWSLevels(). */
2581
/* */
2582
2583
private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
2584
/* In this table, conditional sequences receive the lower possible level
2585
until proven otherwise.
2586
*/
2587
{
2588
/* L, R, EN, AN, ON, S, B, Res */
2589
/* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 },
2590
/* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 },
2591
/* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 },
2592
/* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 },
2593
/* 4 : R+ON */ { 0, 0x21, 0x33, 0x33, 4, 4, 0, 0 },
2594
/* 5 : AN+ON */ { 0, 0x21, 0, 0x32, 5, 5, 0, 0 }
2595
};
2596
2597
private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */
2598
/* In this table, conditional sequences receive the lower possible level
2599
until proven otherwise.
2600
*/
2601
{
2602
/* L, R, EN, AN, ON, S, B, Res */
2603
/* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
2604
/* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 },
2605
/* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
2606
/* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 },
2607
/* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 },
2608
/* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 }
2609
};
2610
2611
private static final short[] impAct0 = {0,1,2,3,4};
2612
2613
private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
2614
impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
2615
2616
private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
2617
/* In this table, conditional sequences receive the lower possible
2618
level until proven otherwise.
2619
*/
2620
/* L, R, EN, AN, ON, S, B, Res */
2621
/* 0 : init */ { 0, 2, 0x11, 0x11, 0, 0, 0, 0 },
2622
/* 1 : L+EN/AN */ { 0, 0x42, 1, 1, 0, 0, 0, 0 },
2623
/* 2 : R */ { 0, 2, 4, 4, 0x13, 0x13, 0, 1 },
2624
/* 3 : R+ON */ { 0, 0x22, 0x34, 0x34, 3, 3, 0, 0 },
2625
/* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 }
2626
};
2627
private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
2628
impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
2629
2630
private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
2631
/* In this table, EN/AN+ON sequences receive levels as if associated with R
2632
until proven that there is L or sor/eor on both sides. AN is handled like EN.
2633
*/
2634
/* L, R, EN, AN, ON, S, B, Res */
2635
/* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 },
2636
/* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 },
2637
/* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 },
2638
/* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 },
2639
/* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 },
2640
/* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 }
2641
};
2642
private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
2643
/* In this table, EN/AN+ON sequences receive levels as if associated with R
2644
until proven that there is L on both sides. AN is handled like EN.
2645
*/
2646
/* L, R, EN, AN, ON, S, B, Res */
2647
/* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 },
2648
/* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 },
2649
/* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 },
2650
/* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 },
2651
/* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 }
2652
};
2653
private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
2654
ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
2655
impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
2656
2657
private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
2658
/* This table is identical to the Default LTR table except that EN and AN
2659
are handled like L.
2660
*/
2661
/* L, R, EN, AN, ON, S, B, Res */
2662
/* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 },
2663
/* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 },
2664
/* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 },
2665
/* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 },
2666
/* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 },
2667
/* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 }
2668
};
2669
private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
2670
/* This table is identical to the Default RTL table except that EN and AN
2671
are handled like L.
2672
*/
2673
/* L, R, EN, AN, ON, S, B, Res */
2674
/* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 },
2675
/* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 },
2676
/* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 },
2677
/* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 },
2678
/* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 },
2679
/* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 }
2680
};
2681
private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
2682
(impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
2683
impAct0, impAct0);
2684
2685
private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = { /* Odd paragraph level */
2686
/* In this table, conditional sequences receive the lower possible level
2687
until proven otherwise.
2688
*/
2689
/* L, R, EN, AN, ON, S, B, Res */
2690
/* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
2691
/* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 },
2692
/* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
2693
/* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 },
2694
/* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 },
2695
/* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 },
2696
/* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 }
2697
};
2698
private static final short[] impAct1 = {0,1,13,14};
2699
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
2700
impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2701
2702
private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
2703
/* The case handled in this table is (visually): R EN L
2704
*/
2705
/* L, R, EN, AN, ON, S, B, Res */
2706
/* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 },
2707
/* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 },
2708
/* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 },
2709
/* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 },
2710
/* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 },
2711
/* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 },
2712
/* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 }
2713
};
2714
private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
2715
/* The cases handled in this table are (visually): R EN L
2716
R L AN L
2717
*/
2718
/* L, R, EN, AN, ON, S, B, Res */
2719
/* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 },
2720
/* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 },
2721
/* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 },
2722
/* 3 : L */ { 3, 0, 3, 0x36, 0x14, 0x40, 0, 1 },
2723
/* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 },
2724
/* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 },
2725
/* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 }
2726
};
2727
private static final short[] impAct2 = {0,1,2,5,6,7,8};
2728
private static final short[] impAct3 = {0,1,9,10,11,12};
2729
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
2730
new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
2731
impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2732
2733
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
2734
impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2735
2736
private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
2737
/* The case handled in this table is (visually): R EN L
2738
*/
2739
/* L, R, EN, AN, ON, S, B, Res */
2740
/* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 },
2741
/* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 },
2742
/* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 },
2743
/* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 },
2744
/* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 }
2745
};
2746
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
2747
ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
2748
impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2749
2750
private static class LevState {
2751
byte[][] impTab; /* level table pointer */
2752
short[] impAct; /* action map array */
2753
int startON; /* start of ON sequence */
2754
int startL2EN; /* start of level 2 sequence */
2755
int lastStrongRTL; /* index of last found R or AL */
2756
int runStart; /* start position of the run */
2757
short state; /* current state */
2758
byte runLevel; /* run level before implicit solving */
2759
}
2760
2761
/*------------------------------------------------------------------------*/
2762
2763
/* Capacity given to insertPoints.points by addPoint() when that array is
 * still empty. */
static final int FIRSTALLOC = 10;
2764
/*
2765
* param pos: position where to insert
2766
* param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
2767
*/
2768
private void addPoint(int pos, int flag)
2769
{
2770
Point point = new Point();
2771
2772
int len = insertPoints.points.length;
2773
if (len == 0) {
2774
insertPoints.points = new Point[FIRSTALLOC];
2775
len = FIRSTALLOC;
2776
}
2777
if (insertPoints.size >= len) { /* no room for new point */
2778
Point[] savePoints = insertPoints.points;
2779
insertPoints.points = new Point[len * 2];
2780
System.arraycopy(savePoints, 0, insertPoints.points, 0, len);
2781
}
2782
point.pos = pos;
2783
point.flag = flag;
2784
insertPoints.points[insertPoints.size] = point;
2785
insertPoints.size++;
2786
}
2787
2788
private void setLevelsOutsideIsolates(int start, int limit, byte level)
2789
{
2790
byte dirProp;
2791
int isolateCount = 0, k;
2792
for (k = start; k < limit; k++) {
2793
dirProp = dirProps[k];
2794
if (dirProp == PDI)
2795
isolateCount--;
2796
if (isolateCount == 0) {
2797
levels[k] = level;
2798
}
2799
if (dirProp == LRI || dirProp == RLI)
2800
isolateCount++;
2801
}
2802
}
2803
2804
/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
2805
2806
/*
2807
* This implementation of the (Wn) rules applies all rules in one pass.
2808
* In order to do so, it needs a look-ahead of typically 1 character
2809
* (except for W5: sequences of ET) and keeps track of changes
2810
* in a rule Wp that affect a later Wq (p<q).
2811
*
2812
* The (Nn) and (In) rules are also performed in that same single loop,
2813
* but effectively one iteration behind for white space.
2814
*
2815
* Since all implicit rules are performed in one step, it is not necessary
2816
* to actually store the intermediate directional properties in dirProps[].
2817
*/
2818
2819
private void processPropertySeq(LevState levState, short _prop,
2820
int start, int limit) {
2821
byte cell;
2822
byte[][] impTab = levState.impTab;
2823
short[] impAct = levState.impAct;
2824
short oldStateSeq,actionSeq;
2825
byte level, addLevel;
2826
int start0, k;
2827
2828
start0 = start; /* save original start position */
2829
oldStateSeq = levState.state;
2830
cell = impTab[oldStateSeq][_prop];
2831
levState.state = GetState(cell); /* isolate the new state */
2832
actionSeq = impAct[GetAction(cell)]; /* isolate the action */
2833
addLevel = impTab[levState.state][IMPTABLEVELS_RES];
2834
2835
if (actionSeq != 0) {
2836
switch (actionSeq) {
2837
case 1: /* init ON seq */
2838
levState.startON = start0;
2839
break;
2840
2841
case 2: /* prepend ON seq to current seq */
2842
start = levState.startON;
2843
break;
2844
2845
case 3: /* EN/AN after R+ON */
2846
level = (byte)(levState.runLevel + 1);
2847
setLevelsOutsideIsolates(levState.startON, start0, level);
2848
break;
2849
2850
case 4: /* EN/AN before R for NUMBERS_SPECIAL */
2851
level = (byte)(levState.runLevel + 2);
2852
setLevelsOutsideIsolates(levState.startON, start0, level);
2853
break;
2854
2855
case 5: /* L or S after possible relevant EN/AN */
2856
/* check if we had EN after R/AL */
2857
if (levState.startL2EN >= 0) {
2858
addPoint(levState.startL2EN, LRM_BEFORE);
2859
}
2860
levState.startL2EN = -1; /* not within previous if since could also be -2 */
2861
/* check if we had any relevant EN/AN after R/AL */
2862
if ((insertPoints.points.length == 0) ||
2863
(insertPoints.size <= insertPoints.confirmed)) {
2864
/* nothing, just clean up */
2865
levState.lastStrongRTL = -1;
2866
/* check if we have a pending conditional segment */
2867
level = impTab[oldStateSeq][IMPTABLEVELS_RES];
2868
if ((level & 1) != 0 && levState.startON > 0) { /* after ON */
2869
start = levState.startON; /* reset to basic run level */
2870
}
2871
if (_prop == _S) { /* add LRM before S */
2872
addPoint(start0, LRM_BEFORE);
2873
insertPoints.confirmed = insertPoints.size;
2874
}
2875
break;
2876
}
2877
/* reset previous RTL cont to level for LTR text */
2878
for (k = levState.lastStrongRTL + 1; k < start0; k++) {
2879
/* reset odd level, leave runLevel+2 as is */
2880
levels[k] = (byte)((levels[k] - 2) & ~1);
2881
}
2882
/* mark insert points as confirmed */
2883
insertPoints.confirmed = insertPoints.size;
2884
levState.lastStrongRTL = -1;
2885
if (_prop == _S) { /* add LRM before S */
2886
addPoint(start0, LRM_BEFORE);
2887
insertPoints.confirmed = insertPoints.size;
2888
}
2889
break;
2890
2891
case 6: /* R/AL after possible relevant EN/AN */
2892
/* just clean up */
2893
if (insertPoints.points.length > 0)
2894
/* remove all non confirmed insert points */
2895
insertPoints.size = insertPoints.confirmed;
2896
levState.startON = -1;
2897
levState.startL2EN = -1;
2898
levState.lastStrongRTL = limit - 1;
2899
break;
2900
2901
case 7: /* EN/AN after R/AL + possible cont */
2902
/* check for real AN */
2903
2904
if ((_prop == _AN) && (dirProps[start0] == AN) &&
2905
(reorderingMode != REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
2906
{
2907
/* real AN */
2908
if (levState.startL2EN == -1) { /* if no relevant EN already found */
2909
/* just note the rightmost digit as a strong RTL */
2910
levState.lastStrongRTL = limit - 1;
2911
break;
2912
}
2913
if (levState.startL2EN >= 0) { /* after EN, no AN */
2914
addPoint(levState.startL2EN, LRM_BEFORE);
2915
levState.startL2EN = -2;
2916
}
2917
/* note AN */
2918
addPoint(start0, LRM_BEFORE);
2919
break;
2920
}
2921
/* if first EN/AN after R/AL */
2922
if (levState.startL2EN == -1) {
2923
levState.startL2EN = start0;
2924
}
2925
break;
2926
2927
case 8: /* note location of latest R/AL */
2928
levState.lastStrongRTL = limit - 1;
2929
levState.startON = -1;
2930
break;
2931
2932
case 9: /* L after R+ON/EN/AN */
2933
/* include possible adjacent number on the left */
2934
for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
2935
}
2936
if (k >= 0) {
2937
addPoint(k, RLM_BEFORE); /* add RLM before */
2938
insertPoints.confirmed = insertPoints.size; /* confirm it */
2939
}
2940
levState.startON = start0;
2941
break;
2942
2943
case 10: /* AN after L */
2944
/* AN numbers between L text on both sides may be trouble. */
2945
/* tentatively bracket with LRMs; will be confirmed if followed by L */
2946
addPoint(start0, LRM_BEFORE); /* add LRM before */
2947
addPoint(start0, LRM_AFTER); /* add LRM after */
2948
break;
2949
2950
case 11: /* R after L+ON/EN/AN */
2951
/* false alert, infirm LRMs around previous AN */
2952
insertPoints.size=insertPoints.confirmed;
2953
if (_prop == _S) { /* add RLM before S */
2954
addPoint(start0, RLM_BEFORE);
2955
insertPoints.confirmed = insertPoints.size;
2956
}
2957
break;
2958
2959
case 12: /* L after L+ON/AN */
2960
level = (byte)(levState.runLevel + addLevel);
2961
for (k=levState.startON; k < start0; k++) {
2962
if (levels[k] < level) {
2963
levels[k] = level;
2964
}
2965
}
2966
insertPoints.confirmed = insertPoints.size; /* confirm inserts */
2967
levState.startON = start0;
2968
break;
2969
2970
case 13: /* L after L+ON+EN/AN/ON */
2971
level = levState.runLevel;
2972
for (k = start0-1; k >= levState.startON; k--) {
2973
if (levels[k] == level+3) {
2974
while (levels[k] == level+3) {
2975
levels[k--] -= 2;
2976
}
2977
while (levels[k] == level) {
2978
k--;
2979
}
2980
}
2981
if (levels[k] == level+2) {
2982
levels[k] = level;
2983
continue;
2984
}
2985
levels[k] = (byte)(level+1);
2986
}
2987
break;
2988
2989
case 14: /* R after L+ON+EN/AN/ON */
2990
level = (byte)(levState.runLevel+1);
2991
for (k = start0-1; k >= levState.startON; k--) {
2992
if (levels[k] > level) {
2993
levels[k] -= 2;
2994
}
2995
}
2996
break;
2997
2998
default: /* we should never get here */
2999
throw new IllegalStateException("Internal ICU error in processPropertySeq");
3000
}
3001
}
3002
if ((addLevel) != 0 || (start < start0)) {
3003
level = (byte)(levState.runLevel + addLevel);
3004
if (start >= levState.runStart) {
3005
for (k = start; k < limit; k++) {
3006
levels[k] = level;
3007
}
3008
} else {
3009
setLevelsOutsideIsolates(start, limit, level);
3010
}
3011
}
3012
}
3013
3014
private void resolveImplicitLevels(int start, int limit, short sor, short eor)
3015
{
3016
byte dirProp;
3017
LevState levState = new LevState();
3018
int i, start1, start2;
3019
short oldStateImp, stateImp, actionImp;
3020
short gprop, resProp, cell;
3021
boolean inverseRTL;
3022
short nextStrongProp = R;
3023
int nextStrongPos = -1;
3024
3025
/* check for RTL inverse Bidi mode */
3026
/* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
3027
* loop on the text characters from end to start.
3028
* This would need a different properties state table (at least different
3029
* actions) and different levels state tables (maybe very similar to the
3030
* LTR corresponding ones.
3031
*/
3032
inverseRTL=((start<lastArabicPos) && ((GetParaLevelAt(start) & 1)>0) &&
3033
(reorderingMode == REORDER_INVERSE_LIKE_DIRECT ||
3034
reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
3035
/* initialize for property and levels state table */
3036
levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
3037
levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
3038
levState.runStart = start;
3039
levState.runLevel = levels[start];
3040
levState.impTab = impTabPair.imptab[levState.runLevel & 1];
3041
levState.impAct = impTabPair.impact[levState.runLevel & 1];
3042
3043
/* The isolates[] entries contain enough information to
3044
resume the bidi algorithm in the same state as it was
3045
when it was interrupted by an isolate sequence. */
3046
if (dirProps[start] == PDI) {
3047
levState.startON = isolates[isolateCount].startON;
3048
start1 = isolates[isolateCount].start1;
3049
stateImp = isolates[isolateCount].stateImp;
3050
levState.state = isolates[isolateCount].state;
3051
isolateCount--;
3052
} else {
3053
levState.startON = -1;
3054
start1 = start;
3055
if (dirProps[start] == NSM)
3056
stateImp = (short)(1 + sor);
3057
else
3058
stateImp = 0;
3059
levState.state = 0;
3060
processPropertySeq(levState, sor, start, start);
3061
}
3062
start2 = start; /* to make the Java compiler happy */
3063
3064
for (i = start; i <= limit; i++) {
3065
if (i >= limit) {
3066
int k;
3067
for (k = limit - 1;
3068
k > start &&
3069
(DirPropFlag(dirProps[k]) & MASK_BN_EXPLICIT) != 0;
3070
k--);
3071
dirProp = dirProps[k];
3072
if (dirProp == LRI || dirProp == RLI)
3073
break; /* no forced closing for sequence ending with LRI/RLI */
3074
gprop = eor;
3075
} else {
3076
byte prop, prop1;
3077
prop = dirProps[i];
3078
if (prop == B)
3079
isolateCount = -1; /* current isolates stack entry == none */
3080
if (inverseRTL) {
3081
if (prop == AL) {
3082
/* AL before EN does not make it AN */
3083
prop = R;
3084
} else if (prop == EN) {
3085
if (nextStrongPos <= i) {
3086
/* look for next strong char (L/R/AL) */
3087
int j;
3088
nextStrongProp = R; /* set default */
3089
nextStrongPos = limit;
3090
for (j = i+1; j < limit; j++) {
3091
prop1 = dirProps[j];
3092
if (prop1 == L || prop1 == R || prop1 == AL) {
3093
nextStrongProp = prop1;
3094
nextStrongPos = j;
3095
break;
3096
}
3097
}
3098
}
3099
if (nextStrongProp == AL) {
3100
prop = AN;
3101
}
3102
}
3103
}
3104
gprop = groupProp[prop];
3105
}
3106
oldStateImp = stateImp;
3107
cell = impTabProps[oldStateImp][gprop];
3108
stateImp = GetStateProps(cell); /* isolate the new state */
3109
actionImp = GetActionProps(cell); /* isolate the action */
3110
if ((i == limit) && (actionImp == 0)) {
3111
/* there is an unprocessed sequence if its property == eor */
3112
actionImp = 1; /* process the last sequence */
3113
}
3114
if (actionImp != 0) {
3115
resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
3116
switch (actionImp) {
3117
case 1: /* process current seq1, init new seq1 */
3118
processPropertySeq(levState, resProp, start1, i);
3119
start1 = i;
3120
break;
3121
case 2: /* init new seq2 */
3122
start2 = i;
3123
break;
3124
case 3: /* process seq1, process seq2, init new seq1 */
3125
processPropertySeq(levState, resProp, start1, start2);
3126
processPropertySeq(levState, _ON, start2, i);
3127
start1 = i;
3128
break;
3129
case 4: /* process seq1, set seq1=seq2, init new seq2 */
3130
processPropertySeq(levState, resProp, start1, start2);
3131
start1 = start2;
3132
start2 = i;
3133
break;
3134
default: /* we should never get here */
3135
throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
3136
}
3137
}
3138
}
3139
3140
/* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
3141
for (i = limit - 1;
3142
i > start &&
3143
(DirPropFlag(dirProps[i]) & MASK_BN_EXPLICIT) != 0;
3144
i--);
3145
dirProp = dirProps[i];
3146
if ((dirProp == LRI || dirProp == RLI) && limit < length) {
3147
isolateCount++;
3148
if (isolates[isolateCount] == null)
3149
isolates[isolateCount] = new Isolate();
3150
isolates[isolateCount].stateImp = stateImp;
3151
isolates[isolateCount].state = levState.state;
3152
isolates[isolateCount].start1 = start1;
3153
isolates[isolateCount].startON = levState.startON;
3154
}
3155
else
3156
processPropertySeq(levState, eor, limit, limit);
3157
}
3158
3159
/* perform (L1) and (X9) ---------------------------------------------------- */
3160
3161
/*
3162
* Reset the embedding levels for some non-graphic characters (L1).
3163
* This method also sets appropriate levels for BN, and
3164
* explicit embedding types that are supposed to have been removed
3165
* from the paragraph in (X9).
3166
*/
3167
private void adjustWSLevels() {
3168
int i;
3169
3170
if ((flags & MASK_WS) != 0) {
3171
int flag;
3172
i = trailingWSStart;
3173
while (i > 0) {
3174
/* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
3175
while (i > 0 && ((flag = DirPropFlag(dirProps[--i])) & MASK_WS) != 0) {
3176
if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
3177
levels[i] = 0;
3178
} else {
3179
levels[i] = GetParaLevelAt(i);
3180
}
3181
}
3182
3183
/* reset BN to the next character's paraLevel until B/S, which restarts above loop */
3184
/* here, i+1 is guaranteed to be <length */
3185
while (i > 0) {
3186
flag = DirPropFlag(dirProps[--i]);
3187
if ((flag & MASK_BN_EXPLICIT) != 0) {
3188
levels[i] = levels[i + 1];
3189
} else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
3190
levels[i] = 0;
3191
break;
3192
} else if ((flag & MASK_B_S) != 0){
3193
levels[i] = GetParaLevelAt(i);
3194
break;
3195
}
3196
}
3197
}
3198
}
3199
}
3200
3201
private void setParaSuccess() {
3202
paraBidi = this; /* mark successful setPara */
3203
}
3204
3205
private int Bidi_Min(int x, int y) {
3206
return x < y ? x : y;
3207
}
3208
3209
private int Bidi_Abs(int x) {
3210
return x >= 0 ? x : -x;
3211
}
3212
3213
void setParaRunsOnly(char[] parmText, byte parmParaLevel) {
3214
int[] visualMap;
3215
String visualText;
3216
int saveLength, saveTrailingWSStart;
3217
byte[] saveLevels;
3218
byte saveDirection;
3219
int i, j, visualStart, logicalStart,
3220
oldRunCount, runLength, addedRuns, insertRemove,
3221
start, limit, step, indexOddBit, logicalPos,
3222
index, index1;
3223
int saveOptions;
3224
3225
reorderingMode = REORDER_DEFAULT;
3226
int parmLength = parmText.length;
3227
if (parmLength == 0) {
3228
setPara(parmText, parmParaLevel, null);
3229
reorderingMode = REORDER_RUNS_ONLY;
3230
return;
3231
}
3232
/* obtain memory for mapping table and visual text */
3233
saveOptions = reorderingOptions;
3234
if ((saveOptions & OPTION_INSERT_MARKS) > 0) {
3235
reorderingOptions &= ~OPTION_INSERT_MARKS;
3236
reorderingOptions |= OPTION_REMOVE_CONTROLS;
3237
}
3238
parmParaLevel &= 1; /* accept only 0 or 1 */
3239
setPara(parmText, parmParaLevel, null);
3240
/* we cannot access directly levels since it is not yet set if
3241
* direction is not MIXED
3242
*/
3243
saveLevels = new byte[this.length];
3244
System.arraycopy(getLevels(), 0, saveLevels, 0, this.length);
3245
saveTrailingWSStart = trailingWSStart;
3246
3247
/* FOOD FOR THOUGHT: instead of writing the visual text, we could use
3248
* the visual map and the dirProps array to drive the second call
3249
* to setPara (but must make provision for possible removal of
3250
* Bidi controls. Alternatively, only use the dirProps array via
3251
* customized classifier callback.
3252
*/
3253
visualText = writeReordered(DO_MIRRORING);
3254
visualMap = getVisualMap();
3255
this.reorderingOptions = saveOptions;
3256
saveLength = this.length;
3257
saveDirection=this.direction;
3258
3259
this.reorderingMode = REORDER_INVERSE_LIKE_DIRECT;
3260
parmParaLevel ^= 1;
3261
setPara(visualText, parmParaLevel, null);
3262
BidiLine.getRuns(this);
3263
/* check if some runs must be split, count how many splits */
3264
addedRuns = 0;
3265
oldRunCount = this.runCount;
3266
visualStart = 0;
3267
for (i = 0; i < oldRunCount; i++, visualStart += runLength) {
3268
runLength = runs[i].limit - visualStart;
3269
if (runLength < 2) {
3270
continue;
3271
}
3272
logicalStart = runs[i].start;
3273
for (j = logicalStart+1; j < logicalStart+runLength; j++) {
3274
index = visualMap[j];
3275
index1 = visualMap[j-1];
3276
if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
3277
addedRuns++;
3278
}
3279
}
3280
}
3281
if (addedRuns > 0) {
3282
getRunsMemory(oldRunCount + addedRuns);
3283
if (runCount == 1) {
3284
/* because we switch from UBiDi.simpleRuns to UBiDi.runs */
3285
runsMemory[0] = runs[0];
3286
} else {
3287
System.arraycopy(runs, 0, runsMemory, 0, runCount);
3288
}
3289
runs = runsMemory;
3290
runCount += addedRuns;
3291
for (i = oldRunCount; i < runCount; i++) {
3292
if (runs[i] == null) {
3293
runs[i] = new BidiRun(0, 0, (byte)0);
3294
}
3295
}
3296
}
3297
/* split runs which are not consecutive in source text */
3298
int newI;
3299
for (i = oldRunCount-1; i >= 0; i--) {
3300
newI = i + addedRuns;
3301
runLength = i==0 ? runs[0].limit :
3302
runs[i].limit - runs[i-1].limit;
3303
logicalStart = runs[i].start;
3304
indexOddBit = runs[i].level & 1;
3305
if (runLength < 2) {
3306
if (addedRuns > 0) {
3307
runs[newI].copyFrom(runs[i]);
3308
}
3309
logicalPos = visualMap[logicalStart];
3310
runs[newI].start = logicalPos;
3311
runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
3312
continue;
3313
}
3314
if (indexOddBit > 0) {
3315
start = logicalStart;
3316
limit = logicalStart + runLength - 1;
3317
step = 1;
3318
} else {
3319
start = logicalStart + runLength - 1;
3320
limit = logicalStart;
3321
step = -1;
3322
}
3323
for (j = start; j != limit; j += step) {
3324
index = visualMap[j];
3325
index1 = visualMap[j+step];
3326
if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
3327
logicalPos = Bidi_Min(visualMap[start], index);
3328
runs[newI].start = logicalPos;
3329
runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
3330
runs[newI].limit = runs[i].limit;
3331
runs[i].limit -= Bidi_Abs(j - start) + 1;
3332
insertRemove = runs[i].insertRemove & (LRM_AFTER|RLM_AFTER);
3333
runs[newI].insertRemove = insertRemove;
3334
runs[i].insertRemove &= ~insertRemove;
3335
start = j + step;
3336
addedRuns--;
3337
newI--;
3338
}
3339
}
3340
if (addedRuns > 0) {
3341
runs[newI].copyFrom(runs[i]);
3342
}
3343
logicalPos = Bidi_Min(visualMap[start], visualMap[limit]);
3344
runs[newI].start = logicalPos;
3345
runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
3346
}
3347
3348
cleanup1:
3349
/* restore initial paraLevel */
3350
this.paraLevel ^= 1;
3351
cleanup2:
3352
/* restore real text */
3353
this.text = parmText;
3354
this.length = saveLength;
3355
this.originalLength = parmLength;
3356
this.direction=saveDirection;
3357
this.levels = saveLevels;
3358
this.trailingWSStart = saveTrailingWSStart;
3359
if (runCount > 1) {
3360
this.direction = MIXED;
3361
}
3362
cleanup3:
3363
this.reorderingMode = REORDER_RUNS_ONLY;
3364
}
3365
3366
/**
3367
* Perform the Unicode Bidi algorithm. It is defined in the
3368
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
3369
* Unicode Bidirectional Algorithm</a>, version 13,
3370
* also described in The Unicode Standard, Version 4.0 .<p>
3371
*
3372
* This method takes a piece of plain text containing one or more paragraphs,
3373
* with or without externally specified embedding levels from <i>styled</i>
3374
* text and computes the left-right-directionality of each character.<p>
3375
*
3376
* If the entire text is all of the same directionality, then
3377
* the method may not perform all the steps described by the algorithm,
3378
* i.e., some levels may not be the same as if all steps were performed.
3379
* This is not relevant for unidirectional text.<br>
3380
* For example, in pure LTR text with numbers the numbers would get
3381
* a resolved level of 2 higher than the surrounding text according to
3382
* the algorithm. This implementation may set all resolved levels to
3383
* the same value in such a case.<p>
3384
*
3385
* The text can be composed of multiple paragraphs. Occurrence of a block
3386
* separator in the text terminates a paragraph, and whatever comes next starts
3387
* a new paragraph. The exception to this rule is when a Carriage Return (CR)
3388
* is followed by a Line Feed (LF). Both CR and LF are block separators, but
3389
* in that case, the pair of characters is considered as terminating the
3390
* preceding paragraph, and a new paragraph will be started by a character
3391
* coming after the LF.
3392
*
3393
* Although the text is passed here as a <code>String</code>, it is
3394
* stored internally as an array of characters. Therefore the
3395
* documentation will refer to indexes of the characters in the text.
3396
*
3397
* @param text contains the text that the Bidi algorithm will be performed
3398
* on. This text can be retrieved with <code>getText()</code> or
3399
* <code>getTextAsString</code>.<br>
3400
*
3401
* @param paraLevel specifies the default level for the text;
3402
* it is typically 0 (LTR) or 1 (RTL).
3403
* If the method shall determine the paragraph level from the text,
3404
* then <code>paraLevel</code> can be set to
3405
* either <code>LEVEL_DEFAULT_LTR</code>
3406
* or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
3407
* paragraphs, the paragraph level shall be determined separately for
3408
* each paragraph; if a paragraph does not include any strongly typed
3409
* character, then the desired default is used (0 for LTR or 1 for RTL).
3410
* Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
3411
* is also valid, with odd levels indicating RTL.
3412
*
3413
* @param embeddingLevels (in) may be used to preset the embedding and override levels,
3414
* ignoring characters like LRE and PDF in the text.
3415
* A level overrides the directional property of its corresponding
3416
* (same index) character if the level has the
3417
* <code>LEVEL_OVERRIDE</code> bit set.<br><br>
3418
* Except for that bit, it must be
3419
* <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
3420
* with one exception: a level of zero may be specified for a
3421
* paragraph separator even if <code>paraLevel&gt;0</code> when multiple
3422
* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
3423
* <strong>Caution: </strong>A reference to this array, not a copy
3424
* of the levels, will be stored in the <code>Bidi</code> object;
3425
* the <code>embeddingLevels</code>
3426
* should not be modified to avoid unexpected results on subsequent
3427
* Bidi operations. However, the <code>setPara()</code> and
3428
* <code>setLine()</code> methods may modify some or all of the
3429
* levels.<br><br>
3430
* <strong>Note:</strong> the <code>embeddingLevels</code> array must
3431
* have one entry for each character in <code>text</code>.
3432
*
3433
* @throws IllegalArgumentException if the values in embeddingLevels are
3434
* not within the allowed range
3435
*
3436
* @see #LEVEL_DEFAULT_LTR
3437
* @see #LEVEL_DEFAULT_RTL
3438
* @see #LEVEL_OVERRIDE
3439
* @see #MAX_EXPLICIT_LEVEL
3440
* @stable ICU 3.8
3441
*/
3442
void setPara(String text, byte paraLevel, byte[] embeddingLevels)
3443
{
3444
if (text == null) {
3445
setPara(new char[0], paraLevel, embeddingLevels);
3446
} else {
3447
setPara(text.toCharArray(), paraLevel, embeddingLevels);
3448
}
3449
}
3450
3451
/**
3452
* Perform the Unicode Bidi algorithm. It is defined in the
3453
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
3454
* Unicode Bidirectional Algorithm</a>, version 13,
3455
* also described in The Unicode Standard, Version 4.0 .<p>
3456
*
3457
* This method takes a piece of plain text containing one or more paragraphs,
3458
* with or without externally specified embedding levels from <i>styled</i>
3459
* text and computes the left-right-directionality of each character.<p>
3460
*
3461
* If the entire text is all of the same directionality, then
3462
* the method may not perform all the steps described by the algorithm,
3463
* i.e., some levels may not be the same as if all steps were performed.
3464
* This is not relevant for unidirectional text.<br>
3465
* For example, in pure LTR text with numbers the numbers would get
3466
* a resolved level of 2 higher than the surrounding text according to
3467
* the algorithm. This implementation may set all resolved levels to
3468
* the same value in such a case.
3469
*
3470
* The text can be composed of multiple paragraphs. Occurrence of a block
3471
* separator in the text terminates a paragraph, and whatever comes next starts
3472
* a new paragraph. The exception to this rule is when a Carriage Return (CR)
3473
* is followed by a Line Feed (LF). Both CR and LF are block separators, but
3474
* in that case, the pair of characters is considered as terminating the
3475
* preceding paragraph, and a new paragraph will be started by a character
3476
* coming after the LF.
3477
*
3478
* The text is stored internally as an array of characters. Therefore the
3479
* documentation will refer to indexes of the characters in the text.
3480
*
3481
* @param chars contains the text that the Bidi algorithm will be performed
3482
* on. This text can be retrieved with <code>getText()</code> or
3483
* <code>getTextAsString</code>.<br>
3484
*
3485
* @param paraLevel specifies the default level for the text;
3486
* it is typically 0 (LTR) or 1 (RTL).
3487
* If the method shall determine the paragraph level from the text,
3488
* then <code>paraLevel</code> can be set to
3489
* either <code>LEVEL_DEFAULT_LTR</code>
3490
* or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
3491
* paragraphs, the paragraph level shall be determined separately for
3492
* each paragraph; if a paragraph does not include any strongly typed
3493
* character, then the desired default is used (0 for LTR or 1 for RTL).
3494
* Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
3495
* is also valid, with odd levels indicating RTL.
3496
*
3497
* @param embeddingLevels (in) may be used to preset the embedding and
3498
* override levels, ignoring characters like LRE and PDF in the text.
3499
* A level overrides the directional property of its corresponding
3500
* (same index) character if the level has the
3501
* <code>LEVEL_OVERRIDE</code> bit set.<br><br>
3502
* Except for that bit, it must be
3503
* <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
3504
* with one exception: a level of zero may be specified for a
3505
* paragraph separator even if <code>paraLevel&gt;0</code> when multiple
3506
* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
3507
* <strong>Caution: </strong>A reference to this array, not a copy
3508
* of the levels, will be stored in the <code>Bidi</code> object;
3509
* the <code>embeddingLevels</code>
3510
* should not be modified to avoid unexpected results on subsequent
3511
* Bidi operations. However, the <code>setPara()</code> and
3512
* <code>setLine()</code> methods may modify some or all of the
3513
* levels.<br><br>
3514
* <strong>Note:</strong> the <code>embeddingLevels</code> array must
3515
* have one entry for each character in <code>text</code>.
3516
*
3517
* @throws IllegalArgumentException if the values in embeddingLevels are
3518
* not within the allowed range
3519
*
3520
* @see #LEVEL_DEFAULT_LTR
3521
* @see #LEVEL_DEFAULT_RTL
3522
* @see #LEVEL_OVERRIDE
3523
* @see #MAX_EXPLICIT_LEVEL
3524
* @stable ICU 3.8
3525
*/
3526
void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
3527
{
3528
/* check the argument values */
3529
if (paraLevel < LEVEL_DEFAULT_LTR) {
3530
verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
3531
}
3532
if (chars == null) {
3533
chars = new char[0];
3534
}
3535
3536
/* special treatment for RUNS_ONLY mode */
3537
if (reorderingMode == REORDER_RUNS_ONLY) {
3538
setParaRunsOnly(chars, paraLevel);
3539
return;
3540
}
3541
3542
/* initialize the Bidi object */
3543
this.paraBidi = null; /* mark unfinished setPara */
3544
this.text = chars;
3545
this.length = this.originalLength = this.resultLength = text.length;
3546
this.paraLevel = paraLevel;
3547
this.direction = (byte)(paraLevel & 1);
3548
this.paraCount = 1;
3549
3550
/* Allocate zero-length arrays instead of setting to null here; then
3551
* checks for null in various places can be eliminated.
3552
*/
3553
dirProps = new byte[0];
3554
levels = new byte[0];
3555
runs = new BidiRun[0];
3556
isGoodLogicalToVisualRunsMap = false;
3557
insertPoints.size = 0; /* clean up from last call */
3558
insertPoints.confirmed = 0; /* clean up from last call */
3559
3560
/*
3561
* Save the original paraLevel if contextual; otherwise, set to 0.
3562
*/
3563
defaultParaLevel = IsDefaultLevel(paraLevel) ? paraLevel : 0;
3564
3565
if (length == 0) {
3566
/*
3567
* For an empty paragraph, create a Bidi object with the paraLevel and
3568
* the flags and the direction set but without allocating zero-length arrays.
3569
* There is nothing more to do.
3570
*/
3571
if (IsDefaultLevel(paraLevel)) {
3572
this.paraLevel &= 1;
3573
defaultParaLevel = 0;
3574
}
3575
flags = DirPropFlagLR(paraLevel);
3576
runCount = 0;
3577
paraCount = 0;
3578
setParaSuccess();
3579
return;
3580
}
3581
3582
runCount = -1;
3583
3584
/*
3585
* Get the directional properties,
3586
* the flags bit-set, and
3587
* determine the paragraph level if necessary.
3588
*/
3589
getDirPropsMemory(length);
3590
dirProps = dirPropsMemory;
3591
getDirProps();
3592
/* the processed length may have changed if OPTION_STREAMING is set */
3593
trailingWSStart = length; /* the levels[] will reflect the WS run */
3594
3595
/* are explicit levels specified? */
3596
if (embeddingLevels == null) {
3597
/* no: determine explicit levels according to the (Xn) rules */
3598
getLevelsMemory(length);
3599
levels = levelsMemory;
3600
direction = resolveExplicitLevels();
3601
} else {
3602
/* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
3603
levels = embeddingLevels;
3604
direction = checkExplicitLevels();
3605
}
3606
3607
/* allocate isolate memory */
3608
if (isolateCount > 0) {
3609
if (isolates == null || isolates.length < isolateCount)
3610
isolates = new Isolate[isolateCount + 3]; /* keep some reserve */
3611
}
3612
isolateCount = -1; /* current isolates stack entry == none */
3613
3614
/*
3615
* The steps after (X9) in the Bidi algorithm are performed only if
3616
* the paragraph text has mixed directionality!
3617
*/
3618
switch (direction) {
3619
case LTR:
3620
/* all levels are implicitly at paraLevel (important for getLevels()) */
3621
trailingWSStart = 0;
3622
break;
3623
case RTL:
3624
/* all levels are implicitly at paraLevel (important for getLevels()) */
3625
trailingWSStart = 0;
3626
break;
3627
default:
3628
/*
3629
* Choose the right implicit state table
3630
*/
3631
switch(reorderingMode) {
3632
case REORDER_DEFAULT:
3633
this.impTabPair = impTab_DEFAULT;
3634
break;
3635
case REORDER_NUMBERS_SPECIAL:
3636
this.impTabPair = impTab_NUMBERS_SPECIAL;
3637
break;
3638
case REORDER_GROUP_NUMBERS_WITH_R:
3639
this.impTabPair = impTab_GROUP_NUMBERS_WITH_R;
3640
break;
3641
case REORDER_RUNS_ONLY:
3642
/* we should never get here */
3643
throw new InternalError("Internal ICU error in setPara");
3644
/* break; */
3645
case REORDER_INVERSE_NUMBERS_AS_L:
3646
this.impTabPair = impTab_INVERSE_NUMBERS_AS_L;
3647
break;
3648
case REORDER_INVERSE_LIKE_DIRECT:
3649
if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
3650
this.impTabPair = impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
3651
} else {
3652
this.impTabPair = impTab_INVERSE_LIKE_DIRECT;
3653
}
3654
break;
3655
case REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
3656
if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
3657
this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
3658
} else {
3659
this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL;
3660
}
3661
break;
3662
}
3663
/*
3664
* If there are no external levels specified and there
3665
* are no significant explicit level codes in the text,
3666
* then we can treat the entire paragraph as one run.
3667
* Otherwise, we need to perform the following rules on runs of
3668
* the text with the same embedding levels. (X10)
3669
* "Significant" explicit level codes are ones that actually
3670
* affect non-BN characters.
3671
* Examples for "insignificant" ones are empty embeddings
3672
* LRE-PDF, LRE-RLE-PDF-PDF, etc.
3673
*/
3674
if (embeddingLevels == null && paraCount <= 1 &&
3675
(flags & DirPropFlagMultiRuns) == 0) {
3676
resolveImplicitLevels(0, length,
3677
GetLRFromLevel(GetParaLevelAt(0)),
3678
GetLRFromLevel(GetParaLevelAt(length - 1)));
3679
} else {
3680
/* sor, eor: start and end types of same-level-run */
3681
int start, limit = 0;
3682
byte level, nextLevel;
3683
short sor, eor;
3684
3685
/* determine the first sor and set eor to it because of the loop body (sor=eor there) */
3686
level = GetParaLevelAt(0);
3687
nextLevel = levels[0];
3688
if (level < nextLevel) {
3689
eor = GetLRFromLevel(nextLevel);
3690
} else {
3691
eor = GetLRFromLevel(level);
3692
}
3693
3694
do {
3695
/* determine start and limit of the run (end points just behind the run) */
3696
3697
/* the values for this run's start are the same as for the previous run's end */
3698
start = limit;
3699
level = nextLevel;
3700
if ((start > 0) && (dirProps[start - 1] == B)) {
3701
/* except if this is a new paragraph, then set sor = para level */
3702
sor = GetLRFromLevel(GetParaLevelAt(start));
3703
} else {
3704
sor = eor;
3705
}
3706
3707
/* search for the limit of this run */
3708
while ((++limit < length) &&
3709
((levels[limit] == level) ||
3710
((DirPropFlag(dirProps[limit]) & MASK_BN_EXPLICIT) != 0))) {}
3711
3712
/* get the correct level of the next run */
3713
if (limit < length) {
3714
nextLevel = levels[limit];
3715
} else {
3716
nextLevel = GetParaLevelAt(length - 1);
3717
}
3718
3719
/* determine eor from max(level, nextLevel); sor is last run's eor */
3720
if (NoOverride(level) < NoOverride(nextLevel)) {
3721
eor = GetLRFromLevel(nextLevel);
3722
} else {
3723
eor = GetLRFromLevel(level);
3724
}
3725
3726
/* if the run consists of overridden directional types, then there
3727
are no implicit types to be resolved */
3728
if ((level & LEVEL_OVERRIDE) == 0) {
3729
resolveImplicitLevels(start, limit, sor, eor);
3730
} else {
3731
/* remove the LEVEL_OVERRIDE flags */
3732
do {
3733
levels[start++] &= ~LEVEL_OVERRIDE;
3734
} while (start < limit);
3735
}
3736
} while (limit < length);
3737
}
3738
3739
/* reset the embedding levels for some non-graphic characters (L1), (X9) */
3740
adjustWSLevels();
3741
3742
break;
3743
}
3744
3745
/* add RLM for inverse Bidi with contextual orientation resolving
3746
* to RTL which would not round-trip otherwise
3747
*/
3748
if ((defaultParaLevel > 0) &&
3749
((reorderingOptions & OPTION_INSERT_MARKS) != 0) &&
3750
((reorderingMode == REORDER_INVERSE_LIKE_DIRECT) ||
3751
(reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
3752
int start, last;
3753
byte level;
3754
byte dirProp;
3755
for (int i = 0; i < paraCount; i++) {
3756
last = paras_limit[i] - 1;
3757
level = paras_level[i];
3758
if (level == 0)
3759
continue; /* LTR paragraph */
3760
start = i == 0 ? 0 : paras_limit[i - 1];
3761
for (int j = last; j >= start; j--) {
3762
dirProp = dirProps[j];
3763
if (dirProp == L) {
3764
if (j < last) {
3765
while (dirProps[last] == B) {
3766
last--;
3767
}
3768
}
3769
addPoint(last, RLM_BEFORE);
3770
break;
3771
}
3772
if ((DirPropFlag(dirProp) & MASK_R_AL) != 0) {
3773
break;
3774
}
3775
}
3776
}
3777
}
3778
3779
if ((reorderingOptions & OPTION_REMOVE_CONTROLS) != 0) {
3780
resultLength -= controlCount;
3781
} else {
3782
resultLength += insertPoints.size;
3783
}
3784
setParaSuccess();
3785
}
3786
3787
/**
3788
* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
3789
* <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9:
3790
* Unicode Bidirectional Algorithm</a>, version 13,
3791
* also described in The Unicode Standard, Version 4.0 .<p>
3792
*
3793
* This method takes a paragraph of text and computes the
3794
* left-right-directionality of each character. The text should not
3795
* contain any Unicode block separators.<p>
3796
*
3797
* The RUN_DIRECTION attribute in the text, if present, determines the base
3798
* direction (left-to-right or right-to-left). If not present, the base
3799
* direction is computed using the Unicode Bidirectional Algorithm,
3800
* defaulting to left-to-right if there are no strong directional characters
3801
* in the text. This attribute, if present, must be applied to all the text
3802
* in the paragraph.<p>
3803
*
3804
* The BIDI_EMBEDDING attribute in the text, if present, represents
3805
* embedding level information. Negative values from -1 to -62 indicate
3806
* overrides at the absolute value of the level. Positive values from 1 to
3807
* 62 indicate embeddings. Where values are zero or not defined, the base
3808
* embedding level as determined by the base direction is assumed.<p>
3809
*
3810
* The NUMERIC_SHAPING attribute in the text, if present, converts European
3811
* digits to other decimal digits before running the bidi algorithm. This
3812
* attribute, if present, must be applied to all the text in the paragraph.
3813
*
3814
* If the entire text is all of the same directionality, then
3815
* the method may not perform all the steps described by the algorithm,
3816
* i.e., some levels may not be the same as if all steps were performed.
3817
* This is not relevant for unidirectional text.<br>
3818
* For example, in pure LTR text with numbers the numbers would get
3819
* a resolved level of 2 higher than the surrounding text according to
3820
* the algorithm. This implementation may set all resolved levels to
3821
* the same value in such a case.<p>
3822
*
3823
* @param paragraph a paragraph of text with optional character and
3824
* paragraph attribute information
3825
* @stable ICU 3.8
3826
*/
3827
public void setPara(AttributedCharacterIterator paragraph)
3828
{
3829
byte paraLvl;
3830
char ch = paragraph.first();
3831
Boolean runDirection =
3832
(Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION);
3833
Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING);
3834
3835
if (runDirection == null) {
3836
paraLvl = LEVEL_DEFAULT_LTR;
3837
} else {
3838
paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ?
3839
LTR : RTL;
3840
}
3841
3842
byte[] lvls = null;
3843
int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
3844
byte[] embeddingLevels = new byte[len];
3845
char[] txt = new char[len];
3846
int i = 0;
3847
while (ch != AttributedCharacterIterator.DONE) {
3848
txt[i] = ch;
3849
Integer embedding =
3850
(Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING);
3851
if (embedding != null) {
3852
byte level = embedding.byteValue();
3853
if (level == 0) {
3854
/* no-op */
3855
} else if (level < 0) {
3856
lvls = embeddingLevels;
3857
embeddingLevels[i] = (byte)((0 - level) | LEVEL_OVERRIDE);
3858
} else {
3859
lvls = embeddingLevels;
3860
embeddingLevels[i] = level;
3861
}
3862
}
3863
ch = paragraph.next();
3864
++i;
3865
}
3866
3867
if (shaper != null) {
3868
NumericShapings.shape(shaper, txt, 0, len);
3869
}
3870
setPara(txt, paraLvl, lvls);
3871
}
3872
3873
/**
3874
* Specify whether block separators must be allocated level zero,
3875
* so that successive paragraphs will progress from left to right.
3876
* This method must be called before <code>setPara()</code>.
3877
* Paragraph separators (B) may appear in the text. Setting them to level zero
3878
* means that all paragraph separators (including one possibly appearing
3879
* in the last text position) are kept in the reordered text after the text
3880
* that they follow in the source text.
3881
* When this feature is not enabled, a paragraph separator at the last
3882
* position of the text before reordering will go to the first position
3883
* of the reordered text when the paragraph level is odd.
3884
*
3885
* @param ordarParaLTR specifies whether paragraph separators (B) must
3886
* receive level 0, so that successive paragraphs progress from left to right.
3887
*
3888
* @see #setPara
3889
* @stable ICU 3.8
3890
*/
3891
public void orderParagraphsLTR(boolean ordarParaLTR) {
3892
orderParagraphsLTR = ordarParaLTR;
3893
}
3894
3895
/**
3896
* Get the directionality of the text.
3897
*
3898
* @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
3899
* that indicates if the entire text
3900
* represented by this object is unidirectional,
3901
* and which direction, or if it is mixed-directional.
3902
*
3903
* @throws IllegalStateException if this call is not preceded by a successful
3904
* call to <code>setPara</code> or <code>setLine</code>
3905
*
3906
* @see #LTR
3907
* @see #RTL
3908
* @see #MIXED
3909
* @stable ICU 3.8
3910
*/
3911
public byte getDirection()
3912
{
3913
verifyValidParaOrLine();
3914
return direction;
3915
}
3916
3917
/**
3918
* Get the length of the text.
3919
*
3920
* @return The length of the text that the <code>Bidi</code> object was
3921
* created for.
3922
*
3923
* @throws IllegalStateException if this call is not preceded by a successful
3924
* call to <code>setPara</code> or <code>setLine</code>
3925
* @stable ICU 3.8
3926
*/
3927
public int getLength()
3928
{
3929
verifyValidParaOrLine();
3930
return originalLength;
3931
}
3932
3933
/* paragraphs API methods ------------------------------------------------- */
3934
3935
/**
3936
* Get the paragraph level of the text.
3937
*
3938
* @return The paragraph level. If there are multiple paragraphs, their
3939
* level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
3940
* LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph
3941
* is returned.
3942
*
3943
* @throws IllegalStateException if this call is not preceded by a successful
3944
* call to <code>setPara</code> or <code>setLine</code>
3945
*
3946
* @see #LEVEL_DEFAULT_LTR
3947
* @see #LEVEL_DEFAULT_RTL
3948
* @see #getParagraph
3949
* @see #getParagraphByIndex
3950
* @stable ICU 3.8
3951
*/
3952
public byte getParaLevel()
3953
{
3954
verifyValidParaOrLine();
3955
return paraLevel;
3956
}
3957
3958
/**
3959
* Retrieves the Bidi class for a given code point.
3960
* <p>If a <code>BidiClassifier</code> is defined and returns a value
3961
* other than <code>CLASS_DEFAULT</code>, that value is used; otherwise
3962
* the default class determination mechanism is invoked.</p>
3963
*
3964
* @param c The code point to get a Bidi class for.
3965
*
3966
* @return The Bidi class for the character <code>c</code> that is in effect
3967
* for this <code>Bidi</code> instance.
3968
*
3969
* @stable ICU 3.8
3970
*/
3971
public int getCustomizedClass(int c) {
3972
int dir;
3973
3974
dir = bdp.getClass(c);
3975
if (dir >= CHAR_DIRECTION_COUNT)
3976
dir = ON;
3977
return dir;
3978
}
3979
3980
/**
3981
* <code>setLine()</code> returns a <code>Bidi</code> object to
3982
* contain the reordering information, especially the resolved levels,
3983
* for all the characters in a line of text. This line of text is
3984
* specified by referring to a <code>Bidi</code> object representing
3985
* this information for a piece of text containing one or more paragraphs,
3986
* and by specifying a range of indexes in this text.<p>
3987
* In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
3988
*
3989
* This is used after calling <code>setPara()</code>
3990
* for a piece of text, and after line-breaking on that text.
3991
* It is not necessary if each paragraph is treated as a single line.<p>
3992
*
3993
* After line-breaking, rules (L1) and (L2) for the treatment of
3994
* trailing WS and for reordering are performed on
3995
* a <code>Bidi</code> object that represents a line.<p>
3996
*
3997
* <strong>Important: </strong>the line <code>Bidi</code> object may
3998
* reference data within the global text <code>Bidi</code> object.
3999
* You should not alter the content of the global text object until
4000
* you are finished using the line object.
4001
*
4002
* @param start is the line's first index into the text.
4003
*
4004
* @param limit is just behind the line's last index into the text
4005
* (its last index +1).
4006
*
4007
* @return a <code>Bidi</code> object that will now represent a line of the text.
4008
*
4009
* @throws IllegalStateException if this call is not preceded by a successful
4010
* call to <code>setPara</code>
4011
* @throws IllegalArgumentException if start and limit are not in the range
4012
* <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
4013
* or if the specified line crosses a paragraph boundary
4014
*
4015
* @see #setPara
4016
* @see #getProcessedLength
4017
* @stable ICU 3.8
4018
*/
4019
public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit)
4020
{
4021
verifyValidPara();
4022
verifyRange(start, 0, limit);
4023
verifyRange(limit, 0, length+1);
4024
4025
return BidiLine.setLine(this, newBidi, newBidiBase, start, limit);
4026
}
4027
4028
/**
4029
* Get the level for one character.
4030
*
4031
* @param charIndex the index of a character.
4032
*
4033
* @return The level for the character at <code>charIndex</code>.
4034
*
4035
* @throws IllegalStateException if this call is not preceded by a successful
4036
* call to <code>setPara</code> or <code>setLine</code>
4037
* @throws IllegalArgumentException if charIndex is not in the range
4038
* <code>0&lt;=charIndex&lt;getProcessedLength()</code>
4039
*
4040
* @see #getProcessedLength
4041
* @stable ICU 3.8
4042
*/
4043
public byte getLevelAt(int charIndex)
4044
{
4045
// for backward compatibility
4046
if (charIndex < 0 || charIndex >= length) {
4047
return (byte)getBaseLevel();
4048
}
4049
4050
verifyValidParaOrLine();
4051
verifyRange(charIndex, 0, length);
4052
return BidiLine.getLevelAt(this, charIndex);
4053
}
4054
4055
/**
4056
* Get an array of levels for each character.<p>
4057
*
4058
* Note that this method may allocate memory under some
4059
* circumstances, unlike <code>getLevelAt()</code>.
4060
*
4061
* @return The levels array for the text,
4062
* or <code>null</code> if an error occurs.
4063
*
4064
* @throws IllegalStateException if this call is not preceded by a successful
4065
* call to <code>setPara</code> or <code>setLine</code>
4066
* @stable ICU 3.8
4067
*/
4068
byte[] getLevels()
4069
{
4070
verifyValidParaOrLine();
4071
if (length <= 0) {
4072
return new byte[0];
4073
}
4074
return BidiLine.getLevels(this);
4075
}
4076
4077
/**
4078
* Get the number of runs.
4079
* This method may invoke the actual reordering on the
4080
* <code>Bidi</code> object, after <code>setPara()</code>
4081
* may have resolved only the levels of the text. Therefore,
4082
* <code>countRuns()</code> may have to allocate memory,
4083
* and may throw an exception if it fails to do so.
4084
*
4085
* @return The number of runs.
4086
*
4087
* @throws IllegalStateException if this call is not preceded by a successful
4088
* call to <code>setPara</code> or <code>setLine</code>
4089
* @stable ICU 3.8
4090
*/
4091
public int countRuns()
4092
{
4093
verifyValidParaOrLine();
4094
BidiLine.getRuns(this);
4095
return runCount;
4096
}
4097
4098
/**
4099
*
4100
* Get a <code>BidiRun</code> object according to its index. BidiRun methods
4101
* may be used to retrieve the run's logical start, length and level,
4102
* which can be even for an LTR run or odd for an RTL run.
4103
* In an RTL run, the character at the logical start is
4104
* visually on the right of the displayed run.
4105
* The length is the number of characters in the run.<p>
4106
* <code>countRuns()</code> is normally called
4107
* before the runs are retrieved.
4108
*
4109
* <p>
4110
* Example:
4111
* <pre>
4112
* Bidi bidi = new Bidi();
4113
* String text = "abc 123 DEFG xyz";
4114
* bidi.setPara(text, Bidi.RTL, null);
4115
* int i, count=bidi.countRuns(), logicalStart, visualIndex=0, length;
4116
* BidiRun run;
4117
* for (i = 0; i &lt; count; ++i) {
4118
* run = bidi.getVisualRun(i);
4119
* logicalStart = run.getStart();
4120
* length = run.getLength();
4121
* if (Bidi.LTR == run.getEmbeddingLevel()) {
4122
* do { // LTR
4123
* show_char(text.charAt(logicalStart++), visualIndex++);
4124
* } while (--length &gt; 0);
4125
* } else {
4126
* logicalStart += length; // logicalLimit
4127
* do { // RTL
4128
* show_char(text.charAt(--logicalStart), visualIndex++);
4129
* } while (--length &gt; 0);
4130
* }
4131
* }
4132
* </pre>
4133
* <p>
4134
* Note that in right-to-left runs, code like this places
4135
* second surrogates before first ones (which is generally a bad idea)
4136
* and combining characters before base characters.
4137
* <p>
4138
* Use of <code>{@link #writeReordered}</code>, optionally with the
4139
* <code>{@link #KEEP_BASE_COMBINING}</code> option, can be considered in
4140
* order to avoid these issues.
4141
*
4142
* @param runIndex is the number of the run in visual order, in the
4143
* range <code>[0..countRuns()-1]</code>.
4144
*
4145
* @return a BidiRun object containing the details of the run. The
4146
* directionality of the run is
4147
* <code>LTR==0</code> or <code>RTL==1</code>,
4148
* never <code>MIXED</code>.
4149
*
4150
* @throws IllegalStateException if this call is not preceded by a successful
4151
* call to <code>setPara</code> or <code>setLine</code>
4152
* @throws IllegalArgumentException if <code>runIndex</code> is not in
4153
* the range <code>0&lt;=runIndex&lt;countRuns()</code>
4154
*
4155
* @see #countRuns()
4156
* @see com.ibm.icu.text.BidiRun
4157
* @see com.ibm.icu.text.BidiRun#getStart()
4158
* @see com.ibm.icu.text.BidiRun#getLength()
4159
* @see com.ibm.icu.text.BidiRun#getEmbeddingLevel()
4160
* @stable ICU 3.8
4161
*/
4162
BidiRun getVisualRun(int runIndex)
4163
{
4164
verifyValidParaOrLine();
4165
BidiLine.getRuns(this);
4166
verifyRange(runIndex, 0, runCount);
4167
return BidiLine.getVisualRun(this, runIndex);
4168
}
4169
4170
/**
4171
* Get a visual-to-logical index map (array) for the characters in the
4172
* <code>Bidi</code> (paragraph or line) object.
4173
* <p>
4174
* Some values in the map may be <code>MAP_NOWHERE</code> if the
4175
* corresponding text characters are Bidi marks inserted in the visual
4176
* output by the option <code>OPTION_INSERT_MARKS</code>.
4177
* <p>
4178
* When the visual output is altered by using options of
4179
* <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
4180
* <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
4181
* <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
4182
* be correct. It is advised to use, when possible, reordering options
4183
* such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
4184
*
4185
* @return an array of <code>getResultLength()</code>
4186
* indexes which will reflect the reordering of the characters.<br><br>
4187
* The index map will result in
4188
* <code>indexMap[visualIndex]==logicalIndex</code>, where
4189
* <code>indexMap</code> represents the returned array.
4190
*
4191
* @throws IllegalStateException if this call is not preceded by a successful
4192
* call to <code>setPara</code> or <code>setLine</code>
4193
*
4194
* @see #getLogicalMap
4195
* @see #getLogicalIndex
4196
* @see #getResultLength
4197
* @see #MAP_NOWHERE
4198
* @see #OPTION_INSERT_MARKS
4199
* @see #writeReordered
4200
* @stable ICU 3.8
4201
*/
4202
private int[] getVisualMap()
4203
{
4204
/* countRuns() checks successful call to setPara/setLine */
4205
countRuns();
4206
if (resultLength <= 0) {
4207
return new int[0];
4208
}
4209
return BidiLine.getVisualMap(this);
4210
}
4211
4212
/**
4213
* This is a convenience method that does not use a <code>Bidi</code> object.
4214
* It is intended to be used for when an application has determined the levels
4215
* of objects (character sequences) and just needs to have them reordered (L2).
4216
* This is equivalent to using <code>getVisualMap()</code> on a
4217
* <code>Bidi</code> object.
4218
*
4219
* @param levels is an array of levels that have been determined by
4220
* the application.
4221
*
4222
* @return an array of <code>levels.length</code>
4223
* indexes which will reflect the reordering of the characters.<p>
4224
* The index map will result in
4225
* <code>indexMap[visualIndex]==logicalIndex</code>, where
4226
* <code>indexMap</code> represents the returned array.
4227
*
4228
* @stable ICU 3.8
4229
*/
4230
private static int[] reorderVisual(byte[] levels)
4231
{
4232
return BidiLine.reorderVisual(levels);
4233
}
4234
4235
/**
4236
* Constant indicating that the base direction depends on the first strong
4237
* directional character in the text according to the Unicode Bidirectional
4238
* Algorithm. If no strong directional character is present, the base
4239
* direction is right-to-left.
4240
* @stable ICU 3.8
4241
*/
4242
public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = LEVEL_DEFAULT_RTL; // same value as LEVEL_DEFAULT_RTL, exposed as an int
4243
4244
/**
4245
* Create Bidi from the given text, embedding, and direction information.
4246
* The embeddings array may be null. If present, the values represent
4247
* embedding level information. Negative values from -1 to -61 indicate
4248
* overrides at the absolute value of the level. Positive values from 1 to
4249
* 61 indicate embeddings. Where values are zero, the base embedding level
4250
* as determined by the base direction is assumed.<p>
4251
*
4252
* Note: this constructor calls setPara() internally.
4253
*
4254
* @param text an array containing the paragraph of text to process.
4255
* @param textStart the index into the text array of the start of the
4256
* paragraph.
4257
* @param embeddings an array containing embedding values for each character
4258
* in the paragraph. This can be null, in which case it is assumed
4259
* that there is no external embedding information.
4260
* @param embStart the index into the embedding array of the start of the
4261
* paragraph.
4262
* @param paragraphLength the length of the paragraph in the text and
4263
* embeddings arrays.
4264
* @param flags a collection of flags that control the algorithm. The
4265
* algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
4266
* DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
4267
* DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
4268
*
4269
* @throws IllegalArgumentException if the values in embeddings are
4270
* not within the allowed range
4271
*
4272
* @see #DIRECTION_LEFT_TO_RIGHT
4273
* @see #DIRECTION_RIGHT_TO_LEFT
4274
* @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
4275
* @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
4276
* @stable ICU 3.8
4277
*/
4278
public BidiBase(char[] text,
4279
int textStart,
4280
byte[] embeddings,
4281
int embStart,
4282
int paragraphLength,
4283
int flags)
4284
{
4285
this(0, 0);
4286
byte paraLvl;
4287
switch (flags) {
4288
case Bidi.DIRECTION_LEFT_TO_RIGHT:
4289
default:
4290
paraLvl = LTR;
4291
break;
4292
case Bidi.DIRECTION_RIGHT_TO_LEFT:
4293
paraLvl = RTL;
4294
break;
4295
case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:
4296
paraLvl = LEVEL_DEFAULT_LTR;
4297
break;
4298
case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:
4299
paraLvl = LEVEL_DEFAULT_RTL;
4300
break;
4301
}
4302
byte[] paraEmbeddings;
4303
if (embeddings == null) {
4304
paraEmbeddings = null;
4305
} else {
4306
paraEmbeddings = new byte[paragraphLength];
4307
byte lev;
4308
for (int i = 0; i < paragraphLength; i++) {
4309
lev = embeddings[i + embStart];
4310
if (lev < 0) {
4311
lev = (byte)((- lev) | LEVEL_OVERRIDE);
4312
} else if (lev == 0) {
4313
lev = paraLvl;
4314
if (paraLvl > MAX_EXPLICIT_LEVEL) {
4315
lev &= 1;
4316
}
4317
}
4318
paraEmbeddings[i] = lev;
4319
}
4320
}
4321
4322
char[] paraText = new char[paragraphLength];
4323
System.arraycopy(text, textStart, paraText, 0, paragraphLength);
4324
setPara(paraText, paraLvl, paraEmbeddings);
4325
}
4326
4327
/**
4328
* Return true if the line is not left-to-right or right-to-left. This means
4329
* it either has mixed runs of left-to-right and right-to-left text, or the
4330
* base direction differs from the direction of the only run of text.
4331
*
4332
* @return true if the line is not left-to-right or right-to-left.
4333
*
4334
* @throws IllegalStateException if this call is not preceded by a successful
4335
* call to <code>setPara</code>
4336
* @stable ICU 3.8
4337
*/
4338
public boolean isMixed()
4339
{
4340
return (!isLeftToRight() && !isRightToLeft());
4341
}
4342
4343
/**
4344
* Return true if the line is all left-to-right text and the base direction
4345
* is left-to-right.
4346
*
4347
* @return true if the line is all left-to-right text and the base direction
4348
* is left-to-right.
4349
*
4350
* @throws IllegalStateException if this call is not preceded by a successful
4351
* call to <code>setPara</code>
4352
* @stable ICU 3.8
4353
*/
4354
public boolean isLeftToRight()
4355
{
4356
return (getDirection() == LTR && (paraLevel & 1) == 0);
4357
}
4358
4359
/**
4360
* Return true if the line is all right-to-left text, and the base direction
4361
* is right-to-left
4362
*
4363
* @return true if the line is all right-to-left text, and the base
4364
* direction is right-to-left
4365
*
4366
* @throws IllegalStateException if this call is not preceded by a successful
4367
* call to <code>setPara</code>
4368
* @stable ICU 3.8
4369
*/
4370
public boolean isRightToLeft()
4371
{
4372
return (getDirection() == RTL && (paraLevel & 1) == 1);
4373
}
4374
4375
/**
4376
* Return true if the base direction is left-to-right
4377
*
4378
* @return true if the base direction is left-to-right
4379
*
4380
* @throws IllegalStateException if this call is not preceded by a successful
4381
* call to <code>setPara</code> or <code>setLine</code>
4382
*
4383
* @stable ICU 3.8
4384
*/
4385
public boolean baseIsLeftToRight()
4386
{
4387
return (getParaLevel() == LTR);
4388
}
4389
4390
/**
4391
* Return the base level (0 if left-to-right, 1 if right-to-left).
4392
*
4393
* @return the base level
4394
*
4395
* @throws IllegalStateException if this call is not preceded by a successful
4396
* call to <code>setPara</code> or <code>setLine</code>
4397
*
4398
* @stable ICU 3.8
4399
*/
4400
public int getBaseLevel()
4401
{
4402
return getParaLevel();
4403
}
4404
4405
/**
4406
* Compute the logical to visual run mapping
4407
*/
4408
void getLogicalToVisualRunsMap()
4409
{
4410
if (isGoodLogicalToVisualRunsMap) {
4411
return;
4412
}
4413
int count = countRuns();
4414
if ((logicalToVisualRunsMap == null) ||
4415
(logicalToVisualRunsMap.length < count)) {
4416
logicalToVisualRunsMap = new int[count];
4417
}
4418
int i;
4419
long[] keys = new long[count];
4420
for (i = 0; i < count; i++) {
4421
keys[i] = ((long)(runs[i].start)<<32) + i;
4422
}
4423
Arrays.sort(keys);
4424
for (i = 0; i < count; i++) {
4425
logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
4426
}
4427
isGoodLogicalToVisualRunsMap = true;
4428
}
4429
4430
/**
4431
* Return the level of the nth logical run in this line.
4432
*
4433
* @param run the index of the run, between 0 and <code>countRuns()-1</code>
4434
*
4435
* @return the level of the run
4436
*
4437
* @throws IllegalStateException if this call is not preceded by a successful
4438
* call to <code>setPara</code> or <code>setLine</code>
4439
* @throws IllegalArgumentException if <code>run</code> is not in
4440
* the range <code>0&lt;=run&lt;countRuns()</code>
4441
* @stable ICU 3.8
4442
*/
4443
public int getRunLevel(int run)
4444
{
4445
verifyValidParaOrLine();
4446
BidiLine.getRuns(this);
4447
4448
// for backward compatibility
4449
if (run < 0 || run >= runCount) {
4450
return getParaLevel();
4451
}
4452
4453
getLogicalToVisualRunsMap();
4454
return runs[logicalToVisualRunsMap[run]].level;
4455
}
4456
4457
/**
4458
* Return the index of the character at the start of the nth logical run in
4459
* this line, as an offset from the start of the line.
4460
*
4461
* @param run the index of the run, between 0 and <code>countRuns()</code>
4462
*
4463
* @return the start of the run
4464
*
4465
* @throws IllegalStateException if this call is not preceded by a successful
4466
* call to <code>setPara</code> or <code>setLine</code>
4467
* @throws IllegalArgumentException if <code>run</code> is not in
4468
* the range <code>0&lt;=run&lt;countRuns()</code>
4469
* @stable ICU 3.8
4470
*/
4471
public int getRunStart(int run)
4472
{
4473
verifyValidParaOrLine();
4474
BidiLine.getRuns(this);
4475
4476
// for backward compatibility
4477
if (runCount == 1) {
4478
return 0;
4479
} else if (run == runCount) {
4480
return length;
4481
}
4482
4483
getLogicalToVisualRunsMap();
4484
return runs[logicalToVisualRunsMap[run]].start;
4485
}
4486
4487
/**
4488
* Return the index of the character past the end of the nth logical run in
4489
* this line, as an offset from the start of the line. For example, this
4490
* will return the length of the line for the last run on the line.
4491
*
4492
* @param run the index of the run, between 0 and <code>countRuns()</code>
4493
*
4494
* @return the limit of the run
4495
*
4496
* @throws IllegalStateException if this call is not preceded by a successful
4497
* call to <code>setPara</code> or <code>setLine</code>
4498
* @throws IllegalArgumentException if <code>run</code> is not in
4499
* the range <code>0&lt;=run&lt;countRuns()</code>
4500
* @stable ICU 3.8
4501
*/
4502
public int getRunLimit(int run)
4503
{
4504
verifyValidParaOrLine();
4505
BidiLine.getRuns(this);
4506
4507
// for backward compatibility
4508
if (runCount == 1) {
4509
return length;
4510
}
4511
4512
getLogicalToVisualRunsMap();
4513
int idx = logicalToVisualRunsMap[run];
4514
int len = idx == 0 ? runs[idx].limit :
4515
runs[idx].limit - runs[idx-1].limit;
4516
return runs[idx].start + len;
4517
}
4518
4519
/**
4520
* Return true if the specified text requires bidi analysis. If this returns
4521
* false, the text will display left-to-right. Clients can then avoid
4522
* constructing a Bidi object. Text in the Arabic Presentation Forms area of
4523
* Unicode is presumed to already be shaped and ordered for display, and so
4524
* will not cause this method to return true.
4525
*
4526
* @param text the text containing the characters to test
4527
* @param start the start of the range of characters to test
4528
* @param limit the limit of the range of characters to test
4529
*
4530
* @return true if the range of characters requires bidi analysis
4531
*
4532
* @stable ICU 3.8
4533
*/
4534
public static boolean requiresBidi(char[] text,
4535
int start,
4536
int limit)
4537
{
4538
final int RTLMask = (1 << R |
4539
1 << AL |
4540
1 << RLE |
4541
1 << RLO |
4542
1 << AN);
4543
4544
if (0 > start || start > limit || limit > text.length) {
4545
throw new IllegalArgumentException("Value start " + start +
4546
" is out of range 0 to " + limit + ", or limit " + limit +
4547
" is beyond the text length " + text.length);
4548
}
4549
4550
for (int i = start; i < limit; ++i) {
4551
if (Character.isHighSurrogate(text[i]) && i < (limit-1) &&
4552
Character.isLowSurrogate(text[i+1])) {
4553
if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) {
4554
return true;
4555
}
4556
} else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
4557
return true;
4558
}
4559
}
4560
4561
return false;
4562
}
4563
4564
/**
4565
* Reorder the objects in the array into visual order based on their levels.
4566
* This is a utility method to use when you have a collection of objects
4567
* representing runs of text in logical order, each run containing text at a
4568
* single level. The elements at <code>index</code> from
4569
* <code>objectStart</code> up to <code>objectStart + count</code> in the
4570
* objects array will be reordered into visual order assuming
4571
* each run of text has the level indicated by the corresponding element in
4572
* the levels array (at <code>index - objectStart + levelStart</code>).
4573
*
4574
* @param levels an array representing the bidi level of each object
4575
* @param levelStart the start position in the levels array
4576
* @param objects the array of objects to be reordered into visual order
4577
* @param objectStart the start position in the objects array
4578
* @param count the number of objects to reorder
4579
* @stable ICU 3.8
4580
*/
4581
public static void reorderVisually(byte[] levels,
4582
int levelStart,
4583
Object[] objects,
4584
int objectStart,
4585
int count)
4586
{
4587
// for backward compatibility
4588
if (0 > levelStart || levels.length <= levelStart) {
4589
throw new IllegalArgumentException("Value levelStart " +
4590
levelStart + " is out of range 0 to " +
4591
(levels.length-1));
4592
}
4593
if (0 > objectStart || objects.length <= objectStart) {
4594
throw new IllegalArgumentException("Value objectStart " +
4595
objectStart + " is out of range 0 to " +
4596
(objects.length-1));
4597
}
4598
if (0 > count || objects.length < (objectStart+count)) {
4599
throw new IllegalArgumentException("Value count " +
4600
count + " is less than zero, or objectStart + count" +
4601
" is beyond objects length " + objects.length);
4602
}
4603
4604
byte[] reorderLevels = new byte[count];
4605
System.arraycopy(levels, levelStart, reorderLevels, 0, count);
4606
int[] indexMap = reorderVisual(reorderLevels);
4607
Object[] temp = new Object[count];
4608
System.arraycopy(objects, objectStart, temp, 0, count);
4609
for (int i = 0; i < count; ++i) {
4610
objects[objectStart + i] = temp[indexMap[i]];
4611
}
4612
}
4613
4614
/**
4615
* Take a <code>Bidi</code> object containing the reordering
4616
* information for a piece of text (one or more paragraphs) set by
4617
* <code>setPara()</code> or for a line of text set by <code>setLine()</code>
4618
* and return a string containing the reordered text.
4619
*
4620
* <p>The text may have been aliased (only a reference was stored
4621
* without copying the contents), thus it must not have been modified
4622
* since the <code>setPara()</code> call.</p>
4623
*
4624
* This method preserves the integrity of characters with multiple
4625
* code units and (optionally) combining characters.
4626
* Characters in RTL runs can be replaced by mirror-image characters
4627
* in the returned string. Note that "real" mirroring has to be done in a
4628
* rendering engine by glyph selection and that for many "mirrored"
4629
* characters there are no Unicode characters as mirror-image equivalents.
4630
* There are also options to insert or remove Bidi control
4631
* characters; see the descriptions of the return value and the
4632
* <code>options</code> parameter, and of the option bit flags.
4633
*
4634
* @param options A bit set of options for the reordering that control
4635
* how the reordered text is written.
4636
* The options include mirroring the characters on a code
4637
* point basis and inserting LRM characters, which is used
4638
* especially for transforming visually stored text
4639
* to logically stored text (although this is still an
4640
* imperfect implementation of an "inverse Bidi" algorithm
4641
* because it uses the "forward Bidi" algorithm at its core).
4642
* The available options are:
4643
* <code>DO_MIRRORING</code>,
4644
* <code>INSERT_LRM_FOR_NUMERIC</code>,
4645
* <code>KEEP_BASE_COMBINING</code>,
4646
* <code>OUTPUT_REVERSE</code>,
4647
* <code>REMOVE_BIDI_CONTROLS</code>,
4648
* <code>OPTION_STREAMING</code>
4649
*
4650
* @return The reordered text.
4651
* If the <code>INSERT_LRM_FOR_NUMERIC</code> option is set, then
4652
* the length of the returned string could be as large as
4653
* <code>getLength()+2*countRuns()</code>.<br>
4654
* If the <code>REMOVE_BIDI_CONTROLS</code> option is set, then the
4655
* length of the returned string may be less than
4656
* <code>getLength()</code>.<br>
4657
* If none of these options is set, then the length of the returned
4658
* string will be exactly <code>getProcessedLength()</code>.
4659
*
4660
* @throws IllegalStateException if this call is not preceded by a successful
4661
* call to <code>setPara</code> or <code>setLine</code>
4662
*
4663
* @see #DO_MIRRORING
4664
* @see #INSERT_LRM_FOR_NUMERIC
4665
* @see #KEEP_BASE_COMBINING
4666
* @see #OUTPUT_REVERSE
4667
* @see #REMOVE_BIDI_CONTROLS
4668
* @see #OPTION_STREAMING
4669
* @see #getProcessedLength
4670
* @stable ICU 3.8
4671
*/
4672
public String writeReordered(int options)
4673
{
4674
verifyValidParaOrLine();
4675
if (length == 0) {
4676
/* nothing to do */
4677
return "";
4678
}
4679
return BidiWriter.writeReordered(this, options);
4680
}
4681
4682
/**
4683
* Display the bidi internal state, used in debugging.
4684
*/
4685
public String toString() {
4686
StringBuilder buf = new StringBuilder(getClass().getName());
4687
4688
buf.append("[dir: ");
4689
buf.append(direction);
4690
buf.append(" baselevel: ");
4691
buf.append(paraLevel);
4692
buf.append(" length: ");
4693
buf.append(length);
4694
buf.append(" runs: ");
4695
if (levels == null) {
4696
buf.append("none");
4697
} else {
4698
buf.append('[');
4699
buf.append(levels[0]);
4700
for (int i = 1; i < levels.length; i++) {
4701
buf.append(' ');
4702
buf.append(levels[i]);
4703
}
4704
buf.append(']');
4705
}
4706
buf.append(" text: [0x");
4707
buf.append(Integer.toHexString(text[0]));
4708
for (int i = 1; i < text.length; i++) {
4709
buf.append(" 0x");
4710
buf.append(Integer.toHexString(text[i]));
4711
}
4712
buf.append("]]");
4713
4714
return buf.toString();
4715
}
4716
4717
/**
4718
* A class that provides access to constants defined by
4719
* java.awt.font.TextAttribute without creating a static dependency.
4720
*/
4721
private static class TextAttributeConstants {
4722
// Make sure to load the AWT's TextAttribute class before using the constants, if any.
4723
static {
4724
try {
4725
Class.forName("java.awt.font.TextAttribute", true, null);
4726
} catch (ClassNotFoundException e) {}
4727
}
4728
static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess();
4729
4730
/**
4731
* TextAttribute instances (or a fake Attribute type if
4732
* java.awt.font.TextAttribute is not present)
4733
*/
4734
static final AttributedCharacterIterator.Attribute RUN_DIRECTION =
4735
getTextAttribute("RUN_DIRECTION");
4736
static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING =
4737
getTextAttribute("NUMERIC_SHAPING");
4738
static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING =
4739
getTextAttribute("BIDI_EMBEDDING");
4740
4741
/**
4742
* TextAttribute.RUN_DIRECTION_LTR
4743
*/
4744
static final Boolean RUN_DIRECTION_LTR = (jafa == null) ?
4745
Boolean.FALSE : (Boolean)jafa.getTextAttributeConstant("RUN_DIRECTION_LTR");
4746
4747
@SuppressWarnings("serial")
4748
private static AttributedCharacterIterator.Attribute
4749
getTextAttribute(String name)
4750
{
4751
if (jafa == null) {
4752
// fake attribute
4753
return new AttributedCharacterIterator.Attribute(name) { };
4754
} else {
4755
return (AttributedCharacterIterator.Attribute)jafa.getTextAttributeConstant(name);
4756
}
4757
}
4758
}
4759
4760
/**
4761
* A class that provides access to java.awt.font.NumericShaper without
4762
* creating a static dependency.
4763
*/
4764
private static class NumericShapings {
4765
// Make sure to load the AWT's NumericShaper class before calling shape, if any.
4766
static {
4767
try {
4768
Class.forName("java.awt.font.NumericShaper", true, null);
4769
} catch (ClassNotFoundException e) {}
4770
}
4771
static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess();
4772
4773
/**
4774
* Invokes NumericShaping shape(text,start,count) method.
4775
*/
4776
static void shape(Object shaper, char[] text, int start, int count) {
4777
if (jafa != null) {
4778
jafa.shape(shaper, text, start, count);
4779
}
4780
}
4781
}
4782
4783
}
4784
4785