// Path: blob/master/src/java.base/share/classes/jdk/internal/icu/text/BidiWriter.java
// 41161 views
/*1* Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425/*26*******************************************************************************27* Copyright (C) 2001-2010, International Business Machines28* Corporation and others. All Rights Reserved.29*******************************************************************************30*/31/* Written by Simon Montagu, Matitiahu Allouche32* (ported from C code written by Markus W. 
Scherer)33*/3435package jdk.internal.icu.text;3637import jdk.internal.icu.lang.UCharacter;3839final class BidiWriter {4041/** Bidi control code points */42static final char LRM_CHAR = 0x200e;43static final char RLM_CHAR = 0x200f;44static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |451 << UCharacter.RIGHT_TO_LEFT_ARABIC);4647private static boolean IsCombining(int type) {48return ((1<<type &49(1<<UCharacter.NON_SPACING_MARK |501<<UCharacter.COMBINING_SPACING_MARK |511<<UCharacter.ENCLOSING_MARK)) != 0);52}5354/*55* When we have OUTPUT_REVERSE set on writeReordered(), then we56* semantically write RTL runs in reverse and later reverse them again.57* Instead, we actually write them in forward order to begin with.58* However, if the RTL run was to be mirrored, we need to mirror here now59* since the implicit second reversal must not do it.60* It looks strange to do mirroring in LTR output, but it is only because61* we are writing RTL output in reverse.62*/63private static String doWriteForward(String src, int options) {64/* optimize for several combinations of options */65switch(options&(BidiBase.REMOVE_BIDI_CONTROLS|BidiBase.DO_MIRRORING)) {66case 0: {67/* simply return the LTR run */68return src;69}70case BidiBase.DO_MIRRORING: {71StringBuffer dest = new StringBuffer(src.length());7273/* do mirroring */74int i=0;75int c;7677do {78c = UTF16.charAt(src, i);79i += UTF16.getCharCount(c);80UTF16.append(dest, UCharacter.getMirror(c));81} while(i < src.length());82return dest.toString();83}84case BidiBase.REMOVE_BIDI_CONTROLS: {85StringBuilder dest = new StringBuilder(src.length());8687/* copy the LTR run and remove any Bidi control characters */88int i = 0;89char c;90do {91c = src.charAt(i++);92if(!BidiBase.IsBidiControlChar(c)) {93dest.append(c);94}95} while(i < src.length());96return dest.toString();97}98default: {99StringBuffer dest = new StringBuffer(src.length());100101/* remove Bidi control characters and do mirroring */102int i = 0;103int c;104do {105c = 
UTF16.charAt(src, i);106i += UTF16.getCharCount(c);107if(!BidiBase.IsBidiControlChar(c)) {108UTF16.append(dest, UCharacter.getMirror(c));109}110} while(i < src.length());111return dest.toString();112}113} /* end of switch */114}115116private static String doWriteForward(char[] text, int start, int limit,117int options) {118return doWriteForward(new String(text, start, limit - start), options);119}120121static String writeReverse(String src, int options) {122/*123* RTL run -124*125* RTL runs need to be copied to the destination in reverse order126* of code points, not code units, to keep Unicode characters intact.127*128* The general strategy for this is to read the source text129* in backward order, collect all code units for a code point130* (and optionally following combining characters, see below),131* and copy all these code units in ascending order132* to the destination for this run.133*134* Several options request whether combining characters135* should be kept after their base characters,136* whether Bidi control characters should be removed, and137* whether characters should be replaced by their mirror-image138* equivalent Unicode characters.139*/140StringBuffer dest = new StringBuffer(src.length());141142/* optimize for several combinations of options */143switch (options &144(BidiBase.REMOVE_BIDI_CONTROLS |145BidiBase.DO_MIRRORING |146BidiBase.KEEP_BASE_COMBINING)) {147148case 0:149/*150* With none of the "complicated" options set, the destination151* run will have the same length as the source run,152* and there is no mirroring and no keeping combining characters153* with their base characters.154*155* XXX: or dest = UTF16.reverse(new StringBuffer(src));156*/157158int srcLength = src.length();159160/* preserve character integrity */161do {162/* i is always after the last code unit known to need to be kept163* in this segment */164int i = srcLength;165166/* collect code units for one base character */167srcLength -= 
UTF16.getCharCount(UTF16.charAt(src,168srcLength - 1));169170/* copy this base character */171dest.append(src.substring(srcLength, i));172} while(srcLength > 0);173break;174175case BidiBase.KEEP_BASE_COMBINING:176/*177* Here, too, the destination178* run will have the same length as the source run,179* and there is no mirroring.180* We do need to keep combining characters with their base181* characters.182*/183srcLength = src.length();184185/* preserve character integrity */186do {187/* i is always after the last code unit known to need to be kept188* in this segment */189int c;190int i = srcLength;191192/* collect code units and modifier letters for one base193* character */194do {195c = UTF16.charAt(src, srcLength - 1);196srcLength -= UTF16.getCharCount(c);197} while(srcLength > 0 && IsCombining(UCharacter.getType(c)));198199/* copy this "user character" */200dest.append(src.substring(srcLength, i));201} while(srcLength > 0);202break;203204default:205/*206* With several "complicated" options set, this is the most207* general and the slowest copying of an RTL run.208* We will do mirroring, remove Bidi controls, and209* keep combining characters with their base characters210* as requested.211*/212srcLength = src.length();213214/* preserve character integrity */215do {216/* i is always after the last code unit known to need to be kept217* in this segment */218int i = srcLength;219220/* collect code units for one base character */221int c = UTF16.charAt(src, srcLength - 1);222srcLength -= UTF16.getCharCount(c);223if ((options & BidiBase.KEEP_BASE_COMBINING) != 0) {224/* collect modifier letters for this base character */225while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {226c = UTF16.charAt(src, srcLength - 1);227srcLength -= UTF16.getCharCount(c);228}229}230231if ((options & BidiBase.REMOVE_BIDI_CONTROLS) != 0 &&232BidiBase.IsBidiControlChar(c)) {233/* do not copy this Bidi control character */234continue;235}236237/* copy this "user character" */238int j 
= srcLength;239if((options & BidiBase.DO_MIRRORING) != 0) {240/* mirror only the base character */241c = UCharacter.getMirror(c);242UTF16.append(dest, c);243j += UTF16.getCharCount(c);244}245dest.append(src.substring(j, i));246} while(srcLength > 0);247break;248} /* end of switch */249250return dest.toString();251}252253static String doWriteReverse(char[] text, int start, int limit, int options) {254return writeReverse(new String(text, start, limit - start), options);255}256257static String writeReordered(BidiBase bidi, int options) {258int run, runCount;259StringBuilder dest;260char[] text = bidi.text;261runCount = bidi.countRuns();262263/*264* Option "insert marks" implies BidiBase.INSERT_LRM_FOR_NUMERIC if the265* reordering mode (checked below) is appropriate.266*/267if ((bidi.reorderingOptions & BidiBase.OPTION_INSERT_MARKS) != 0) {268options |= BidiBase.INSERT_LRM_FOR_NUMERIC;269options &= ~BidiBase.REMOVE_BIDI_CONTROLS;270}271/*272* Option "remove controls" implies BidiBase.REMOVE_BIDI_CONTROLS273* and cancels BidiBase.INSERT_LRM_FOR_NUMERIC.274*/275if ((bidi.reorderingOptions & BidiBase.OPTION_REMOVE_CONTROLS) != 0) {276options |= BidiBase.REMOVE_BIDI_CONTROLS;277options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;278}279/*280* If we do not perform the "inverse Bidi" algorithm, then we281* don't need to insert any LRMs, and don't need to test for it.282*/283if ((bidi.reorderingMode != BidiBase.REORDER_INVERSE_NUMBERS_AS_L) &&284(bidi.reorderingMode != BidiBase.REORDER_INVERSE_LIKE_DIRECT) &&285(bidi.reorderingMode != BidiBase.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&286(bidi.reorderingMode != BidiBase.REORDER_RUNS_ONLY)) {287options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;288}289dest = new StringBuilder((options & BidiBase.INSERT_LRM_FOR_NUMERIC) != 0 ?290bidi.length * 2 : bidi.length);291/*292* Iterate through all visual runs and copy the run text segments to293* the destination, according to the options.294*295* The tests for where to insert LRMs ignore the fact that 
there may be296* BN codes or non-BMP code points at the beginning and end of a run;297* they may insert LRMs unnecessarily but the tests are faster this way298* (this would have to be improved for UTF-8).299*/300if ((options & BidiBase.OUTPUT_REVERSE) == 0) {301/* forward output */302if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {303/* do not insert Bidi controls */304for (run = 0; run < runCount; ++run) {305BidiRun bidiRun = bidi.getVisualRun(run);306if (bidiRun.isEvenRun()) {307dest.append(doWriteForward(text, bidiRun.start,308bidiRun.limit,309options & ~BidiBase.DO_MIRRORING));310} else {311dest.append(doWriteReverse(text, bidiRun.start,312bidiRun.limit, options));313}314}315} else {316/* insert Bidi controls for "inverse Bidi" */317byte[] dirProps = bidi.dirProps;318char uc;319int markFlag;320321for (run = 0; run < runCount; ++run) {322BidiRun bidiRun = bidi.getVisualRun(run);323markFlag=0;324/* check if something relevant in insertPoints */325markFlag = bidi.runs[run].insertRemove;326if (markFlag < 0) { /* bidi controls count */327markFlag = 0;328}329if (bidiRun.isEvenRun()) {330if (bidi.isInverse() &&331dirProps[bidiRun.start] != BidiBase.L) {332markFlag |= BidiBase.LRM_BEFORE;333}334if ((markFlag & BidiBase.LRM_BEFORE) != 0) {335uc = LRM_CHAR;336} else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {337uc = RLM_CHAR;338} else {339uc = 0;340}341if (uc != 0) {342dest.append(uc);343}344dest.append(doWriteForward(text,345bidiRun.start, bidiRun.limit,346options & ~BidiBase.DO_MIRRORING));347348if (bidi.isInverse() &&349dirProps[bidiRun.limit - 1] != BidiBase.L) {350markFlag |= BidiBase.LRM_AFTER;351}352if ((markFlag & BidiBase.LRM_AFTER) != 0) {353uc = LRM_CHAR;354} else if ((markFlag & BidiBase.RLM_AFTER) != 0) {355uc = RLM_CHAR;356} else {357uc = 0;358}359if (uc != 0) {360dest.append(uc);361}362} else { /* RTL run */363if (bidi.isInverse() &&364!bidi.testDirPropFlagAt(MASK_R_AL,365bidiRun.limit - 1)) {366markFlag |= BidiBase.RLM_BEFORE;367}368if ((markFlag 
& BidiBase.LRM_BEFORE) != 0) {369uc = LRM_CHAR;370} else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {371uc = RLM_CHAR;372} else {373uc = 0;374}375if (uc != 0) {376dest.append(uc);377}378dest.append(doWriteReverse(text, bidiRun.start,379bidiRun.limit, options));380381if(bidi.isInverse() &&382(MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {383markFlag |= BidiBase.RLM_AFTER;384}385if ((markFlag & BidiBase.LRM_AFTER) != 0) {386uc = LRM_CHAR;387} else if ((markFlag & BidiBase.RLM_AFTER) != 0) {388uc = RLM_CHAR;389} else {390uc = 0;391}392if (uc != 0) {393dest.append(uc);394}395}396}397}398} else {399/* reverse output */400if((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {401/* do not insert Bidi controls */402for(run = runCount; --run >= 0; ) {403BidiRun bidiRun = bidi.getVisualRun(run);404if (bidiRun.isEvenRun()) {405dest.append(doWriteReverse(text,406bidiRun.start, bidiRun.limit,407options & ~BidiBase.DO_MIRRORING));408} else {409dest.append(doWriteForward(text, bidiRun.start,410bidiRun.limit, options));411}412}413} else {414/* insert Bidi controls for "inverse Bidi" */415416byte[] dirProps = bidi.dirProps;417418for (run = runCount; --run >= 0; ) {419/* reverse output */420BidiRun bidiRun = bidi.getVisualRun(run);421if (bidiRun.isEvenRun()) {422if (dirProps[bidiRun.limit - 1] != BidiBase.L) {423dest.append(LRM_CHAR);424}425426dest.append(doWriteReverse(text, bidiRun.start,427bidiRun.limit, options & ~BidiBase.DO_MIRRORING));428429if (dirProps[bidiRun.start] != BidiBase.L) {430dest.append(LRM_CHAR);431}432} else {433if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {434dest.append(RLM_CHAR);435}436437dest.append(doWriteForward(text, bidiRun.start,438bidiRun.limit, options));439440if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {441dest.append(RLM_CHAR);442}443}444}445}446}447448return dest.toString();449}450}451452453