Path: blob/master/src/java.desktop/share/classes/sun/font/ScriptRun.java
41155 views
/*1* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.2*3* This code is free software; you can redistribute it and/or modify it4* under the terms of the GNU General Public License version 2 only, as5* published by the Free Software Foundation. Oracle designates this6* particular file as subject to the "Classpath" exception as provided7* by Oracle in the LICENSE file that accompanied this code.8*9* This code is distributed in the hope that it will be useful, but WITHOUT10* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or11* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License12* version 2 for more details (a copy is included in the LICENSE file that13* accompanied this code).14*15* You should have received a copy of the GNU General Public License version16* 2 along with this work; if not, write to the Free Software Foundation,17* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.18*19* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA20* or visit www.oracle.com if you need additional information or have any21* questions.22*23*/2425/*26*******************************************************************************27*28* Copyright (C) 1999-2003, International Business Machines29* Corporation and others. All Rights Reserved.30*31*******************************************************************************32*/3334package sun.font;3536/**37* {@code ScriptRun} is used to find runs of characters in38* the same script, as defined in the {@code Script} class.39* It implements a simple iterator over an array of characters.40* The iterator will assign {@code COMMON} and {@code INHERITED}41* characters to the same script as the preceding characters. If the42* COMMON and INHERITED characters are first, they will be assigned to43* the same script as the following characters.44*45* The iterator will try to match paired punctuation. If it sees an46* opening punctuation character, it will remember the script that47* was assigned to that character, and assign the same script to the48* matching closing punctuation.49*50* No attempt is made to combine related scripts into a single run. In51* particular, Hiragana, Katakana, and Han characters will appear in seperate52* runs.5354* Here is an example of how to iterate over script runs:55* <pre>56* void printScriptRuns(char[] text)57* {58* ScriptRun scriptRun = new ScriptRun(text, 0, text.length);59*60* while (scriptRun.next()) {61* int start = scriptRun.getScriptStart();62* int limit = scriptRun.getScriptLimit();63* int script = scriptRun.getScriptCode();64*65* System.out.println("Script \"" + Script.getName(script) + "\" from " +66* start + " to " + limit + ".");67* }68* }69* </pre>70*71*/72public final class ScriptRun73{74private char[] text; // fixed once set by constructor75private int textStart;76private int textLimit;7778private int scriptStart; // change during iteration79private int scriptLimit;80private int scriptCode;8182private int[] stack; // stack used to handle paired punctuation if encountered83private int parenSP;8485public ScriptRun() {86// must call init later or we die.87}8889/**90* Construct a {@code ScriptRun} object which iterates over a subrange91* of the given characetrs.92*93* @param chars the array of characters over which to iterate.94* @param start the index of the first character over which to iterate95* @param count the number of characters over which to iterate96*/97public ScriptRun(char[] chars, int start, int count)98{99init(chars, start, count);100}101102public void init(char[] chars, int start, int count)103{104if (chars == null || start < 0 || count < 0 || count > chars.length - start) {105throw new IllegalArgumentException();106}107108text = chars;109textStart = start;110textLimit = start + count;111112scriptStart = textStart;113scriptLimit = textStart;114scriptCode = Script.INVALID_CODE;115parenSP = 0;116}117118/**119* Get the starting index of the current script run.120*121* @return the index of the first character in the current script run.122*/123public int getScriptStart() {124return scriptStart;125}126127/**128* Get the index of the first character after the current script run.129*130* @return the index of the first character after the current script run.131*/132public int getScriptLimit() {133return scriptLimit;134}135136/**137* Get the script code for the script of the current script run.138*139* @return the script code for the script of the current script run.140* @see Script141*/142public int getScriptCode() {143return scriptCode;144}145146/**147* Find the next script run. Returns {@code false} if there148* isn't another run, returns {@code true} if there is.149*150* @return {@code false} if there isn't another run, {@code true} if there is.151*/152public boolean next() {153int startSP = parenSP; // used to find the first new open character154155// if we've fallen off the end of the text, we're done156if (scriptLimit >= textLimit) {157return false;158}159160scriptCode = Script.COMMON;161scriptStart = scriptLimit;162163int ch;164165while ((ch = nextCodePoint()) != DONE) {166int sc = ScriptRunData.getScript(ch);167int pairIndex = sc == Script.COMMON ? getPairIndex(ch) : -1;168169// Paired character handling:170//171// if it's an open character, push it onto the stack.172// if it's a close character, find the matching open on the173// stack, and use that script code. Any non-matching open174// characters above it on the stack will be popped.175if (pairIndex >= 0) {176if ((pairIndex & 1) == 0) {177if (stack == null) {178stack = new int[32];179} else if (parenSP == stack.length) {180int[] newstack = new int[stack.length + 32];181System.arraycopy(stack, 0, newstack, 0, stack.length);182stack = newstack;183}184185stack[parenSP++] = pairIndex;186stack[parenSP++] = scriptCode;187} else if (parenSP > 0) {188int pi = pairIndex & ~1;189190while ((parenSP -= 2) >= 0 && stack[parenSP] != pi);191192if (parenSP >= 0) {193sc = stack[parenSP+1];194} else {195parenSP = 0;196}197if (parenSP < startSP) {198startSP = parenSP;199}200}201}202203if (sameScript(scriptCode, sc)) {204if (scriptCode <= Script.INHERITED && sc > Script.INHERITED) {205scriptCode = sc;206207// now that we have a final script code, fix any open208// characters we pushed before we knew the script code.209while (startSP < parenSP) {210stack[startSP+1] = scriptCode;211startSP += 2;212}213}214215// if this character is a close paired character,216// pop it from the stack217if (pairIndex > 0 && (pairIndex & 1) != 0 && parenSP > 0) {218parenSP -= 2;219}220} else {221// We've just seen the first character of222// the next run. Back over it so we'll see223// it again the next time.224pushback(ch);225226// we're outta here227break;228}229}230231return true;232}233234static final int SURROGATE_START = 0x10000;235static final int LEAD_START = 0xd800;236static final int LEAD_LIMIT = 0xdc00;237static final int TAIL_START = 0xdc00;238static final int TAIL_LIMIT = 0xe000;239static final int LEAD_SURROGATE_SHIFT = 10;240static final int SURROGATE_OFFSET = SURROGATE_START - (LEAD_START << LEAD_SURROGATE_SHIFT) - TAIL_START;241242static final int DONE = -1;243244private int nextCodePoint() {245if (scriptLimit >= textLimit) {246return DONE;247}248int ch = text[scriptLimit++];249if (ch >= LEAD_START && ch < LEAD_LIMIT && scriptLimit < textLimit) {250int nch = text[scriptLimit];251if (nch >= TAIL_START && nch < TAIL_LIMIT) {252++scriptLimit;253ch = (ch << LEAD_SURROGATE_SHIFT) + nch + SURROGATE_OFFSET;254}255}256return ch;257}258259private void pushback(int ch) {260if (ch >= 0) {261if (ch >= 0x10000) {262scriptLimit -= 2;263} else {264scriptLimit -= 1;265}266}267}268269/**270* Compare two script codes to see if they are in the same script. If one script is271* a strong script, and the other is INHERITED or COMMON, it will compare equal.272*273* @param scriptOne one of the script codes.274* @param scriptTwo the other script code.275* @return {@code true} if the two scripts are the same.276* @see Script277*/278private static boolean sameScript(int scriptOne, int scriptTwo) {279return scriptOne == scriptTwo || scriptOne <= Script.INHERITED || scriptTwo <= Script.INHERITED;280}281282/**283* Find the highest bit that's set in a word. Uses a binary search through284* the bits.285*286* @param n the word in which to find the highest bit that's set.287* @return the bit number (counting from the low order bit) of the highest bit.288*/289private static byte highBit(int n)290{291if (n <= 0) {292return -32;293}294295byte bit = 0;296297if (n >= 1 << 16) {298n >>= 16;299bit += 16;300}301302if (n >= 1 << 8) {303n >>= 8;304bit += 8;305}306307if (n >= 1 << 4) {308n >>= 4;309bit += 4;310}311312if (n >= 1 << 2) {313n >>= 2;314bit += 2;315}316317if (n >= 1 << 1) {318n >>= 1;319bit += 1;320}321322return bit;323}324325/**326* Search the pairedChars array for the given character.327*328* @param ch the character for which to search.329* @return the index of the character in the table, or -1 if it's not there.330*/331private static int getPairIndex(int ch)332{333int probe = pairedCharPower;334int index = 0;335336if (ch >= pairedChars[pairedCharExtra]) {337index = pairedCharExtra;338}339340while (probe > (1 << 0)) {341probe >>= 1;342343if (ch >= pairedChars[index + probe]) {344index += probe;345}346}347348if (pairedChars[index] != ch) {349index = -1;350}351352return index;353}354355// all common356private static int[] pairedChars = {3570x0028, 0x0029, // ascii paired punctuation // common3580x003c, 0x003e, // common3590x005b, 0x005d, // common3600x007b, 0x007d, // common3610x00ab, 0x00bb, // guillemets // common3620x2018, 0x2019, // general punctuation // common3630x201c, 0x201d, // common3640x2039, 0x203a, // common3650x3008, 0x3009, // chinese paired punctuation // common3660x300a, 0x300b,3670x300c, 0x300d,3680x300e, 0x300f,3690x3010, 0x3011,3700x3014, 0x3015,3710x3016, 0x3017,3720x3018, 0x3019,3730x301a, 0x301b374};375376private static final int pairedCharPower = 1 << highBit(pairedChars.length);377private static final int pairedCharExtra = pairedChars.length - pairedCharPower;378379}380381382