Path: blob/master/test/jdk/java/text/BreakIterator/BreakIteratorTest.java
41152 views
/*1* Copyright (c) 1996, 2021, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*/2223/*24* @test25* @bug 4035266 4052418 4068133 4068137 4068139 4086052 4095322 409777926* 4097920 4098467 4111338 4113835 4117554 4143071 4146175 415211727* 4152416 4153072 4158381 4214367 4217703 4638433 826476528* @library /java/text/testlib29* @run main/timeout=2000 BreakIteratorTest30* @summary test BreakIterator31*/3233/*34*35*36* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved37* (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved38*39* Portions copyright (c) 2007 Sun Microsystems, Inc.40* All Rights Reserved.41*42* The original version of this source code and documentation43* is copyrighted and owned by Taligent, Inc., a wholly-owned44* subsidiary of IBM. These materials are provided under terms45* of a License Agreement between Taligent and Sun. This technology46* is protected by multiple US and International patents.47*48* This notice and attribution to Taligent may not be removed.49* Taligent is a registered trademark of Taligent, Inc.50*51* Permission to use, copy, modify, and distribute this software52* and its documentation for NON-COMMERCIAL purposes and without53* fee is hereby granted provided that this copyright notice54* appears in all copies. Please refer to the file "copyright.html"55* for further important copyright and licensing information.56*57* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF58* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED59* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A60* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR61* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR62* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.63*64*/6566import java.text.BreakIterator;67import java.text.CharacterIterator;68import java.text.StringCharacterIterator;69import java.util.Locale;70import java.util.Vector;71import java.util.Enumeration;72import java.io.*;7374public class BreakIteratorTest extends IntlTest75{76private BreakIterator characterBreak;77private BreakIterator wordBreak;78private BreakIterator lineBreak;79private BreakIterator sentenceBreak;8081public static void main(String[] args) throws Exception {82new BreakIteratorTest().run(args);83}8485public BreakIteratorTest()86{87characterBreak = BreakIterator.getCharacterInstance();88wordBreak = BreakIterator.getWordInstance();89lineBreak = BreakIterator.getLineInstance();90sentenceBreak = BreakIterator.getSentenceInstance();91}9293//=========================================================================94// general test subroutines95//=========================================================================9697private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {98StringBuffer buffer = new StringBuffer();99String text;100for (int i = 0; i < expectedResult.size(); i++) {101text = (String)expectedResult.elementAt(i);102buffer.append(text);103}104text = buffer.toString();105106bi.setText(text);107108Vector nextResults = testFirstAndNext(bi, text);109Vector previousResults = testLastAndPrevious(bi, text);110111logln("comparing forward and backward...");112int errs = getErrorCount();113compareFragmentLists("forward iteration", "backward iteration", nextResults,114previousResults);115if (getErrorCount() == errs) {116logln("comparing expected and actual...");117compareFragmentLists("expected result", "actual result", expectedResult,118nextResults);119}120121int[] boundaries = new int[expectedResult.size() + 3];122boundaries[0] = BreakIterator.DONE;123boundaries[1] = 0;124for (int i = 0; i < expectedResult.size(); i++)125boundaries[i + 2] = boundaries[i + 1] + ((String)expectedResult.elementAt(i)).126length();127boundaries[boundaries.length - 1] = BreakIterator.DONE;128129testFollowing(bi, text, boundaries);130testPreceding(bi, text, boundaries);131testIsBoundary(bi, text, boundaries);132133doMultipleSelectionTest(bi, text);134}135136private Vector testFirstAndNext(BreakIterator bi, String text) {137int p = bi.first();138int lastP = p;139Vector<String> result = new Vector<String>();140141if (p != 0)142errln("first() returned " + p + " instead of 0");143while (p != BreakIterator.DONE) {144p = bi.next();145if (p != BreakIterator.DONE) {146if (p <= lastP)147errln("next() failed to move forward: next() on position "148+ lastP + " yielded " + p);149150result.addElement(text.substring(lastP, p));151}152else {153if (lastP != text.length())154errln("next() returned DONE prematurely: offset was "155+ lastP + " instead of " + text.length());156}157lastP = p;158}159return result;160}161162private Vector testLastAndPrevious(BreakIterator bi, String text) {163int p = bi.last();164int lastP = p;165Vector<String> result = new Vector<String>();166167if (p != text.length())168errln("last() returned " + p + " instead of " + text.length());169while (p != BreakIterator.DONE) {170p = bi.previous();171if (p != BreakIterator.DONE) {172if (p >= lastP)173errln("previous() failed to move backward: previous() on position "174+ lastP + " yielded " + p);175176result.insertElementAt(text.substring(p, lastP), 0);177}178else {179if (lastP != 0)180errln("previous() returned DONE prematurely: offset was "181+ lastP + " instead of 0");182}183lastP = p;184}185return result;186}187188private void compareFragmentLists(String f1Name, String f2Name, Vector f1, Vector f2) {189int p1 = 0;190int p2 = 0;191String s1;192String s2;193int t1 = 0;194int t2 = 0;195196while (p1 < f1.size() && p2 < f2.size()) {197s1 = (String)f1.elementAt(p1);198s2 = (String)f2.elementAt(p2);199t1 += s1.length();200t2 += s2.length();201202if (s1.equals(s2)) {203debugLogln(" >" + s1 + "<");204++p1;205++p2;206}207else {208int tempT1 = t1;209int tempT2 = t2;210int tempP1 = p1;211int tempP2 = p2;212213while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {214while (tempT1 < tempT2 && tempP1 < f1.size()) {215tempT1 += ((String)f1.elementAt(tempP1)).length();216++tempP1;217}218while (tempT2 < tempT1 && tempP2 < f2.size()) {219tempT2 += ((String)f2.elementAt(tempP2)).length();220++tempP2;221}222}223logln("*** " + f1Name + " has:");224while (p1 <= tempP1 && p1 < f1.size()) {225s1 = (String)f1.elementAt(p1);226t1 += s1.length();227debugLogln(" *** >" + s1 + "<");228++p1;229}230logln("***** " + f2Name + " has:");231while (p2 <= tempP2 && p2 < f2.size()) {232s2 = (String)f2.elementAt(p2);233t2 += s2.length();234debugLogln(" ***** >" + s2 + "<");235++p2;236}237errln("Discrepancy between " + f1Name + " and " + f2Name + "\n---\n" + f1 +"\n---\n" + f2);238}239}240}241242private void testFollowing(BreakIterator bi, String text, int[] boundaries) {243logln("testFollowing():");244int p = 2;245int i = 0;246try {247for (i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in248if (i == boundaries[p])249++p;250251int b = bi.following(i);252logln("bi.following(" + i + ") -> " + b);253if (b != boundaries[p])254errln("Wrong result from following() for " + i + ": expected " + boundaries[p]255+ ", got " + b);256}257} catch (IllegalArgumentException illargExp) {258errln("IllegalArgumentException caught from following() for offset: " + i);259}260}261262private void testPreceding(BreakIterator bi, String text, int[] boundaries) {263logln("testPreceding():");264int p = 0;265int i = 0;266try {267for (i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in268int b = bi.preceding(i);269logln("bi.preceding(" + i + ") -> " + b);270if (b != boundaries[p])271errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]272+ ", got " + b);273274if (i == boundaries[p + 1])275++p;276}277} catch (IllegalArgumentException illargExp) {278errln("IllegalArgumentException caught from preceding() for offset: " + i);279}280}281282private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {283logln("testIsBoundary():");284int p = 1;285boolean isB;286for (int i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in287isB = bi.isBoundary(i);288logln("bi.isBoundary(" + i + ") -> " + isB);289290if (i == boundaries[p]) {291if (!isB)292errln("Wrong result from isBoundary() for " + i + ": expected true, got false");293++p;294}295else {296if (isB)297errln("Wrong result from isBoundary() for " + i + ": expected false, got true");298}299}300}301302private void doMultipleSelectionTest(BreakIterator iterator, String testText)303{304logln("Multiple selection test...");305BreakIterator testIterator = (BreakIterator)iterator.clone();306int offset = iterator.first();307int testOffset;308int count = 0;309310do {311testOffset = testIterator.first();312testOffset = testIterator.next(count);313logln("next(" + count + ") -> " + testOffset);314if (offset != testOffset)315errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);316317if (offset != BreakIterator.DONE) {318count++;319offset = iterator.next();320}321} while (offset != BreakIterator.DONE);322323// now do it backwards...324offset = iterator.last();325count = 0;326327do {328testOffset = testIterator.last();329testOffset = testIterator.next(count);330logln("next(" + count + ") -> " + testOffset);331if (offset != testOffset)332errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);333334if (offset != BreakIterator.DONE) {335count--;336offset = iterator.previous();337}338} while (offset != BreakIterator.DONE);339}340341private void doBreakInvariantTest(BreakIterator tb, String testChars)342{343StringBuffer work = new StringBuffer("aaa");344int errorCount = 0;345346// a break should always occur after CR (unless followed by LF), LF, PS, and LS347String breaks = /*"\r\n\u2029\u2028"*/"\n\u2029\u2028";348// change this back when new BI code is added349350for (int i = 0; i < breaks.length(); i++) {351work.setCharAt(1, breaks.charAt(i));352for (int j = 0; j < testChars.length(); j++) {353work.setCharAt(0, testChars.charAt(j));354for (int k = 0; k < testChars.length(); k++) {355char c = testChars.charAt(k);356357// if a cr is followed by lf, don't do the check (they stay together)358if (work.charAt(1) == '\r' && (c == '\n'))359continue;360361// CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored362// for breaking purposes as per UTR14363int type1 = Character.getType(work.charAt(1));364int type2 = Character.getType(c);365if (type1 == Character.CONTROL || type1 == Character.FORMAT ||366type2 == Character.CONTROL || type2 == Character.FORMAT) {367continue;368}369370work.setCharAt(2, c);371tb.setText(work.toString());372boolean seen2 = false;373for (int l = tb.first(); l != BreakIterator.DONE; l = tb.next()) {374if (l == 2)375seen2 = true;376}377if (!seen2) {378errln("No break between U+" + Integer.toHexString((int)(work.charAt(1)))379+ " and U+" + Integer.toHexString((int)(work.charAt(2))));380errorCount++;381if (errorCount >= 75)382return;383}384}385}386}387}388389private void doOtherInvariantTest(BreakIterator tb, String testChars)390{391StringBuffer work = new StringBuffer("a\r\na");392int errorCount = 0;393394// a break should never occur between CR and LF395for (int i = 0; i < testChars.length(); i++) {396work.setCharAt(0, testChars.charAt(i));397for (int j = 0; j < testChars.length(); j++) {398work.setCharAt(3, testChars.charAt(j));399tb.setText(work.toString());400for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())401if (k == 2) {402errln("Break between CR and LF in string U+" + Integer.toHexString(403(int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(404(int)(work.charAt(3))));405errorCount++;406if (errorCount >= 75)407return;408}409}410}411412// a break should never occur before a non-spacing mark, unless it's preceded413// by a line terminator414work.setLength(0);415work.append("aaaa");416for (int i = 0; i < testChars.length(); i++) {417char c = testChars.charAt(i);418if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')419continue;420work.setCharAt(1, c);421for (int j = 0; j < testChars.length(); j++) {422c = testChars.charAt(j);423if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)424!= Character.ENCLOSING_MARK)425continue;426work.setCharAt(2, c);427428// CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored429// for breaking purposes as per UTR14430int type1 = Character.getType(work.charAt(1));431int type2 = Character.getType(work.charAt(2));432if (type1 == Character.CONTROL || type1 == Character.FORMAT ||433type2 == Character.CONTROL || type2 == Character.FORMAT) {434continue;435}436437tb.setText(work.toString());438for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())439if (k == 2) {440errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))441+ " and U+" + Integer.toHexString((int)(work.charAt(2))));442errorCount++;443if (errorCount >= 75)444return;445}446}447}448}449450public void debugLogln(String s) {451final String zeros = "0000";452String temp;453StringBuffer out = new StringBuffer();454for (int i = 0; i < s.length(); i++) {455char c = s.charAt(i);456if (c >= ' ' && c < '\u007f')457out.append(c);458else {459out.append("\\u");460temp = Integer.toHexString((int)c);461out.append(zeros.substring(0, 4 - temp.length()));462out.append(temp);463}464}465logln(out.toString());466}467468//=========================================================================469// tests470//=========================================================================471472public void TestWordBreak() {473474Vector<String> wordSelectionData = new Vector<String>();475476wordSelectionData.addElement("12,34");477478wordSelectionData.addElement(" ");479wordSelectionData.addElement("\u00A2"); //cent sign480wordSelectionData.addElement("\u00A3"); //pound sign481wordSelectionData.addElement("\u00A4"); //currency sign482wordSelectionData.addElement("\u00A5"); //yen sign483wordSelectionData.addElement("alpha-beta-gamma");484wordSelectionData.addElement(".");485wordSelectionData.addElement(" ");486wordSelectionData.addElement("Badges");487wordSelectionData.addElement("?");488wordSelectionData.addElement(" ");489wordSelectionData.addElement("BADGES");490wordSelectionData.addElement("!");491wordSelectionData.addElement("?");492wordSelectionData.addElement("!");493wordSelectionData.addElement(" ");494wordSelectionData.addElement("We");495wordSelectionData.addElement(" ");496wordSelectionData.addElement("don't");497wordSelectionData.addElement(" ");498wordSelectionData.addElement("need");499wordSelectionData.addElement(" ");500wordSelectionData.addElement("no");501wordSelectionData.addElement(" ");502wordSelectionData.addElement("STINKING");503wordSelectionData.addElement(" ");504wordSelectionData.addElement("BADGES");505wordSelectionData.addElement("!");506wordSelectionData.addElement("!");507wordSelectionData.addElement("!");508509wordSelectionData.addElement("012.566,5");510wordSelectionData.addElement(" ");511wordSelectionData.addElement("123.3434,900");512wordSelectionData.addElement(" ");513wordSelectionData.addElement("1000,233,456.000");514wordSelectionData.addElement(" ");515wordSelectionData.addElement("1,23.322%");516wordSelectionData.addElement(" ");517wordSelectionData.addElement("123.1222");518519wordSelectionData.addElement(" ");520wordSelectionData.addElement("\u0024123,000.20");521522wordSelectionData.addElement(" ");523wordSelectionData.addElement("179.01\u0025");524525wordSelectionData.addElement("Hello");526wordSelectionData.addElement(",");527wordSelectionData.addElement(" ");528wordSelectionData.addElement("how");529wordSelectionData.addElement(" ");530wordSelectionData.addElement("are");531wordSelectionData.addElement(" ");532wordSelectionData.addElement("you");533wordSelectionData.addElement(" ");534wordSelectionData.addElement("X");535wordSelectionData.addElement(" ");536537wordSelectionData.addElement("Now");538wordSelectionData.addElement("\r");539wordSelectionData.addElement("is");540wordSelectionData.addElement("\n");541wordSelectionData.addElement("the");542wordSelectionData.addElement("\r\n");543wordSelectionData.addElement("time");544wordSelectionData.addElement("\n");545wordSelectionData.addElement("\r");546wordSelectionData.addElement("for");547wordSelectionData.addElement("\r");548wordSelectionData.addElement("\r");549wordSelectionData.addElement("all");550wordSelectionData.addElement(" ");551552generalIteratorTest(wordBreak, wordSelectionData);553}554555public void TestBug4097779() {556Vector<String> wordSelectionData = new Vector<String>();557558wordSelectionData.addElement("aa\u0300a");559wordSelectionData.addElement(" ");560561generalIteratorTest(wordBreak, wordSelectionData);562}563564public void TestBug4098467Words() {565Vector<String> wordSelectionData = new Vector<String>();566567// What follows is a string of Korean characters (I found it in the Yellow Pages568// ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed569// it correctly), first as precomposed syllables, and then as conjoining jamo.570// Both sequences should be semantically identical and break the same way.571// precomposed syllables...572wordSelectionData.addElement("\uc0c1\ud56d");573wordSelectionData.addElement(" ");574wordSelectionData.addElement("\ud55c\uc778");575wordSelectionData.addElement(" ");576wordSelectionData.addElement("\uc5f0\ud569");577wordSelectionData.addElement(" ");578wordSelectionData.addElement("\uc7a5\ub85c\uad50\ud68c");579wordSelectionData.addElement(" ");580// conjoining jamo...581wordSelectionData.addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc");582wordSelectionData.addElement(" ");583wordSelectionData.addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab");584wordSelectionData.addElement(" ");585wordSelectionData.addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8");586wordSelectionData.addElement(" ");587wordSelectionData.addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");588wordSelectionData.addElement(" ");589590generalIteratorTest(wordBreak, wordSelectionData);591}592593public void TestBug4117554Words() {594Vector<String> wordSelectionData = new Vector<String>();595596// this is a test for bug #4117554: the ideographic iteration mark (U+3005) should597// count as a Kanji character for the purposes of word breaking598wordSelectionData.addElement("abc");599wordSelectionData.addElement("\u4e01\u4e02\u3005\u4e03\u4e03");600wordSelectionData.addElement("abc");601602generalIteratorTest(wordBreak, wordSelectionData);603}604605public void TestSentenceBreak() {606Vector<String> sentenceSelectionData = new Vector<String>();607608sentenceSelectionData.addElement("This is a simple sample sentence. ");609sentenceSelectionData.addElement("(This is it.) ");610sentenceSelectionData.addElement("This is a simple sample sentence. ");611sentenceSelectionData.addElement("\"This isn\'t it.\" ");612sentenceSelectionData.addElement("Hi! ");613sentenceSelectionData.addElement("This is a simple sample sentence. ");614sentenceSelectionData.addElement("It does not have to make any sense as you can see. ");615sentenceSelectionData.addElement("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");616sentenceSelectionData.addElement("Che la dritta via aveo smarrita. ");617sentenceSelectionData.addElement("He said, that I said, that you said!! ");618619sentenceSelectionData.addElement("Don't rock the boat.\u2029");620621sentenceSelectionData.addElement("Because I am the daddy, that is why. ");622sentenceSelectionData.addElement("Not on my time (el timo.)! ");623624sentenceSelectionData.addElement("So what!!\u2029");625626sentenceSelectionData.addElement("\"But now,\" he said, \"I know!\" ");627sentenceSelectionData.addElement("Harris thumbed down several, including \"Away We Go\" (which became the huge success Oklahoma!). ");628sentenceSelectionData.addElement("One species, B. anthracis, is highly virulent.\n");629sentenceSelectionData.addElement("Wolf said about Sounder:\"Beautifully thought-out and directed.\" ");630sentenceSelectionData.addElement("Have you ever said, \"This is where \tI shall live\"? ");631sentenceSelectionData.addElement("He answered, \"You may not!\" ");632sentenceSelectionData.addElement("Another popular saying is: \"How do you do?\". ");633sentenceSelectionData.addElement("Yet another popular saying is: \'I\'m fine thanks.\' ");634sentenceSelectionData.addElement("What is the proper use of the abbreviation pp.? ");635sentenceSelectionData.addElement("Yes, I am definatelly 12\" tall!!");636637generalIteratorTest(sentenceBreak, sentenceSelectionData);638}639640public void TestBug4113835() {641Vector<String> sentenceSelectionData = new Vector<String>();642643// test for bug #4113835: \n and \r count as spaces, not as paragraph breaks644sentenceSelectionData.addElement("Now\ris\nthe\r\ntime\n\rfor\r\rall\u2029");645646generalIteratorTest(sentenceBreak, sentenceSelectionData);647}648649public void TestBug4111338() {650Vector<String> sentenceSelectionData = new Vector<String>();651652// test for bug #4111338: Don't break sentences at the boundary between CJK653// and other letters654sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"655+ "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"656+ "\u611d\u57b6\u2510\u5d46\".\u2029");657sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"658+ "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"659+ "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");660sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"661+ "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"662+ "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");663sentenceSelectionData.addElement("He said, \"I can go there.\"\u2029");664665generalIteratorTest(sentenceBreak, sentenceSelectionData);666}667668public void TestBug4117554Sentences() {669Vector<String> sentenceSelectionData = new Vector<String>();670671// Treat fullwidth variants of .!? the same as their672// normal counterparts673sentenceSelectionData.addElement("I know I'm right\uff0e ");674sentenceSelectionData.addElement("Right\uff1f ");675sentenceSelectionData.addElement("Right\uff01 ");676677// Don't break sentences at boundary between CJK and digits678sentenceSelectionData.addElement("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"679+ "\u97e48888\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"680+ "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");681682// Break sentence between a sentence terminator and683// opening punctuation684sentenceSelectionData.addElement("no?");685sentenceSelectionData.addElement("(yes)");686687generalIteratorTest(sentenceBreak, sentenceSelectionData);688}689690public void TestBug4158381() {691Vector<String> sentenceSelectionData = new Vector<String>();692693// Don't break sentence after period if it isn't followed by a space694sentenceSelectionData.addElement("Test <code>Flags.Flag</code> class. ");695sentenceSelectionData.addElement("Another test.\u2029");696697// No breaks when there are no terminators around698sentenceSelectionData.addElement("<P>Provides a set of "699+ ""lightweight" (all-java<FONT SIZE=\"-2\"><SUP>TM"700+ "</SUP></FONT> language) components that, "701+ "to the maximum degree possible, work the same on all platforms. ");702sentenceSelectionData.addElement("Another test.\u2029");703704generalIteratorTest(sentenceBreak, sentenceSelectionData);705}706707public void TestBug4143071() {708Vector<String> sentenceSelectionData = new Vector<String>();709710// Make sure sentences that end with digits work right711sentenceSelectionData.addElement("Today is the 27th of May, 1998. ");712sentenceSelectionData.addElement("Tomorrow with be 28 May 1998. ");713sentenceSelectionData.addElement("The day after will be the 30th.\u2029");714715generalIteratorTest(sentenceBreak, sentenceSelectionData);716}717718public void TestBug4152416() {719Vector<String> sentenceSelectionData = new Vector<String>();720721// Make sure sentences ending with a capital letter are treated correctly722sentenceSelectionData.addElement("The type of all primitive "723+ "<code>boolean</code> values accessed in the target VM. ");724sentenceSelectionData.addElement("Calls to xxx will return an "725+ "implementor of this interface.\u2029");726727generalIteratorTest(sentenceBreak, sentenceSelectionData);728}729730public void TestBug4152117() {731Vector<String> sentenceSelectionData = new Vector<String>();732733// Make sure sentence breaking is handling punctuation correctly734// [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE735// IT DOESN'T CROP UP]736sentenceSelectionData.addElement("Constructs a randomly generated "737+ "BigInteger, uniformly distributed over the range <tt>0</tt> "738+ "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");739sentenceSelectionData.addElement("The uniformity of the distribution "740+ "assumes that a fair source of random bits is provided in "741+ "<tt>rnd</tt>. ");742sentenceSelectionData.addElement("Note that this constructor always "743+ "constructs a non-negative BigInteger.\u2029");744745generalIteratorTest(sentenceBreak, sentenceSelectionData);746}747748public void TestBug8264765() {749Vector<String> sentenceSelectionData = new Vector<String>();750751// Comma should not be regarded as the start of a sentence,752// otherwise the backwards rule would break the following sentence.753sentenceSelectionData.addElement(754"Due to a problem (e.g., software bug), the server is down. ");755756generalIteratorTest(sentenceBreak, sentenceSelectionData);757}758759public void TestLineBreak() {760Vector<String> lineSelectionData = new Vector<String>();761762lineSelectionData.addElement("Multi-");763lineSelectionData.addElement("Level ");764lineSelectionData.addElement("example ");765lineSelectionData.addElement("of ");766lineSelectionData.addElement("a ");767lineSelectionData.addElement("semi-");768lineSelectionData.addElement("idiotic ");769lineSelectionData.addElement("non-");770lineSelectionData.addElement("sensical ");771lineSelectionData.addElement("(non-");772lineSelectionData.addElement("important) ");773lineSelectionData.addElement("sentence. ");774775lineSelectionData.addElement("Hi ");776lineSelectionData.addElement("Hello ");777lineSelectionData.addElement("How\n");778lineSelectionData.addElement("are\r");779lineSelectionData.addElement("you\u2028");780lineSelectionData.addElement("fine.\t");781lineSelectionData.addElement("good. ");782783lineSelectionData.addElement("Now\r");784lineSelectionData.addElement("is\n");785lineSelectionData.addElement("the\r\n");786lineSelectionData.addElement("time\n");787lineSelectionData.addElement("\r");788lineSelectionData.addElement("for\r");789lineSelectionData.addElement("\r");790lineSelectionData.addElement("all");791792generalIteratorTest(lineBreak, lineSelectionData);793}794795public void TestBug4068133() {796Vector<String> lineSelectionData = new Vector<String>();797798lineSelectionData.addElement("\u96f6");799lineSelectionData.addElement("\u4e00\u3002");800lineSelectionData.addElement("\u4e8c\u3001");801lineSelectionData.addElement("\u4e09\u3002\u3001");802lineSelectionData.addElement("\u56db\u3001\u3002\u3001");803lineSelectionData.addElement("\u4e94,");804lineSelectionData.addElement("\u516d.");805lineSelectionData.addElement("\u4e03.\u3001,\u3002");806lineSelectionData.addElement("\u516b");807808generalIteratorTest(lineBreak, lineSelectionData);809}810811public void TestBug4086052() {812Vector<String> lineSelectionData = new Vector<String>();813814lineSelectionData.addElement("foo\u00a0bar ");815// lineSelectionData.addElement("foo\ufeffbar");816817generalIteratorTest(lineBreak, lineSelectionData);818}819820public void TestBug4097920() {821Vector<String> lineSelectionData = new Vector<String>();822823lineSelectionData.addElement("dog,");824lineSelectionData.addElement("cat,");825lineSelectionData.addElement("mouse ");826lineSelectionData.addElement("(one)");827lineSelectionData.addElement("(two)\n");828829generalIteratorTest(lineBreak, lineSelectionData);830}831/*832public void TestBug4035266() {833Vector<String> lineSelectionData = new Vector<String>();834835lineSelectionData.addElement("The ");836lineSelectionData.addElement("balance ");837lineSelectionData.addElement("is ");838lineSelectionData.addElement("$-23,456.78, ");839lineSelectionData.addElement("not ");840lineSelectionData.addElement("-$32,456.78!\n");841842generalIteratorTest(lineBreak, lineSelectionData);843}844*/845public void TestBug4098467Lines() {846Vector<String> lineSelectionData = new Vector<String>();847848// What follows is a string of Korean characters (I found it in the Yellow Pages849// ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed850// it correctly), first as precomposed syllables, and then as conjoining jamo.851// Both sequences should be semantically identical and break the same way.852// precomposed syllables...853lineSelectionData.addElement("\uc0c1");854lineSelectionData.addElement("\ud56d ");855lineSelectionData.addElement("\ud55c");856lineSelectionData.addElement("\uc778 ");857lineSelectionData.addElement("\uc5f0");858lineSelectionData.addElement("\ud569 ");859lineSelectionData.addElement("\uc7a5");860lineSelectionData.addElement("\ub85c");861lineSelectionData.addElement("\uad50");862lineSelectionData.addElement("\ud68c ");863// conjoining jamo...864lineSelectionData.addElement("\u1109\u1161\u11bc\u1112\u1161\u11bc ");865lineSelectionData.addElement("\u1112\u1161\u11ab\u110b\u1175\u11ab ");866lineSelectionData.addElement("\u110b\u1167\u11ab\u1112\u1161\u11b8 ");867lineSelectionData.addElement("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");868869if (Locale.getDefault().getLanguage().equals("th")) {870logln("This test is skipped in th locale.");871return;872}873874generalIteratorTest(lineBreak, lineSelectionData);875}876877public void TestBug4117554Lines() {878Vector<String> lineSelectionData = new Vector<String>();879880// Fullwidth .!? should be treated as postJwrd881lineSelectionData.addElement("\u4e01\uff0e");882lineSelectionData.addElement("\u4e02\uff01");883lineSelectionData.addElement("\u4e03\uff1f");884885generalIteratorTest(lineBreak, lineSelectionData);886}887888public void TestBug4217703() {889if (Locale.getDefault().getLanguage().equals("th")) {890logln("This test is skipped in th locale.");891return;892}893894Vector<String> lineSelectionData = new Vector<String>();895896// There shouldn't be a line break between sentence-ending punctuation897// and a closing quote898lineSelectionData.addElement("He ");899lineSelectionData.addElement("said ");900lineSelectionData.addElement("\"Go!\" ");901lineSelectionData.addElement("I ");902lineSelectionData.addElement("went. ");903904lineSelectionData.addElement("Hashtable$Enumeration ");905lineSelectionData.addElement("getText().");906lineSelectionData.addElement("getIndex()");907908generalIteratorTest(lineBreak, lineSelectionData);909}910911private static final String graveS = "S\u0300";912private static final String acuteBelowI = "i\u0317";913private static final String acuteE = "e\u0301";914private static final String circumflexA = "a\u0302";915private static final String tildeE = "e\u0303";916917public void TestCharacterBreak() {918Vector<String> characterSelectionData = new Vector<String>();919920characterSelectionData.addElement(graveS);921characterSelectionData.addElement(acuteBelowI);922characterSelectionData.addElement("m");923characterSelectionData.addElement("p");924characterSelectionData.addElement("l");925characterSelectionData.addElement(acuteE);926characterSelectionData.addElement(" ");927characterSelectionData.addElement("s");928characterSelectionData.addElement(circumflexA);929characterSelectionData.addElement("m");930characterSelectionData.addElement("p");931characterSelectionData.addElement("l");932characterSelectionData.addElement(tildeE);933characterSelectionData.addElement(".");934characterSelectionData.addElement("w");935characterSelectionData.addElement(circumflexA);936characterSelectionData.addElement("w");937characterSelectionData.addElement("a");938characterSelectionData.addElement("f");939characterSelectionData.addElement("q");940characterSelectionData.addElement("\n");941characterSelectionData.addElement("\r");942characterSelectionData.addElement("\r\n");943characterSelectionData.addElement("\n");944945generalIteratorTest(characterBreak, characterSelectionData);946}947948public void TestBug4098467Characters() {949Vector<String> characterSelectionData = new Vector<String>();950951// What follows is a string of Korean characters (I found it in the Yellow Pages952// ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed953// it correctly), first as precomposed syllables, and then as conjoining jamo.954// Both sequences should be semantically identical and break the same way.955// precomposed syllables...956characterSelectionData.addElement("\uc0c1");957characterSelectionData.addElement("\ud56d");958characterSelectionData.addElement(" ");959characterSelectionData.addElement("\ud55c");960characterSelectionData.addElement("\uc778");961characterSelectionData.addElement(" ");962characterSelectionData.addElement("\uc5f0");963characterSelectionData.addElement("\ud569");964characterSelectionData.addElement(" ");965characterSelectionData.addElement("\uc7a5");966characterSelectionData.addElement("\ub85c");967characterSelectionData.addElement("\uad50");968characterSelectionData.addElement("\ud68c");969characterSelectionData.addElement(" ");970// conjoining jamo...971characterSelectionData.addElement("\u1109\u1161\u11bc");972characterSelectionData.addElement("\u1112\u1161\u11bc");973characterSelectionData.addElement(" ");974characterSelectionData.addElement("\u1112\u1161\u11ab");975characterSelectionData.addElement("\u110b\u1175\u11ab");976characterSelectionData.addElement(" ");977characterSelectionData.addElement("\u110b\u1167\u11ab");978characterSelectionData.addElement("\u1112\u1161\u11b8");979characterSelectionData.addElement(" ");980characterSelectionData.addElement("\u110c\u1161\u11bc");981characterSelectionData.addElement("\u1105\u1169");982characterSelectionData.addElement("\u1100\u116d");983characterSelectionData.addElement("\u1112\u116c");984985generalIteratorTest(characterBreak, characterSelectionData);986}987988public void TestBug4153072() {989BreakIterator iter = BreakIterator.getWordInstance();990String str = "...Hello, World!...";991int begin = 3;992int end = str.length() - 3;993boolean gotException = false;994boolean dummy;995996iter.setText(new StringCharacterIterator(str, begin, end, begin));997for (int index = -1; index < begin + 1; ++index) {998try {999dummy = iter.isBoundary(index);1000if (index < begin)1001errln("Didn't get exception with offset = " + index +1002" and begin index = " + begin);1003}1004catch (IllegalArgumentException e) {1005if (index >= begin)1006errln("Got exception with offset = " + index +1007" and begin index = " + begin);1008}1009}1010}10111012public void TestBug4146175Sentences() {1013Vector<String> sentenceSelectionData = new Vector<String>();10141015// break between periods and opening punctuation even when there's no1016// intervening space1017sentenceSelectionData.addElement("end.");1018sentenceSelectionData.addElement("(This is\u2029");10191020// treat the fullwidth period as an unambiguous sentence terminator1021sentenceSelectionData.addElement("\u7d42\u308f\u308a\uff0e");1022sentenceSelectionData.addElement("\u300c\u3053\u308c\u306f");10231024generalIteratorTest(sentenceBreak, sentenceSelectionData);1025}10261027public void TestBug4146175Lines() {1028if (Locale.getDefault().getLanguage().equals("th")) {1029logln("This test is skipped in th locale.");1030return;1031}10321033Vector<String> lineSelectionData = new Vector<String>();10341035// the fullwidth comma should stick to the preceding Japanese character1036lineSelectionData.addElement("\u7d42\uff0c");1037lineSelectionData.addElement("\u308f");10381039generalIteratorTest(lineBreak, lineSelectionData);1040}10411042public void TestBug4214367() {1043if (Locale.getDefault().getLanguage().equals("th")) {1044logln("This test is skipped in th locale.");1045return;1046}10471048Vector<String> wordSelectionData = new Vector<String>();10491050// the hiragana and katakana iteration marks and the long vowel mark1051// are not being treated correctly by the word-break iterator1052wordSelectionData.addElement("\u3042\u3044\u309d\u3042\u309e\u3042\u30fc\u3042");1053wordSelectionData.addElement("\u30a2\u30a4\u30fd\u30a2\u30fe\u30a2\u30fc\u30a2");10541055generalIteratorTest(wordBreak, wordSelectionData);1056}10571058private static final String cannedTestChars // characters fo the class Cc are ignorable for breaking1059= /*"\u0000\u0001\u0002\u0003\u0004*/" !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"1060+ "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"1061+ "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"1062+ "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"1063+ "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"1064+ "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";10651066public void TestSentenceInvariants()1067{1068BreakIterator e = BreakIterator.getSentenceInstance();1069doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");1070}10711072public void TestWordInvariants()1073{1074if (Locale.getDefault().getLanguage().equals("th")) {1075logln("This test is skipped in th locale.");1076return;1077}10781079BreakIterator e = BreakIterator.getWordInstance();1080doBreakInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"1081+ "\u30a3\u4e00\u4e01\u4e02");1082doOtherInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"1083+ "\u30a3\u4e00\u4e01\u4e02");1084}10851086public void TestLineInvariants()1087{1088if (Locale.getDefault().getLanguage().equals("th")) {1089logln("This test is skipped in th locale.");1090return;1091}10921093BreakIterator e = BreakIterator.getLineInstance();1094String testChars = cannedTestChars + ".,;:\u3001\u3002\u3041\u3042\u3043\u3044\u3045"1095+ "\u30a3\u4e00\u4e01\u4e02";1096doBreakInvariantTest(e, testChars);1097doOtherInvariantTest(e, testChars);10981099int errorCount = 0;11001101// in addition to the other invariants, a line-break iterator should make sure that:1102// it doesn't break around the non-breaking characters1103String noBreak = "\u00a0\u2007\u2011\ufeff";1104StringBuffer work = new StringBuffer("aaa");1105for (int i = 0; i < testChars.length(); i++) {1106char c = testChars.charAt(i);1107if (c == '\r' || c == '\n' || c == '\u2029' || c == '\u2028' || c == '\u0003')1108continue;1109work.setCharAt(0, c);1110for (int j = 0; j < noBreak.length(); j++) {1111work.setCharAt(1, noBreak.charAt(j));1112for (int k = 0; k < testChars.length(); k++) {1113work.setCharAt(2, testChars.charAt(k));1114// CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored1115// for breaking purposes as per UTR141116int type1 = Character.getType(work.charAt(1));1117int type2 = Character.getType(work.charAt(2));1118if (type1 == Character.CONTROL || type1 == Character.FORMAT ||1119type2 == Character.CONTROL || type2 == Character.FORMAT) {1120continue;1121}1122e.setText(work.toString());1123for (int l = e.first(); l != BreakIterator.DONE; l = e.next()) {1124if (l == 1 || l == 2) {1125//errln("Got break between U+" + Integer.toHexString((int)1126// (work.charAt(l - 1))) + " and U+" + Integer.toHexString(1127// (int)(work.charAt(l))) + "\ntype1 = " + type1 + "\ntype2 = " + type2);1128// as per UTR14 spaces followed by a GLUE character should allow1129// line breaking1130if (work.charAt(l-1) == '\u0020' && (work.charAt(l) == '\u00a0' ||1131work.charAt(l) == '\u0f0c' ||1132work.charAt(l) == '\u2007' ||1133work.charAt(l) == '\u2011' ||1134work.charAt(l) == '\u202f' ||1135work.charAt(l) == '\ufeff')) {1136continue;1137}1138errln("Got break between U+" + Integer.toHexString((int)1139(work.charAt(l - 1))) + " and U+" + Integer.toHexString(1140(int)(work.charAt(l))));1141errorCount++;1142if (errorCount >= 75)1143return;1144}1145}1146}1147}1148}11491150// The following test has so many exceptions that it would be better to write a new set of data1151// that tested exactly what should be tested1152// Until that point it will be commented out1153/*11541155// it does break after dashes (unless they're followed by a digit, a non-spacing mark,1156// a currency symbol, a space, a format-control character, a regular control character,1157// a line or paragraph separator, or another dash)1158String dashes = "-\u00ad\u2010\u2012\u2013\u2014";1159for (int i = 0; i < testChars.length(); i++) {1160work.setCharAt(0, testChars.charAt(i));1161for (int j = 0; j < dashes.length(); j++) {1162work.setCharAt(1, dashes.charAt(j));1163for (int k = 0; k < testChars.length(); k++) {1164char c = testChars.charAt(k);1165if (Character.getType(c) == Character.DECIMAL_DIGIT_NUMBER ||1166Character.getType(c) == Character.OTHER_NUMBER ||1167Character.getType(c) == Character.NON_SPACING_MARK ||1168Character.getType(c) == Character.ENCLOSING_MARK ||1169Character.getType(c) == Character.CURRENCY_SYMBOL ||1170Character.getType(c) == Character.DASH_PUNCTUATION ||1171Character.getType(c) == Character.SPACE_SEPARATOR ||1172Character.getType(c) == Character.FORMAT ||1173Character.getType(c) == Character.CONTROL ||1174Character.getType(c) == Character.END_PUNCTUATION ||1175Character.getType(c) == Character.FINAL_QUOTE_PUNCTUATION ||1176Character.getType(c) == Character.OTHER_PUNCTUATION ||1177c == '\'' || c == '\"' ||1178// category EX as per UTR141179c == '!' || c == '?' || c == '\ufe56' || c == '\ufe57' || c == '\uff01' || c == '\uff1f' ||1180c == '\n' || c == '\r' || c == '\u2028' || c == '\u2029' ||1181c == '\u0003' || c == '\u2007' || c == '\u2011' ||1182c == '\ufeff')1183continue;1184work.setCharAt(2, c);1185e.setText(work.toString());1186boolean saw2 = false;1187for (int l = e.first(); l != BreakIterator.DONE; l = e.next())1188if (l == 2)1189saw2 = true;1190if (!saw2) {1191errln("Didn't get break between U+" + Integer.toHexString((int)1192(work.charAt(1))) + " and U+" + Integer.toHexString(1193(int)(work.charAt(2))));1194errorCount++;1195if (errorCount >= 75)1196return;1197}1198}1199}1200}1201*/1202}12031204public void TestCharacterInvariants()1205{1206BreakIterator e = BreakIterator.getCharacterInstance();1207doBreakInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"1208+ "\u11a9\u11aa");1209doOtherInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"1210+ "\u11a9\u11aa");1211}12121213public void TestEmptyString()1214{1215String text = "";1216Vector<String> x = new Vector<String>();1217x.addElement(text);12181219generalIteratorTest(lineBreak, x);1220}12211222public void TestGetAvailableLocales()1223{1224Locale[] locList = BreakIterator.getAvailableLocales();12251226if (locList.length == 0)1227errln("getAvailableLocales() returned an empty list!");1228// I have no idea how to test this function...1229}123012311232/**1233* Bug 40953221234*/1235public void TestJapaneseLineBreak()1236{1237StringBuffer testString = new StringBuffer("\u4e00x\u4e8c");1238// Breaking on <Kanji>$<Kanji> is inconsistent12391240/* Characters in precedingChars and followingChars have been updated1241* from Unicode 2.0.14-based to 3.0.0-based when 4638433 was fixed.1242* In concrete terms,1243* 0x301F : Its category was changed from Ps to Pe since Unicode 2.1.1244* 0x169B & 0x169C : added since Unicode 3.0.0.1245*/1246String precedingChars =1247/* Puctuation, Open */1248"([{\u201a\u201e\u2045\u207d\u208d\u2329\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41\ufe43\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff62\u169b"1249/* Punctuation, Initial quote */1250+ "\u00ab\u2018\u201b\u201c\u201f\u2039"1251/* Symbol, Currency */1252+ "\u00a5\u00a3\u00a4\u20a0";12531254String followingChars =1255/* Puctuation, Close */1256")]}\u2046\u207e\u208e\u232a\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e\u301f\ufd3e\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42\ufe44\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff63\u169c"1257/* Punctuation, Final quote */1258+ "\u00bb\u2019\u201d\u203a"1259/* Punctuation, Other */1260+ "!%,.:;\u3001\u3002\u2030\u2031\u2032\u2033\u2034"1261/* Punctuation, Dash */1262+ "\u2103\u2109"1263/* Symbol, Currency */1264+ "\u00a2"1265/* Letter, Modifier */1266+ "\u3005\u309d\u309e"1267/* Letter, Other */1268+ "\u3063\u3083\u3085\u3087\u30c3\u30e3\u30e5\u30e7\u30fc\u30fd\u30fe"1269/* Mark, Non-Spacing */1270+ "\u0300\u0301\u0302"1271/* Symbol, Modifier */1272+ "\u309b\u309c"1273/* Symbol, Other */1274+ "\u00b0";12751276BreakIterator iter = BreakIterator.getLineInstance(Locale.JAPAN);12771278for (int i = 0; i < precedingChars.length(); i++) {1279testString.setCharAt(1, precedingChars.charAt(i));1280iter.setText(testString.toString());1281int j = iter.first();1282if (j != 0) {1283errln("ja line break failure: failed to start at 0 and bounced at " + j);1284}1285j = iter.next();1286if (j != 1) {1287errln("ja line break failure: failed to stop before '"1288+ precedingChars.charAt(i) + "' (\\u"1289+ Integer.toString(precedingChars.charAt(i), 16)1290+ ") at 1 and bounded at " + j);1291}1292j = iter.next();1293if (j != 3) {1294errln("ja line break failure: failed to skip position after '"1295+ precedingChars.charAt(i) + "' (\\u"1296+ Integer.toString(precedingChars.charAt(i), 16)1297+ ") at 3 and bounded at " + j);1298}1299}13001301for (int i = 0; i < followingChars.length(); i++) {1302testString.setCharAt(1, followingChars.charAt(i));1303iter.setText(testString.toString());1304int j = iter.first();1305if (j != 0) {1306errln("ja line break failure: failed to start at 0 and bounded at " + j);1307}1308j = iter.next();1309if (j != 2) {1310errln("ja line break failure: failed to skip position before '"1311+ followingChars.charAt(i) + "' (\\u"1312+ Integer.toString(followingChars.charAt(i), 16)1313+ ") at 2 and bounded at " + j);1314}1315j = iter.next();1316if (j != 3) {1317errln("ja line break failure: failed to stop after '"1318+ followingChars.charAt(i) + "' (\\u"1319+ Integer.toString(followingChars.charAt(i), 16)1320+ ") at 3 and bounded at " + j);1321}1322}1323}13241325/**1326* Bug 46384331327*/1328public void TestLineBreakBasedOnUnicode3_0_0()1329{1330BreakIterator iter;1331int i;13321333/* Latin Extend-B characters1334* 0x0218-0x0233 which have been added since Unicode 3.0.0.1335*/1336iter = BreakIterator.getWordInstance(Locale.US);1337iter.setText("\u0216\u0217\u0218\u0219\u021A");1338i = iter.first();1339i = iter.next();1340if (i != 5) {1341errln("Word break failure: failed to stop at 5 and bounded at " + i);1342}134313441345iter = BreakIterator.getLineInstance(Locale.US);13461347/* <Three(Nd)><Two(Nd)><Low Double Prime Quotation Mark(Pe)><One(Nd)>1348* \u301f has changed its category from Ps to Pe since Unicode 2.1.1349*/1350iter.setText("32\u301f1");1351i = iter.first();1352i = iter.next();1353if (i != 3) {1354errln("Line break failure: failed to skip before \\u301F(Pe) at 3 and bounded at " + i);1355}13561357/* Mongolian <Letter A(Lo)><Todo Soft Hyphen(Pd)><Letter E(Lo)>1358* which have been added since Unicode 3.0.0.1359*/1360iter.setText("\u1820\u1806\u1821");1361i = iter.first();1362i = iter.next();1363if (i != 2) {1364errln("Mongolian line break failure: failed to skip position before \\u1806(Pd) at 2 and bounded at " + i);1365}13661367/* Khmer <ZERO(Nd)><Currency Symbol(Sc)><ONE(Nd)> which have1368* been added since Unicode 3.0.0.1369*/1370iter.setText("\u17E0\u17DB\u17E1");1371i = iter.first();1372i = iter.next();1373if (i != 1) {1374errln("Khmer line break failure: failed to stop before \\u17DB(Sc) at 1 and bounded at " + i);1375}1376i = iter.next();1377if (i != 3) {1378errln("Khmer line break failure: failed to skip position after \\u17DB(Sc) at 3 and bounded at " + i);1379}13801381/* Ogham <Letter UR(Lo)><Space Mark(Zs)><Letter OR(Lo)> which have1382* been added since Unicode 3.0.0.1383*/1384iter.setText("\u1692\u1680\u1696");1385i = iter.first();1386i = iter.next();1387if (i != 2) {1388errln("Ogham line break failure: failed to skip postion before \\u1680(Zs) at 2 and bounded at " + i);1389}139013911392// Confirm changes in BreakIteratorRules_th.java have been reflected.1393iter = BreakIterator.getLineInstance(new Locale("th", ""));13941395/* Thai <Seven(Nd)>1396* <Left Double Quotation Mark(Pi)>1397* <Five(Nd)>1398* <Right Double Quotation Mark(Pf)>1399* <Three(Nd)>1400*/1401iter.setText("\u0E57\u201C\u0E55\u201D\u0E53");1402i = iter.first();1403i = iter.next();1404if (i != 1) {1405errln("Thai line break failure: failed to stop before \\u201C(Pi) at 1 and bounded at " + i);1406}1407i = iter.next();1408if (i != 4) {1409errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + i);1410}1411}14121413/**1414* Bug 40681371415*/1416public void TestEndBehavior()1417{1418String testString = "boo.";1419BreakIterator wb = BreakIterator.getWordInstance();1420wb.setText(testString);14211422if (wb.first() != 0)1423errln("Didn't get break at beginning of string.");1424if (wb.next() != 3)1425errln("Didn't get break before period in \"boo.\"");1426if (wb.current() != 4 && wb.next() != 4)1427errln("Didn't get break at end of string.");1428}14291430// [serialization test has been removed pursuant to bug #4152965]14311432/**1433* Bug 44508041434*/1435public void TestLineBreakContractions() {1436Vector<String> expected = new Vector<String>();14371438expected.add("These ");1439expected.add("are ");1440expected.add("'foobles'. ");1441expected.add("Don't ");1442expected.add("you ");1443expected.add("like ");1444expected.add("them?");1445generalIteratorTest(lineBreak, expected);1446}14471448}144914501451