Path: blob/master/src/java.base/share/classes/sun/util/locale/InternalLocaleBuilder.java
41159 views
/*
 * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * Copyright (C) 2009-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package sun.util.locale;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Mutable builder that accumulates the pieces of a locale (language, script,
 * region, variant, extensions, Unicode locale attributes and keywords),
 * validating each piece as it is set, and produces a {@link BaseLocale} and
 * {@link LocaleExtensions} on request.
 *
 * <p>All setters return {@code this} so that calls can be chained.
 */
public final class InternalLocaleBuilder {

    /** Map key under which the private use value is stored in {@link #extensions}. */
    private static final CaseInsensitiveChar PRIVATEUSE_KEY
        = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE);

    private String language = "";
    private String script = "";
    private String region = "";
    private String variant = "";

    // All three collections are created lazily and may therefore be null.
    private Map<CaseInsensitiveChar, String> extensions;
    private Set<CaseInsensitiveString> uattributes;
    private Map<CaseInsensitiveString, String> ukeywords;

    public InternalLocaleBuilder() {
    }

    /**
     * Sets the language. An empty value (per {@code LocaleUtils.isEmpty})
     * resets the language to the empty string.
     *
     * @param language the language subtag, or an empty value to clear it
     * @return this builder
     * @throws LocaleSyntaxException if {@code language} is non-empty and
     *         rejected by {@code LanguageTag.isLanguage}
     */
    public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(language)) {
            this.language = "";
        } else {
            if (!LanguageTag.isLanguage(language)) {
                throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
            }
            this.language = language;
        }
        return this;
    }

    /**
     * Sets the script. An empty value resets the script to the empty string.
     *
     * @param script the script subtag, or an empty value to clear it
     * @return this builder
     * @throws LocaleSyntaxException if {@code script} is non-empty and
     *         rejected by {@code LanguageTag.isScript}
     */
    public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(script)) {
            this.script = "";
        } else {
            if (!LanguageTag.isScript(script)) {
                throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
            }
            this.script = script;
        }
        return this;
    }

    /**
     * Sets the region. An empty value resets the region to the empty string.
     *
     * @param region the region subtag, or an empty value to clear it
     * @return this builder
     * @throws LocaleSyntaxException if {@code region} is non-empty and
     *         rejected by {@code LanguageTag.isRegion}
     */
    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(region)) {
            this.region = "";
        } else {
            if (!LanguageTag.isRegion(region)) {
                throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
            }
            this.region = region;
        }
        return this;
    }

    /**
     * Sets the variant. An empty value resets the variant to the empty
     * string. Otherwise the separators are normalized to
     * {@code BaseLocale.SEP}, every subtag is validated, and the normalized
     * form is stored.
     *
     * @param variant the variant subtags, or an empty value to clear them
     * @return this builder
     * @throws LocaleSyntaxException if any subtag is rejected by
     *         {@code LanguageTag.isVariant}; the error index is the start of
     *         the offending subtag within the normalized string
     */
    public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(variant)) {
            this.variant = "";
        } else {
            // normalize separators to "_"
            String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
            int errIdx = checkVariants(var, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
            this.variant = var;
        }
        return this;
    }

    /**
     * Adds a Unicode locale attribute. Attributes that differ only by case
     * are treated as the same attribute.
     *
     * @param attribute the attribute to add
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is rejected by
     *         {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        // Use case insensitive string to prevent duplication
        if (uattributes == null) {
            uattributes = new HashSet<>(4);
        }
        uattributes.add(new CaseInsensitiveString(attribute));
        return this;
    }

    /**
     * Removes a Unicode locale attribute, ignoring case. Removing an
     * attribute that is not present is a no-op.
     *
     * @param attribute the attribute to remove
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is {@code null} or
     *         rejected by {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        if (uattributes != null) {
            uattributes.remove(new CaseInsensitiveString(attribute));
        }
        return this;
    }

    /**
     * Sets, replaces or removes a Unicode locale keyword.
     *
     * @param key  the keyword key; matched case-insensitively
     * @param type the keyword type; {@code null} removes the keyword, an
     *             empty string is stored without validation
     * @return this builder
     * @throws LocaleSyntaxException if {@code key} is rejected by
     *         {@code UnicodeLocaleExtension.isKey}, or a subtag of a
     *         non-empty {@code type} is rejected by
     *         {@code UnicodeLocaleExtension.isTypeSubtag}
     */
    public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isKey(key)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
        }

        CaseInsensitiveString cikey = new CaseInsensitiveString(key);
        if (type == null) {
            if (ukeywords != null) {
                // a null type means "remove the key"
                ukeywords.remove(cikey);
            }
        } else {
            if (type.length() != 0) {
                // normalize separator to "-"
                String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
                // validate
                StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
                while (!itr.isDone()) {
                    String s = itr.current();
                    if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
                        throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: "
                                                        + type,
                                                        itr.currentStart());
                    }
                    itr.next();
                }
            }
            if (ukeywords == null) {
                ukeywords = new HashMap<>(4);
            }
            // NOTE(review): validation runs on the separator-normalized copy,
            // but the caller's original string is what gets stored. Confirm
            // whether storing the un-normalized form is intended.
            ukeywords.put(cikey, type);
        }
        return this;
    }

    /**
     * Sets or removes a single extension or the private use value.
     *
     * <p>An empty {@code value} removes the entry for {@code singleton}; for
     * the Unicode locale extension singleton this clears all Unicode locale
     * attributes and keywords. A non-empty value is separator-normalized to
     * {@code LanguageTag.SEP}, validated subtag by subtag, and then stored
     * (or, for the Unicode locale extension, parsed into attributes and
     * keywords, replacing any existing ones).
     *
     * @param singleton the extension singleton or private use prefix character
     * @param value     the extension value, or an empty value to remove it
     * @return this builder
     * @throws LocaleSyntaxException if {@code singleton} is neither a private
     *         use prefix nor an extension singleton, or if any subtag of
     *         {@code value} is ill-formed
     */
    public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
        // validate key
        boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
        if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
            throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
        }

        boolean remove = LocaleUtils.isEmpty(value);
        CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);

        if (remove) {
            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                // clear entire Unicode locale extension
                if (uattributes != null) {
                    uattributes.clear();
                }
                if (ukeywords != null) {
                    ukeywords.clear();
                }
            } else {
                if (extensions != null) {
                    // Map.remove is already a no-op for an absent key
                    extensions.remove(key);
                }
            }
        } else {
            // validate value
            String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
            StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
            while (!itr.isDone()) {
                String s = itr.current();
                boolean validSubtag;
                if (isBcpPrivateuse) {
                    validSubtag = LanguageTag.isPrivateuseSubtag(s);
                } else {
                    validSubtag = LanguageTag.isExtensionSubtag(s);
                }
                if (!validSubtag) {
                    throw new LocaleSyntaxException("Ill-formed extension value: " + s,
                                                    itr.currentStart());
                }
                itr.next();
            }

            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                setUnicodeLocaleExtension(val);
            } else {
                if (extensions == null) {
                    extensions = new HashMap<>(4);
                }
                extensions.put(key, val);
            }
        }
        return this;
    }

    /**
     * Sets extensions and private use subtags from a single string
     * representation, replacing everything previously set. An empty value
     * just clears all extensions.
     *
     * <p>The string is parsed as zero or more extensions (a singleton
     * followed by at least one extension subtag) optionally followed by one
     * private use sequence (the private use prefix followed by at least one
     * private use subtag). Nothing may follow.
     *
     * @param subtags the extension and private use subtags
     * @return this builder
     * @throws LocaleSyntaxException if an extension or the private use
     *         sequence has no subtags, or if unparsable subtags remain
     */
    public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(subtags)) {
            clearExtensions();
            return this;
        }
        subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // Named differently from the 'extensions' field to avoid shadowing it.
        List<String> bcpExtensions = null;
        String privateuse = null;

        // End offset of the last subtag accepted so far (0 = none yet).
        int parsed = 0;
        int start;

        // Make a list of extension subtags
        while (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isExtensionSingleton(s)) {
                start = itr.currentStart();
                String singleton = s;
                StringBuilder sb = new StringBuilder(singleton);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (LanguageTag.isExtensionSubtag(s)) {
                        sb.append(LanguageTag.SEP).append(s);
                        parsed = itr.currentEnd();
                    } else {
                        break;
                    }
                    itr.next();
                }

                // No subtag was accepted after this singleton. The comparison
                // must be '<=' rather than '<': for the very first singleton
                // both 'start' and 'parsed' are 0, and letting a bare
                // singleton through makes the later substring(2) in
                // setExtensions(List, String) fail with
                // StringIndexOutOfBoundsException instead of reporting a
                // syntax error. This also matches the private use check below.
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'",
                                                    start);
                }

                if (bcpExtensions == null) {
                    bcpExtensions = new ArrayList<>(4);
                }
                bcpExtensions.add(sb.toString());
            } else {
                break;
            }
        }
        if (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isPrivateusePrefix(s)) {
                start = itr.currentStart();
                StringBuilder sb = new StringBuilder(s);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (!LanguageTag.isPrivateuseSubtag(s)) {
                        break;
                    }
                    sb.append(LanguageTag.SEP).append(s);
                    parsed = itr.currentEnd();

                    itr.next();
                }
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete privateuse:"
                                                    + subtags.substring(start),
                                                    start);
                } else {
                    privateuse = sb.toString();
                }
            }
        }

        if (!itr.isDone()) {
            throw new LocaleSyntaxException("Ill-formed extension subtags:"
                                            + subtags.substring(itr.currentStart()),
                                            itr.currentStart());
        }

        return setExtensions(bcpExtensions, privateuse);
    }

    /*
     * Set a list of BCP47 extensions and private use subtags.
     * BCP47 extensions are already validated and well-formed, but may contain
     * duplicates; only the first occurrence of each singleton is kept.
     * Each entry must include its singleton and separator (e.g. "a-abc-def"),
     * because the value is taken as substring(2).
     */
    private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
        clearExtensions();

        if (!LocaleUtils.isEmpty(bcpExtensions)) {
            Set<CaseInsensitiveChar> done = new HashSet<>(bcpExtensions.size());
            for (String bcpExt : bcpExtensions) {
                CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt);
                // ignore duplicates
                if (!done.contains(key)) {
                    // each extension string contains singleton, e.g. "a-abc-def"
                    if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                        setUnicodeLocaleExtension(bcpExt.substring(2));
                    } else {
                        if (extensions == null) {
                            extensions = new HashMap<>(4);
                        }
                        extensions.put(key, bcpExt.substring(2));
                    }
                }
                done.add(key);
            }
        }
        if (privateuse != null && !privateuse.isEmpty()) {
            // privateuse string contains prefix, e.g. "x-abc-def"
            if (extensions == null) {
                extensions = new HashMap<>(1);
            }
            extensions.put(new CaseInsensitiveChar(privateuse), privateuse.substring(2));
        }

        return this;
    }

    /**
     * Resets this builder's state from the given language tag.
     *
     * <p>If the tag has extlangs, the first one becomes the language;
     * otherwise the tag's language is used unless it equals
     * {@code LanguageTag.UNDETERMINED}. Variants are joined with
     * {@code BaseLocale.SEP}.
     *
     * @param langtag the parsed language tag to copy from
     * @return this builder
     */
    public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
        clear();
        if (!langtag.getExtlangs().isEmpty()) {
            language = langtag.getExtlangs().get(0);
        } else {
            String lang = langtag.getLanguage();
            if (!lang.equals(LanguageTag.UNDETERMINED)) {
                language = lang;
            }
        }
        script = langtag.getScript();
        region = langtag.getRegion();

        List<String> bcpVariants = langtag.getVariants();
        if (!bcpVariants.isEmpty()) {
            StringBuilder var = new StringBuilder(bcpVariants.get(0));
            int size = bcpVariants.size();
            for (int i = 1; i < size; i++) {
                var.append(BaseLocale.SEP).append(bcpVariants.get(i));
            }
            variant = var.toString();
        }

        setExtensions(langtag.getExtensions(), langtag.getPrivateuse());

        return this;
    }

    /**
     * Resets this builder's state from a base locale and its extensions.
     *
     * <p>Three legacy locales are special-cased before validation:
     * {@code ja_JP_JP} and {@code th_TH_TH} drop their variant, and
     * {@code no_NO_NY} becomes language {@code nn} with no variant. The
     * builder's fields are only updated after all base locale fields have
     * been validated.
     *
     * @param base             the base locale to copy from
     * @param localeExtensions the extensions to copy from; {@code null} is
     *                         handled by the copy loop below
     * @return this builder
     * @throws LocaleSyntaxException if the language, script, region or
     *         variant of {@code base} is ill-formed
     */
    public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions localeExtensions) throws LocaleSyntaxException {
        String language = base.getLanguage();
        String script = base.getScript();
        String region = base.getRegion();
        String variant = base.getVariant();

        // Special backward compatibility support
        // NOTE(review): the two asserts below dereference localeExtensions
        // without a null check, although the code further down tolerates
        // null. Confirm that these locales always arrive with extensions.

        // Exception 1 - ja_JP_JP
        if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
            // When locale ja_JP_JP is created, ca-japanese is always there.
            // The builder ignores the variant "JP"
            assert("japanese".equals(localeExtensions.getUnicodeLocaleType("ca")));
            variant = "";
        }
        // Exception 2 - th_TH_TH
        else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
            // When locale th_TH_TH is created, nu-thai is always there.
            // The builder ignores the variant "TH"
            assert("thai".equals(localeExtensions.getUnicodeLocaleType("nu")));
            variant = "";
        }
        // Exception 3 - no_NO_NY
        else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
            // no_NO_NY is a valid locale and used by Java 6 or older versions.
            // The builder ignores the variant "NY" and changes the language to "nn".
            language = "nn";
            variant = "";
        }

        // Validate base locale fields before updating internal state.
        // LocaleExtensions always store validated/canonicalized values,
        // so no checks are necessary.
        if (!language.isEmpty() && !LanguageTag.isLanguage(language)) {
            throw new LocaleSyntaxException("Ill-formed language: " + language);
        }

        if (!script.isEmpty() && !LanguageTag.isScript(script)) {
            throw new LocaleSyntaxException("Ill-formed script: " + script);
        }

        if (!region.isEmpty() && !LanguageTag.isRegion(region)) {
            throw new LocaleSyntaxException("Ill-formed region: " + region);
        }

        if (!variant.isEmpty()) {
            int errIdx = checkVariants(variant, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
        }

        // The input locale is validated at this point.
        // Now, updating builder's internal fields.
        this.language = language;
        this.script = script;
        this.region = region;
        this.variant = variant;
        clearExtensions();

        Set<Character> extKeys = (localeExtensions == null) ? null : localeExtensions.getKeys();
        if (extKeys != null) {
            // map localeExtensions back to builder's internal format
            for (Character key : extKeys) {
                Extension e = localeExtensions.getExtension(key);
                if (e instanceof UnicodeLocaleExtension) {
                    UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
                    for (String uatr : ue.getUnicodeLocaleAttributes()) {
                        if (uattributes == null) {
                            uattributes = new HashSet<>(4);
                        }
                        uattributes.add(new CaseInsensitiveString(uatr));
                    }
                    for (String ukey : ue.getUnicodeLocaleKeys()) {
                        if (ukeywords == null) {
                            ukeywords = new HashMap<>(4);
                        }
                        ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
                    }
                } else {
                    if (extensions == null) {
                        extensions = new HashMap<>(4);
                    }
                    extensions.put(new CaseInsensitiveChar(key), e.getValue());
                }
            }
        }
        return this;
    }

    /**
     * Resets language, script, region and variant to the empty string and
     * clears all extensions.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clear() {
        language = "";
        script = "";
        region = "";
        variant = "";
        clearExtensions();
        return this;
    }

    /**
     * Empties the extension map and the Unicode locale attribute and keyword
     * collections. Collections that were never created stay {@code null}.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clearExtensions() {
        if (extensions != null) {
            extensions.clear();
        }
        if (uattributes != null) {
            uattributes.clear();
        }
        if (ukeywords != null) {
            ukeywords.clear();
        }
        return this;
    }

    /**
     * Builds the {@link BaseLocale} for the current state.
     *
     * <p>If the private use value contains the subtag
     * {@code LanguageTag.PRIVUSE_VARIANT_PREFIX} followed by at least one
     * more subtag, everything after that prefix is appended to the variant,
     * with separators converted to {@code BaseLocale.SEP}.
     *
     * @return the base locale obtained from {@code BaseLocale.getInstance}
     */
    public BaseLocale getBaseLocale() {
        String language = this.language;
        String script = this.script;
        String region = this.region;
        String variant = this.variant;

        // Special private use subtag sequence identified by "lvariant" will be
        // interpreted as Java variant.
        if (extensions != null) {
            String privuse = extensions.get(PRIVATEUSE_KEY);
            if (privuse != null) {
                StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
                boolean sawPrefix = false;
                int privVarStart = -1;
                while (!itr.isDone()) {
                    if (sawPrefix) {
                        // first subtag after the prefix: the variant part starts here
                        privVarStart = itr.currentStart();
                        break;
                    }
                    if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                        sawPrefix = true;
                    }
                    itr.next();
                }
                if (privVarStart != -1) {
                    StringBuilder sb = new StringBuilder(variant);
                    if (sb.length() != 0) {
                        sb.append(BaseLocale.SEP);
                    }
                    sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP,
                                                                         BaseLocale.SEP));
                    variant = sb.toString();
                }
            }
        }

        return BaseLocale.getInstance(language, script, region, variant);
    }

    /**
     * Builds the {@link LocaleExtensions} for the current state.
     *
     * @return the locale extensions, or {@code null} if nothing has been set
     *         or the constructed object reports itself empty
     */
    public LocaleExtensions getLocaleExtensions() {
        if (LocaleUtils.isEmpty(extensions) && LocaleUtils.isEmpty(uattributes)
            && LocaleUtils.isEmpty(ukeywords)) {
            return null;
        }

        LocaleExtensions lext = new LocaleExtensions(extensions, uattributes, ukeywords);
        return lext.isEmpty() ? null : lext;
    }

    /*
     * Remove special private use subtag sequence identified by "lvariant"
     * and return the rest. Only used by LocaleExtensions.
     *
     * Returns the input unchanged when there is no "lvariant" subtag or when
     * nothing follows it, and null when "lvariant" is the first subtag and is
     * followed by at least one more subtag.
     */
    static String removePrivateuseVariant(String privuseVal) {
        StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);

        int prefixStart = -1;
        boolean sawPrivuseVar = false;
        while (!itr.isDone()) {
            if (prefixStart != -1) {
                // Note: privateuse value "abc-lvariant" is unchanged
                // because no subtags after "lvariant".
                sawPrivuseVar = true;
                break;
            }
            if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                prefixStart = itr.currentStart();
            }
            itr.next();
        }
        if (!sawPrivuseVar) {
            return privuseVal;
        }

        assert(prefixStart == 0 || prefixStart > 1);
        // prefixStart - 1 also drops the separator preceding "lvariant"
        return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1);
    }

    /*
     * Check if the given variant subtags separated by the given
     * separator(s) are valid. Returns the start index of the first invalid
     * subtag, or -1 if all subtags are valid.
     */
    private int checkVariants(String variants, String sep) {
        StringTokenIterator itr = new StringTokenIterator(variants, sep);
        while (!itr.isDone()) {
            String s = itr.current();
            if (!LanguageTag.isVariant(s)) {
                return itr.currentStart();
            }
            itr.next();
        }
        return -1;
    }

    /*
     * Private methods parsing Unicode Locale Extension subtags.
     * Duplicated attributes/keywords will be ignored.
     * The input must be a valid extension subtags (excluding singleton).
     */
    private void setUnicodeLocaleExtension(String subtags) {
        // wipe out existing attributes/keywords
        if (uattributes != null) {
            uattributes.clear();
        }
        if (ukeywords != null) {
            ukeywords.clear();
        }

        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // parse attributes: the leading run of subtags accepted by isAttribute
        while (!itr.isDone()) {
            if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
                break;
            }
            if (uattributes == null) {
                uattributes = new HashSet<>(4);
            }
            uattributes.add(new CaseInsensitiveString(itr.current()));
            itr.next();
        }

        // parse keywords
        // 'key' is the keyword currently being collected (null while skipping
        // a duplicate); typeStart/typeEnd delimit its type within 'subtags'.
        CaseInsensitiveString key = null;
        String type;
        int typeStart = -1;
        int typeEnd = -1;
        while (!itr.isDone()) {
            if (key != null) {
                if (UnicodeLocaleExtension.isKey(itr.current())) {
                    // next keyword - emit previous one
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);

                    // reset keyword info
                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
                    key = ukeywords.containsKey(tmpKey) ? null : tmpKey;
                    typeStart = typeEnd = -1;
                } else {
                    if (typeStart == -1) {
                        typeStart = itr.currentStart();
                    }
                    typeEnd = itr.currentEnd();
                }
            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
                // 1. first keyword or
                // 2. next keyword, but previous one was duplicate
                key = new CaseInsensitiveString(itr.current());
                if (ukeywords != null && ukeywords.containsKey(key)) {
                    // duplicate
                    key = null;
                }
            }

            if (!itr.hasNext()) {
                if (key != null) {
                    // last keyword
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);
                }
                break;
            }

            itr.next();
        }
    }

    /**
     * String wrapper whose {@code equals} and {@code hashCode} use the
     * lower-cased form produced by {@code LocaleUtils.toLowerString}, while
     * {@link #value()} returns the string as originally supplied.
     */
    static final class CaseInsensitiveString {
        private final String str, lowerStr;

        CaseInsensitiveString(String s) {
            str = s;
            lowerStr = LocaleUtils.toLowerString(s);
        }

        public String value() {
            return str;
        }

        @Override
        public int hashCode() {
            return lowerStr.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveString)) {
                return false;
            }
            return lowerStr.equals(((CaseInsensitiveString)obj).lowerStr);
        }
    }

    /**
     * Char wrapper whose {@code equals} and {@code hashCode} use the
     * lower-cased form produced by {@code LocaleUtils.toLower}, while
     * {@link #value()} returns the char as originally supplied.
     */
    static final class CaseInsensitiveChar {
        private final char ch, lowerCh;

        /**
         * Constructs a CaseInsensitiveChar with the first char of the
         * given s.
         */
        private CaseInsensitiveChar(String s) {
            this(s.charAt(0));
        }

        CaseInsensitiveChar(char c) {
            ch = c;
            lowerCh = LocaleUtils.toLower(ch);
        }

        public char value() {
            return ch;
        }

        @Override
        public int hashCode() {
            return lowerCh;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveChar)) {
                return false;
            }
            return lowerCh == ((CaseInsensitiveChar)obj).lowerCh;
        }
    }
}