Path: blob/master/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java
41159 views
/*1* Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package sun.util.locale;2627import java.util.ArrayList;28import java.util.Collection;29import java.util.HashMap;30import java.util.List;31import java.util.Locale;32import java.util.Locale.*;33import static java.util.Locale.FilteringMode.*;34import static java.util.Locale.LanguageRange.*;35import java.util.Map;36import java.util.Set;37import java.util.TreeSet;3839/**40* Implementation for BCP47 Locale matching41*42*/43public final class LocaleMatcher {4445public static List<Locale> filter(List<LanguageRange> priorityList,46Collection<Locale> locales,47FilteringMode mode) {48if (priorityList.isEmpty() || locales.isEmpty()) {49return new ArrayList<>(); // need to return a empty mutable List50}5152// Create a list of language tags to be matched.53List<String> tags = new ArrayList<>();54for (Locale locale : locales) {55tags.add(locale.toLanguageTag());56}5758// Filter language tags.59List<String> filteredTags = filterTags(priorityList, tags, mode);6061// Create a list of matching locales.62List<Locale> filteredLocales = new ArrayList<>(filteredTags.size());63for (String tag : filteredTags) {64filteredLocales.add(Locale.forLanguageTag(tag));65}6667return filteredLocales;68}6970public static List<String> filterTags(List<LanguageRange> priorityList,71Collection<String> tags,72FilteringMode mode) {73if (priorityList.isEmpty() || tags.isEmpty()) {74return new ArrayList<>(); // need to return a empty mutable List75}7677ArrayList<LanguageRange> list;78if (mode == EXTENDED_FILTERING) {79return filterExtended(priorityList, tags);80} else {81list = new ArrayList<>();82for (LanguageRange lr : priorityList) {83String range = lr.getRange();84if (range.startsWith("*-")85|| range.indexOf("-*") != -1) { // Extended range86if (mode == AUTOSELECT_FILTERING) {87return filterExtended(priorityList, tags);88} else if (mode == MAP_EXTENDED_RANGES) {89if (range.charAt(0) == '*') {90range = "*";91} else {92range = range.replaceAll("-[*]", "");93}94list.add(new LanguageRange(range, lr.getWeight()));95} else if (mode == REJECT_EXTENDED_RANGES) {96throw new IllegalArgumentException("An extended range \""97+ range98+ "\" found in REJECT_EXTENDED_RANGES mode.");99}100} else { // Basic range101list.add(lr);102}103}104105return filterBasic(list, tags);106}107}108109private static List<String> filterBasic(List<LanguageRange> priorityList,110Collection<String> tags) {111int splitIndex = splitRanges(priorityList);112List<LanguageRange> nonZeroRanges;113List<LanguageRange> zeroRanges;114if (splitIndex != -1) {115nonZeroRanges = priorityList.subList(0, splitIndex);116zeroRanges = priorityList.subList(splitIndex, priorityList.size());117} else {118nonZeroRanges = priorityList;119zeroRanges = List.of();120}121122List<String> list = new ArrayList<>();123for (LanguageRange lr : nonZeroRanges) {124String range = lr.getRange();125if (range.equals("*")) {126tags = removeTagsMatchingBasicZeroRange(zeroRanges, tags);127return new ArrayList<String>(tags);128} else {129for (String tag : tags) {130// change to lowercase for case-insensitive matching131String lowerCaseTag = tag.toLowerCase(Locale.ROOT);132if (lowerCaseTag.startsWith(range)) {133int len = range.length();134if ((lowerCaseTag.length() == len135|| lowerCaseTag.charAt(len) == '-')136&& !caseInsensitiveMatch(list, lowerCaseTag)137&& !shouldIgnoreFilterBasicMatch(zeroRanges,138lowerCaseTag)) {139// preserving the case of the input tag140list.add(tag);141}142}143}144}145}146147return list;148}149150/**151* Removes the tag(s) which are falling in the basic exclusion range(s) i.e152* range(s) with q=0 and returns the updated collection. If the basic153* language ranges contains '*' as one of its non zero range then instead of154* returning all the tags, remove those which are matching the range with155* quality weight q=0.156*/157private static Collection<String> removeTagsMatchingBasicZeroRange(158List<LanguageRange> zeroRange, Collection<String> tags) {159if (zeroRange.isEmpty()) {160tags = removeDuplicates(tags);161return tags;162}163164List<String> matchingTags = new ArrayList<>();165for (String tag : tags) {166// change to lowercase for case-insensitive matching167String lowerCaseTag = tag.toLowerCase(Locale.ROOT);168if (!shouldIgnoreFilterBasicMatch(zeroRange, lowerCaseTag)169&& !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {170matchingTags.add(tag); // preserving the case of the input tag171}172}173174return matchingTags;175}176177/**178* Remove duplicate tags from the given {@code tags} by179* ignoring case considerations.180*/181private static Collection<String> removeDuplicates(182Collection<String> tags) {183Set<String> distinctTags = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);184return tags.stream().filter(x -> distinctTags.add(x))185.toList();186}187188/**189* Returns true if the given {@code list} contains an element which matches190* with the given {@code tag} ignoring case considerations.191*/192private static boolean caseInsensitiveMatch(List<String> list, String tag) {193return list.stream().anyMatch((element)194-> (element.equalsIgnoreCase(tag)));195}196197/**198* The tag which is falling in the basic exclusion range(s) should not199* be considered as the matching tag. Ignores the tag matching with the200* non-zero ranges, if the tag also matches with one of the basic exclusion201* ranges i.e. range(s) having quality weight q=0202*/203private static boolean shouldIgnoreFilterBasicMatch(204List<LanguageRange> zeroRange, String tag) {205if (zeroRange.isEmpty()) {206return false;207}208209for (LanguageRange lr : zeroRange) {210String range = lr.getRange();211if (range.equals("*")) {212return true;213}214if (tag.startsWith(range)) {215int len = range.length();216if ((tag.length() == len || tag.charAt(len) == '-')) {217return true;218}219}220}221222return false;223}224225private static List<String> filterExtended(List<LanguageRange> priorityList,226Collection<String> tags) {227int splitIndex = splitRanges(priorityList);228List<LanguageRange> nonZeroRanges;229List<LanguageRange> zeroRanges;230if (splitIndex != -1) {231nonZeroRanges = priorityList.subList(0, splitIndex);232zeroRanges = priorityList.subList(splitIndex, priorityList.size());233} else {234nonZeroRanges = priorityList;235zeroRanges = List.of();236}237238List<String> list = new ArrayList<>();239for (LanguageRange lr : nonZeroRanges) {240String range = lr.getRange();241if (range.equals("*")) {242tags = removeTagsMatchingExtendedZeroRange(zeroRanges, tags);243return new ArrayList<String>(tags);244}245String[] rangeSubtags = range.split("-");246for (String tag : tags) {247// change to lowercase for case-insensitive matching248String lowerCaseTag = tag.toLowerCase(Locale.ROOT);249String[] tagSubtags = lowerCaseTag.split("-");250if (!rangeSubtags[0].equals(tagSubtags[0])251&& !rangeSubtags[0].equals("*")) {252continue;253}254255int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,256tagSubtags);257if (rangeSubtags.length == rangeIndex258&& !caseInsensitiveMatch(list, lowerCaseTag)259&& !shouldIgnoreFilterExtendedMatch(zeroRanges,260lowerCaseTag)) {261list.add(tag); // preserve the case of the input tag262}263}264}265266return list;267}268269/**270* Removes the tag(s) which are falling in the extended exclusion range(s)271* i.e range(s) with q=0 and returns the updated collection. If the extended272* language ranges contains '*' as one of its non zero range then instead of273* returning all the tags, remove those which are matching the range with274* quality weight q=0.275*/276private static Collection<String> removeTagsMatchingExtendedZeroRange(277List<LanguageRange> zeroRange, Collection<String> tags) {278if (zeroRange.isEmpty()) {279tags = removeDuplicates(tags);280return tags;281}282283List<String> matchingTags = new ArrayList<>();284for (String tag : tags) {285// change to lowercase for case-insensitive matching286String lowerCaseTag = tag.toLowerCase(Locale.ROOT);287if (!shouldIgnoreFilterExtendedMatch(zeroRange, lowerCaseTag)288&& !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {289matchingTags.add(tag); // preserve the case of the input tag290}291}292293return matchingTags;294}295296/**297* The tag which is falling in the extended exclusion range(s) should298* not be considered as the matching tag. Ignores the tag matching with the299* non zero range(s), if the tag also matches with one of the extended300* exclusion range(s) i.e. range(s) having quality weight q=0301*/302private static boolean shouldIgnoreFilterExtendedMatch(303List<LanguageRange> zeroRange, String tag) {304if (zeroRange.isEmpty()) {305return false;306}307308String[] tagSubtags = tag.split("-");309for (LanguageRange lr : zeroRange) {310String range = lr.getRange();311if (range.equals("*")) {312return true;313}314315String[] rangeSubtags = range.split("-");316317if (!rangeSubtags[0].equals(tagSubtags[0])318&& !rangeSubtags[0].equals("*")) {319continue;320}321322int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,323tagSubtags);324if (rangeSubtags.length == rangeIndex) {325return true;326}327}328329return false;330}331332private static int matchFilterExtendedSubtags(String[] rangeSubtags,333String[] tagSubtags) {334int rangeIndex = 1;335int tagIndex = 1;336337while (rangeIndex < rangeSubtags.length338&& tagIndex < tagSubtags.length) {339if (rangeSubtags[rangeIndex].equals("*")) {340rangeIndex++;341} else if (rangeSubtags[rangeIndex]342.equals(tagSubtags[tagIndex])) {343rangeIndex++;344tagIndex++;345} else if (tagSubtags[tagIndex].length() == 1346&& !tagSubtags[tagIndex].equals("*")) {347break;348} else {349tagIndex++;350}351}352return rangeIndex;353}354355public static Locale lookup(List<LanguageRange> priorityList,356Collection<Locale> locales) {357if (priorityList.isEmpty() || locales.isEmpty()) {358return null;359}360361// Create a list of language tags to be matched.362List<String> tags = new ArrayList<>();363for (Locale locale : locales) {364tags.add(locale.toLanguageTag());365}366367// Look up a language tags.368String lookedUpTag = lookupTag(priorityList, tags);369370if (lookedUpTag == null) {371return null;372} else {373return Locale.forLanguageTag(lookedUpTag);374}375}376377public static String lookupTag(List<LanguageRange> priorityList,378Collection<String> tags) {379if (priorityList.isEmpty() || tags.isEmpty()) {380return null;381}382383int splitIndex = splitRanges(priorityList);384List<LanguageRange> nonZeroRanges;385List<LanguageRange> zeroRanges;386if (splitIndex != -1) {387nonZeroRanges = priorityList.subList(0, splitIndex);388zeroRanges = priorityList.subList(splitIndex, priorityList.size());389} else {390nonZeroRanges = priorityList;391zeroRanges = List.of();392}393394for (LanguageRange lr : nonZeroRanges) {395String range = lr.getRange();396397// Special language range ("*") is ignored in lookup.398if (range.equals("*")) {399continue;400}401402String rangeForRegex = range.replace("*", "\\p{Alnum}*");403while (!rangeForRegex.isEmpty()) {404for (String tag : tags) {405// change to lowercase for case-insensitive matching406String lowerCaseTag = tag.toLowerCase(Locale.ROOT);407if (lowerCaseTag.matches(rangeForRegex)408&& !shouldIgnoreLookupMatch(zeroRanges, lowerCaseTag)) {409return tag; // preserve the case of the input tag410}411}412413// Truncate from the end....414rangeForRegex = truncateRange(rangeForRegex);415}416}417418return null;419}420421/**422* The tag which is falling in the exclusion range(s) should not be423* considered as the matching tag. Ignores the tag matching with the424* non zero range(s), if the tag also matches with one of the exclusion425* range(s) i.e. range(s) having quality weight q=0.426*/427private static boolean shouldIgnoreLookupMatch(List<LanguageRange> zeroRange,428String tag) {429for (LanguageRange lr : zeroRange) {430String range = lr.getRange();431432// Special language range ("*") is ignored in lookup.433if (range.equals("*")) {434continue;435}436437String rangeForRegex = range.replace("*", "\\p{Alnum}*");438while (!rangeForRegex.isEmpty()) {439if (tag.matches(rangeForRegex)) {440return true;441}442// Truncate from the end....443rangeForRegex = truncateRange(rangeForRegex);444}445}446447return false;448}449450/* Truncate the range from end during the lookup match */451private static String truncateRange(String rangeForRegex) {452int index = rangeForRegex.lastIndexOf('-');453if (index >= 0) {454rangeForRegex = rangeForRegex.substring(0, index);455456// if range ends with an extension key, truncate it.457index = rangeForRegex.lastIndexOf('-');458if (index >= 0 && index == rangeForRegex.length() - 2) {459rangeForRegex460= rangeForRegex.substring(0, rangeForRegex.length() - 2);461}462} else {463rangeForRegex = "";464}465466return rangeForRegex;467}468469/* Returns the split index of the priority list, if it contains470* language range(s) with quality weight as 0 i.e. q=0, else -1471*/472private static int splitRanges(List<LanguageRange> priorityList) {473int size = priorityList.size();474for (int index = 0; index < size; index++) {475LanguageRange range = priorityList.get(index);476if (range.getWeight() == 0) {477return index;478}479}480481return -1; // no q=0 range exists482}483484public static List<LanguageRange> parse(String ranges) {485ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);486if (ranges.startsWith("accept-language:")) {487ranges = ranges.substring(16); // delete unnecessary prefix488}489490String[] langRanges = ranges.split(",");491List<LanguageRange> list = new ArrayList<>(langRanges.length);492List<String> tempList = new ArrayList<>();493int numOfRanges = 0;494495for (String range : langRanges) {496int index;497String r;498double w;499500if ((index = range.indexOf(";q=")) == -1) {501r = range;502w = MAX_WEIGHT;503} else {504r = range.substring(0, index);505index += 3;506try {507w = Double.parseDouble(range.substring(index));508}509catch (Exception e) {510throw new IllegalArgumentException("weight=\""511+ range.substring(index)512+ "\" for language range \"" + r + "\"");513}514515if (w < MIN_WEIGHT || w > MAX_WEIGHT) {516throw new IllegalArgumentException("weight=" + w517+ " for language range \"" + r518+ "\". It must be between " + MIN_WEIGHT519+ " and " + MAX_WEIGHT + ".");520}521}522523if (!tempList.contains(r)) {524LanguageRange lr = new LanguageRange(r, w);525index = numOfRanges;526for (int j = 0; j < numOfRanges; j++) {527if (list.get(j).getWeight() < w) {528index = j;529break;530}531}532list.add(index, lr);533numOfRanges++;534tempList.add(r);535536// Check if the range has an equivalent using IANA LSR data.537// If yes, add it to the User's Language Priority List as well.538539// aa-XX -> aa-YY540String equivalent;541if ((equivalent = getEquivalentForRegionAndVariant(r)) != null542&& !tempList.contains(equivalent)) {543list.add(index+1, new LanguageRange(equivalent, w));544numOfRanges++;545tempList.add(equivalent);546}547548String[] equivalents;549if ((equivalents = getEquivalentsForLanguage(r)) != null) {550for (String equiv: equivalents) {551// aa-XX -> bb-XX(, cc-XX)552if (!tempList.contains(equiv)) {553list.add(index+1, new LanguageRange(equiv, w));554numOfRanges++;555tempList.add(equiv);556}557558// bb-XX -> bb-YY(, cc-YY)559equivalent = getEquivalentForRegionAndVariant(equiv);560if (equivalent != null561&& !tempList.contains(equivalent)) {562list.add(index+1, new LanguageRange(equivalent, w));563numOfRanges++;564tempList.add(equivalent);565}566}567}568}569}570571return list;572}573574/**575* A faster alternative approach to String.replaceFirst(), if the given576* string is a literal String, not a regex.577*/578private static String replaceFirstSubStringMatch(String range,579String substr, String replacement) {580int pos = range.indexOf(substr);581if (pos == -1) {582return range;583} else {584return range.substring(0, pos) + replacement585+ range.substring(pos + substr.length());586}587}588589private static String[] getEquivalentsForLanguage(String range) {590String r = range;591592while (!r.isEmpty()) {593if (LocaleEquivalentMaps.singleEquivMap.containsKey(r)) {594String equiv = LocaleEquivalentMaps.singleEquivMap.get(r);595// Return immediately for performance if the first matching596// subtag is found.597return new String[]{replaceFirstSubStringMatch(range,598r, equiv)};599} else if (LocaleEquivalentMaps.multiEquivsMap.containsKey(r)) {600String[] equivs = LocaleEquivalentMaps.multiEquivsMap.get(r);601String[] result = new String[equivs.length];602for (int i = 0; i < equivs.length; i++) {603result[i] = replaceFirstSubStringMatch(range,604r, equivs[i]);605}606return result;607}608609// Truncate the last subtag simply.610int index = r.lastIndexOf('-');611if (index == -1) {612break;613}614r = r.substring(0, index);615}616617return null;618}619620private static String getEquivalentForRegionAndVariant(String range) {621int extensionKeyIndex = getExtentionKeyIndex(range);622623for (String subtag : LocaleEquivalentMaps.regionVariantEquivMap.keySet()) {624int index;625if ((index = range.indexOf(subtag)) != -1) {626// Check if the matching text is a valid region or variant.627if (extensionKeyIndex != Integer.MIN_VALUE628&& index > extensionKeyIndex) {629continue;630}631632int len = index + subtag.length();633if (range.length() == len || range.charAt(len) == '-') {634return replaceFirstSubStringMatch(range, subtag,635LocaleEquivalentMaps.regionVariantEquivMap636.get(subtag));637}638}639}640641return null;642}643644private static int getExtentionKeyIndex(String s) {645char[] c = s.toCharArray();646int index = Integer.MIN_VALUE;647for (int i = 1; i < c.length; i++) {648if (c[i] == '-') {649if (i - index == 2) {650return index;651} else {652index = i;653}654}655}656return Integer.MIN_VALUE;657}658659public static List<LanguageRange> mapEquivalents(660List<LanguageRange>priorityList,661Map<String, List<String>> map) {662if (priorityList.isEmpty()) {663return new ArrayList<>(); // need to return a empty mutable List664}665if (map == null || map.isEmpty()) {666return new ArrayList<LanguageRange>(priorityList);667}668669// Create a map, key=originalKey.toLowerCaes(), value=originalKey670Map<String, String> keyMap = new HashMap<>();671for (String key : map.keySet()) {672keyMap.put(key.toLowerCase(Locale.ROOT), key);673}674675List<LanguageRange> list = new ArrayList<>();676for (LanguageRange lr : priorityList) {677String range = lr.getRange();678String r = range;679boolean hasEquivalent = false;680681while (!r.isEmpty()) {682if (keyMap.containsKey(r)) {683hasEquivalent = true;684List<String> equivalents = map.get(keyMap.get(r));685if (equivalents != null) {686int len = r.length();687for (String equivalent : equivalents) {688list.add(new LanguageRange(equivalent.toLowerCase(Locale.ROOT)689+ range.substring(len),690lr.getWeight()));691}692}693// Return immediately if the first matching subtag is found.694break;695}696697// Truncate the last subtag simply.698int index = r.lastIndexOf('-');699if (index == -1) {700break;701}702r = r.substring(0, index);703}704705if (!hasEquivalent) {706list.add(lr);707}708}709710return list;711}712713private LocaleMatcher() {}714715}716717718