1 /*
2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.util.locale;
27 
28 import java.util.ArrayList;
29 import java.util.Collection;
30 import java.util.HashMap;
31 import java.util.Iterator;
32 import java.util.LinkedHashMap;
33 import java.util.LinkedList;
34 import java.util.List;
35 import java.util.Locale;
36 import java.util.Locale.*;
37 import static java.util.Locale.FilteringMode.*;
38 import static java.util.Locale.LanguageRange.*;
39 import java.util.Map;
40 import java.util.Set;
41 
42 /**
43  * Implementation for BCP47 Locale matching
44  *
45  */
46 public final class LocaleMatcher {
47 
filter(List<LanguageRange> priorityList, Collection<Locale> locales, FilteringMode mode)48     public static List<Locale> filter(List<LanguageRange> priorityList,
49                                       Collection<Locale> locales,
50                                       FilteringMode mode) {
51         if (priorityList.isEmpty() || locales.isEmpty()) {
52             return new ArrayList<>(); // need to return a empty mutable List
53         }
54 
55         // Create a list of language tags to be matched.
56         List<String> tags = new ArrayList<>();
57         for (Locale locale : locales) {
58             tags.add(locale.toLanguageTag());
59         }
60 
61         // Filter language tags.
62         List<String> filteredTags = filterTags(priorityList, tags, mode);
63 
64         // Create a list of matching locales.
65         List<Locale> filteredLocales = new ArrayList<>(filteredTags.size());
66         for (String tag : filteredTags) {
67               filteredLocales.add(Locale.forLanguageTag(tag));
68         }
69 
70         return filteredLocales;
71     }
72 
filterTags(List<LanguageRange> priorityList, Collection<String> tags, FilteringMode mode)73     public static List<String> filterTags(List<LanguageRange> priorityList,
74                                           Collection<String> tags,
75                                           FilteringMode mode) {
76         if (priorityList.isEmpty() || tags.isEmpty()) {
77             return new ArrayList<>(); // need to return a empty mutable List
78         }
79 
80         ArrayList<LanguageRange> list;
81         if (mode == EXTENDED_FILTERING) {
82             return filterExtended(priorityList, tags);
83         } else {
84             list = new ArrayList<>();
85             for (LanguageRange lr : priorityList) {
86                 String range = lr.getRange();
87                 if (range.startsWith("*-")
88                     || range.indexOf("-*") != -1) { // Extended range
89                     if (mode == AUTOSELECT_FILTERING) {
90                         return filterExtended(priorityList, tags);
91                     } else if (mode == MAP_EXTENDED_RANGES) {
92                         if (range.charAt(0) == '*') {
93                             range = "*";
94                         } else {
95                             range = range.replaceAll("-[*]", "");
96                         }
97                         list.add(new LanguageRange(range, lr.getWeight()));
98                     } else if (mode == REJECT_EXTENDED_RANGES) {
99                         throw new IllegalArgumentException("An extended range \""
100                                       + range
101                                       + "\" found in REJECT_EXTENDED_RANGES mode.");
102                     }
103                 } else { // Basic range
104                     list.add(lr);
105                 }
106             }
107 
108             return filterBasic(list, tags);
109         }
110     }
111 
filterBasic(List<LanguageRange> priorityList, Collection<String> tags)112     private static List<String> filterBasic(List<LanguageRange> priorityList,
113                                             Collection<String> tags) {
114         List<String> list = new ArrayList<>();
115         for (LanguageRange lr : priorityList) {
116             String range = lr.getRange();
117             if (range.equals("*")) {
118                 return new ArrayList<String>(tags);
119             } else {
120                 for (String tag : tags) {
121                     tag = tag.toLowerCase();
122                     if (tag.startsWith(range)) {
123                         int len = range.length();
124                         if ((tag.length() == len || tag.charAt(len) == '-')
125                             && !list.contains(tag)) {
126                             list.add(tag);
127                         }
128                     }
129                 }
130             }
131         }
132 
133         return list;
134     }
135 
filterExtended(List<LanguageRange> priorityList, Collection<String> tags)136     private static List<String> filterExtended(List<LanguageRange> priorityList,
137                                                Collection<String> tags) {
138         List<String> list = new ArrayList<>();
139         for (LanguageRange lr : priorityList) {
140             String range = lr.getRange();
141             if (range.equals("*")) {
142                 return new ArrayList<String>(tags);
143             }
144             String[] rangeSubtags = range.split("-");
145             for (String tag : tags) {
146                 tag = tag.toLowerCase();
147                 String[] tagSubtags = tag.split("-");
148                 if (!rangeSubtags[0].equals(tagSubtags[0])
149                     && !rangeSubtags[0].equals("*")) {
150                     continue;
151                 }
152 
153                 int rangeIndex = 1;
154                 int tagIndex = 1;
155 
156                 while (rangeIndex < rangeSubtags.length
157                        && tagIndex < tagSubtags.length) {
158                    if (rangeSubtags[rangeIndex].equals("*")) {
159                        rangeIndex++;
160                    } else if (rangeSubtags[rangeIndex].equals(tagSubtags[tagIndex])) {
161                        rangeIndex++;
162                        tagIndex++;
163                    } else if (tagSubtags[tagIndex].length() == 1
164                               && !tagSubtags[tagIndex].equals("*")) {
165                        break;
166                    } else {
167                        tagIndex++;
168                    }
169                }
170 
171                if (rangeSubtags.length == rangeIndex && !list.contains(tag)) {
172                    list.add(tag);
173                }
174             }
175         }
176 
177         return list;
178     }
179 
lookup(List<LanguageRange> priorityList, Collection<Locale> locales)180     public static Locale lookup(List<LanguageRange> priorityList,
181                                 Collection<Locale> locales) {
182         if (priorityList.isEmpty() || locales.isEmpty()) {
183             return null;
184         }
185 
186         // Create a list of language tags to be matched.
187         List<String> tags = new ArrayList<>();
188         for (Locale locale : locales) {
189             tags.add(locale.toLanguageTag());
190         }
191 
192         // Look up a language tags.
193         String lookedUpTag = lookupTag(priorityList, tags);
194 
195         if (lookedUpTag == null) {
196             return null;
197         } else {
198             return Locale.forLanguageTag(lookedUpTag);
199         }
200     }
201 
lookupTag(List<LanguageRange> priorityList, Collection<String> tags)202     public static String lookupTag(List<LanguageRange> priorityList,
203                                    Collection<String> tags) {
204         if (priorityList.isEmpty() || tags.isEmpty()) {
205             return null;
206         }
207 
208         for (LanguageRange lr : priorityList) {
209             String range = lr.getRange();
210 
211             // Special language range ("*") is ignored in lookup.
212             if (range.equals("*")) {
213                 continue;
214             }
215 
216             String rangeForRegex = range.replace("*", "\\p{Alnum}*");
217             while (rangeForRegex.length() > 0) {
218                 for (String tag : tags) {
219                     tag = tag.toLowerCase();
220                     if (tag.matches(rangeForRegex)) {
221                         return tag;
222                     }
223                 }
224 
225                 // Truncate from the end....
226                 int index = rangeForRegex.lastIndexOf('-');
227                 if (index >= 0) {
228                     rangeForRegex = rangeForRegex.substring(0, index);
229 
230                     // if range ends with an extension key, truncate it.
231                     if (rangeForRegex.lastIndexOf('-') == rangeForRegex.length()-2) {
232                         rangeForRegex =
233                             rangeForRegex.substring(0, rangeForRegex.length()-2);
234                     }
235                 } else {
236                     rangeForRegex = "";
237                 }
238             }
239         }
240 
241         return null;
242     }
243 
parse(String ranges)244     public static List<LanguageRange> parse(String ranges) {
245         ranges = ranges.replace(" ", "").toLowerCase();
246         if (ranges.startsWith("accept-language:")) {
247             ranges = ranges.substring(16); // delete unnecessary prefix
248         }
249 
250         String[] langRanges = ranges.split(",");
251         List<LanguageRange> list = new ArrayList<>(langRanges.length);
252         List<String> tempList = new ArrayList<>();
253         int numOfRanges = 0;
254 
255         for (String range : langRanges) {
256             int index;
257             String r;
258             double w;
259 
260             if ((index = range.indexOf(";q=")) == -1) {
261                 r = range;
262                 w = MAX_WEIGHT;
263             } else {
264                 r = range.substring(0, index);
265                 index += 3;
266                 try {
267                     w = Double.parseDouble(range.substring(index));
268                 }
269                 catch (Exception e) {
270                     throw new IllegalArgumentException("weight=\""
271                                   + range.substring(index)
272                                   + "\" for language range \"" + r + "\"");
273                 }
274 
275                 if (w < MIN_WEIGHT || w > MAX_WEIGHT) {
276                     throw new IllegalArgumentException("weight=" + w
277                                   + " for language range \"" + r
278                                   + "\". It must be between " + MIN_WEIGHT
279                                   + " and " + MAX_WEIGHT + ".");
280                 }
281             }
282 
283             if (!tempList.contains(r)) {
284                 LanguageRange lr = new LanguageRange(r, w);
285                 index = numOfRanges;
286                 for (int j = 0; j < numOfRanges; j++) {
287                     if (list.get(j).getWeight() < w) {
288                         index = j;
289                         break;
290                     }
291                 }
292                 list.add(index, lr);
293                 numOfRanges++;
294                 tempList.add(r);
295 
296                 // Check if the range has an equivalent using IANA LSR data.
297                 // If yes, add it to the User's Language Priority List as well.
298 
299                 // aa-XX -> aa-YY
300                 String equivalent;
301                 if ((equivalent = getEquivalentForRegionAndVariant(r)) != null
302                     && !tempList.contains(equivalent)) {
303                     list.add(index+1, new LanguageRange(equivalent, w));
304                     numOfRanges++;
305                     tempList.add(equivalent);
306                 }
307 
308                 String[] equivalents;
309                 if ((equivalents = getEquivalentsForLanguage(r)) != null) {
310                     for (String equiv: equivalents) {
311                         // aa-XX -> bb-XX(, cc-XX)
312                         if (!tempList.contains(equiv)) {
313                             list.add(index+1, new LanguageRange(equiv, w));
314                             numOfRanges++;
315                             tempList.add(equiv);
316                         }
317 
318                         // bb-XX -> bb-YY(, cc-YY)
319                         equivalent = getEquivalentForRegionAndVariant(equiv);
320                         if (equivalent != null
321                             && !tempList.contains(equivalent)) {
322                             list.add(index+1, new LanguageRange(equivalent, w));
323                             numOfRanges++;
324                             tempList.add(equivalent);
325                         }
326                     }
327                 }
328             }
329         }
330 
331         return list;
332     }
333 
334     /**
335      * A faster alternative approach to String.replaceFirst(), if the given
336      * string is a literal String, not a regex.
337      */
replaceFirstSubStringMatch(String range, String substr, String replacement)338     private static String replaceFirstSubStringMatch(String range,
339             String substr, String replacement) {
340         int pos = range.indexOf(substr);
341         if (pos == -1) {
342             return range;
343         } else {
344             return range.substring(0, pos) + replacement
345                     + range.substring(pos + substr.length());
346         }
347     }
348 
getEquivalentsForLanguage(String range)349     private static String[] getEquivalentsForLanguage(String range) {
350         String r = range;
351 
352         while (r.length() > 0) {
353             if (LocaleEquivalentMaps.singleEquivMap.containsKey(r)) {
354                 String equiv = LocaleEquivalentMaps.singleEquivMap.get(r);
355                 // Return immediately for performance if the first matching
356                 // subtag is found.
357                 return new String[]{replaceFirstSubStringMatch(range,
358                     r, equiv)};
359             } else if (LocaleEquivalentMaps.multiEquivsMap.containsKey(r)) {
360                 String[] equivs = LocaleEquivalentMaps.multiEquivsMap.get(r);
361                 String[] result = new String[equivs.length];
362                 for (int i = 0; i < equivs.length; i++) {
363                     result[i] = replaceFirstSubStringMatch(range,
364                             r, equivs[i]);
365                 }
366                 return result;
367             }
368 
369             // Truncate the last subtag simply.
370             int index = r.lastIndexOf('-');
371             if (index == -1) {
372                 break;
373             }
374             r = r.substring(0, index);
375         }
376 
377         return null;
378     }
379 
getEquivalentForRegionAndVariant(String range)380     private static String getEquivalentForRegionAndVariant(String range) {
381         int extensionKeyIndex = getExtentionKeyIndex(range);
382 
383         for (String subtag : LocaleEquivalentMaps.regionVariantEquivMap.keySet()) {
384             int index;
385             if ((index = range.indexOf(subtag)) != -1) {
386                 // Check if the matching text is a valid region or variant.
387                 if (extensionKeyIndex != Integer.MIN_VALUE
388                     && index > extensionKeyIndex) {
389                     continue;
390                 }
391 
392                 int len = index + subtag.length();
393                 if (range.length() == len || range.charAt(len) == '-') {
394                     return replaceFirstSubStringMatch(range, subtag,
395                             LocaleEquivalentMaps.regionVariantEquivMap
396                                     .get(subtag));
397                 }
398             }
399         }
400 
401         return null;
402     }
403 
getExtentionKeyIndex(String s)404     private static int getExtentionKeyIndex(String s) {
405         char[] c = s.toCharArray();
406         int index = Integer.MIN_VALUE;
407         for (int i = 1; i < c.length; i++) {
408             if (c[i] == '-') {
409                 if (i - index == 2) {
410                     return index;
411                 } else {
412                     index = i;
413                 }
414             }
415         }
416         return Integer.MIN_VALUE;
417     }
418 
mapEquivalents( List<LanguageRange>priorityList, Map<String, List<String>> map)419     public static List<LanguageRange> mapEquivalents(
420                                           List<LanguageRange>priorityList,
421                                           Map<String, List<String>> map) {
422         if (priorityList.isEmpty()) {
423             return new ArrayList<>(); // need to return a empty mutable List
424         }
425         if (map == null || map.isEmpty()) {
426             return new ArrayList<LanguageRange>(priorityList);
427         }
428 
429         // Create a map, key=originalKey.toLowerCaes(), value=originalKey
430         Map<String, String> keyMap = new HashMap<>();
431         for (String key : map.keySet()) {
432             keyMap.put(key.toLowerCase(), key);
433         }
434 
435         List<LanguageRange> list = new ArrayList<>();
436         for (LanguageRange lr : priorityList) {
437             String range = lr.getRange();
438             String r = range;
439             boolean hasEquivalent = false;
440 
441             while (r.length() > 0) {
442                 if (keyMap.containsKey(r)) {
443                     hasEquivalent = true;
444                     List<String> equivalents = map.get(keyMap.get(r));
445                     if (equivalents != null) {
446                         int len = r.length();
447                         for (String equivalent : equivalents) {
448                             list.add(new LanguageRange(equivalent.toLowerCase()
449                                      + range.substring(len),
450                                      lr.getWeight()));
451                         }
452                     }
453                     // Return immediately if the first matching subtag is found.
454                     break;
455                 }
456 
457                 // Truncate the last subtag simply.
458                 int index = r.lastIndexOf('-');
459                 if (index == -1) {
460                     break;
461                 }
462                 r = r.substring(0, index);
463             }
464 
465             if (!hasEquivalent) {
466                 list.add(lr);
467             }
468         }
469 
470         return list;
471     }
472 
LocaleMatcher()473     private LocaleMatcher() {}
474 
475 }
476