/*
 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25 
26 package sun.util.locale;
27 
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Locale.*;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import static java.util.Locale.FilteringMode.*;
import static java.util.Locale.LanguageRange.*;
41 
42 /**
43  * Implementation for BCP47 Locale matching
44  *
45  */
46 public final class LocaleMatcher {
47 
filter(List<LanguageRange> priorityList, Collection<Locale> locales, FilteringMode mode)48     public static List<Locale> filter(List<LanguageRange> priorityList,
49                                       Collection<Locale> locales,
50                                       FilteringMode mode) {
51         if (priorityList.isEmpty() || locales.isEmpty()) {
52             return new ArrayList<>(); // need to return a empty mutable List
53         }
54 
55         // Create a list of language tags to be matched.
56         List<String> tags = new ArrayList<>();
57         for (Locale locale : locales) {
58             tags.add(locale.toLanguageTag());
59         }
60 
61         // Filter language tags.
62         List<String> filteredTags = filterTags(priorityList, tags, mode);
63 
64         // Create a list of matching locales.
65         List<Locale> filteredLocales = new ArrayList<>(filteredTags.size());
66         for (String tag : filteredTags) {
67               filteredLocales.add(Locale.forLanguageTag(tag));
68         }
69 
70         return filteredLocales;
71     }
72 
filterTags(List<LanguageRange> priorityList, Collection<String> tags, FilteringMode mode)73     public static List<String> filterTags(List<LanguageRange> priorityList,
74                                           Collection<String> tags,
75                                           FilteringMode mode) {
76         if (priorityList.isEmpty() || tags.isEmpty()) {
77             return new ArrayList<>(); // need to return a empty mutable List
78         }
79 
80         ArrayList<LanguageRange> list;
81         if (mode == EXTENDED_FILTERING) {
82             return filterExtended(priorityList, tags);
83         } else {
84             list = new ArrayList<>();
85             for (LanguageRange lr : priorityList) {
86                 String range = lr.getRange();
87                 if (range.startsWith("*-")
88                     || range.indexOf("-*") != -1) { // Extended range
89                     if (mode == AUTOSELECT_FILTERING) {
90                         return filterExtended(priorityList, tags);
91                     } else if (mode == MAP_EXTENDED_RANGES) {
92                         if (range.charAt(0) == '*') {
93                             range = "*";
94                         } else {
95                             range = range.replaceAll("-[*]", "");
96                         }
97                         list.add(new LanguageRange(range, lr.getWeight()));
98                     } else if (mode == REJECT_EXTENDED_RANGES) {
99                         throw new IllegalArgumentException("An extended range \""
100                                       + range
101                                       + "\" found in REJECT_EXTENDED_RANGES mode.");
102                     }
103                 } else { // Basic range
104                     list.add(lr);
105                 }
106             }
107 
108             return filterBasic(list, tags);
109         }
110     }
111 
filterBasic(List<LanguageRange> priorityList, Collection<String> tags)112     private static List<String> filterBasic(List<LanguageRange> priorityList,
113                                             Collection<String> tags) {
114         List<String> list = new ArrayList<>();
115         for (LanguageRange lr : priorityList) {
116             String range = lr.getRange();
117             if (range.equals("*")) {
118                 return new ArrayList<String>(tags);
119             } else {
120                 for (String tag : tags) {
121                     tag = tag.toLowerCase();
122                     if (tag.startsWith(range)) {
123                         int len = range.length();
124                         if ((tag.length() == len || tag.charAt(len) == '-')
125                             && !list.contains(tag)) {
126                             list.add(tag);
127                         }
128                     }
129                 }
130             }
131         }
132 
133         return list;
134     }
135 
filterExtended(List<LanguageRange> priorityList, Collection<String> tags)136     private static List<String> filterExtended(List<LanguageRange> priorityList,
137                                                Collection<String> tags) {
138         List<String> list = new ArrayList<>();
139         for (LanguageRange lr : priorityList) {
140             String range = lr.getRange();
141             if (range.equals("*")) {
142                 return new ArrayList<String>(tags);
143             }
144             String[] rangeSubtags = range.split("-");
145             for (String tag : tags) {
146                 tag = tag.toLowerCase();
147                 String[] tagSubtags = tag.split("-");
148                 if (!rangeSubtags[0].equals(tagSubtags[0])
149                     && !rangeSubtags[0].equals("*")) {
150                     continue;
151                 }
152 
153                 int rangeIndex = 1;
154                 int tagIndex = 1;
155 
156                 while (rangeIndex < rangeSubtags.length
157                        && tagIndex < tagSubtags.length) {
158                    if (rangeSubtags[rangeIndex].equals("*")) {
159                        rangeIndex++;
160                    } else if (rangeSubtags[rangeIndex].equals(tagSubtags[tagIndex])) {
161                        rangeIndex++;
162                        tagIndex++;
163                    } else if (tagSubtags[tagIndex].length() == 1
164                               && !tagSubtags[tagIndex].equals("*")) {
165                        break;
166                    } else {
167                        tagIndex++;
168                    }
169                }
170 
171                if (rangeSubtags.length == rangeIndex && !list.contains(tag)) {
172                    list.add(tag);
173                }
174             }
175         }
176 
177         return list;
178     }
179 
lookup(List<LanguageRange> priorityList, Collection<Locale> locales)180     public static Locale lookup(List<LanguageRange> priorityList,
181                                 Collection<Locale> locales) {
182         if (priorityList.isEmpty() || locales.isEmpty()) {
183             return null;
184         }
185 
186         // Create a list of language tags to be matched.
187         List<String> tags = new ArrayList<>();
188         for (Locale locale : locales) {
189             tags.add(locale.toLanguageTag());
190         }
191 
192         // Look up a language tags.
193         String lookedUpTag = lookupTag(priorityList, tags);
194 
195         if (lookedUpTag == null) {
196             return null;
197         } else {
198             return Locale.forLanguageTag(lookedUpTag);
199         }
200     }
201 
lookupTag(List<LanguageRange> priorityList, Collection<String> tags)202     public static String lookupTag(List<LanguageRange> priorityList,
203                                    Collection<String> tags) {
204         if (priorityList.isEmpty() || tags.isEmpty()) {
205             return null;
206         }
207 
208         for (LanguageRange lr : priorityList) {
209             String range = lr.getRange();
210 
211             // Special language range ("*") is ignored in lookup.
212             if (range.equals("*")) {
213                 continue;
214             }
215             // Android-changed: backport OpenJDK 9 fix for JDK-8166994.
216             String rangeForRegex = range.replace("*", "\\p{Alnum}*");
217             while (rangeForRegex.length() > 0) {
218                 for (String tag : tags) {
219                     tag = tag.toLowerCase();
220                     if (tag.matches(rangeForRegex)) {
221                         return tag;
222                     }
223                 }
224 
225                 // Truncate from the end....
226                 int index = rangeForRegex.lastIndexOf('-');
227                 if (index >= 0) {
228                     rangeForRegex = rangeForRegex.substring(0, index);
229 
230                     // if range ends with an extension key, truncate it.
231                     if (rangeForRegex.lastIndexOf('-') == rangeForRegex.length()-2) {
232                         rangeForRegex =
233                             rangeForRegex.substring(0, rangeForRegex.length()-2);
234                     }
235                 } else {
236                     rangeForRegex = "";
237                 }
238             }
239         }
240 
241         return null;
242     }
243 
parse(String ranges)244     public static List<LanguageRange> parse(String ranges) {
245         // Android-changed: backport OpenJDK 9 fix for JDK-8166994.
246         ranges = ranges.replace(" ", "").toLowerCase();
247         if (ranges.startsWith("accept-language:")) {
248             ranges = ranges.substring(16); // delete unnecessary prefix
249         }
250 
251         String[] langRanges = ranges.split(",");
252         List<LanguageRange> list = new ArrayList<>(langRanges.length);
253         List<String> tempList = new ArrayList<>();
254         int numOfRanges = 0;
255 
256         for (String range : langRanges) {
257             int index;
258             String r;
259             double w;
260 
261             if ((index = range.indexOf(";q=")) == -1) {
262                 r = range;
263                 w = MAX_WEIGHT;
264             } else {
265                 r = range.substring(0, index);
266                 index += 3;
267                 try {
268                     w = Double.parseDouble(range.substring(index));
269                 }
270                 catch (Exception e) {
271                     throw new IllegalArgumentException("weight=\""
272                                   + range.substring(index)
273                                   + "\" for language range \"" + r + "\"");
274                 }
275 
276                 if (w < MIN_WEIGHT || w > MAX_WEIGHT) {
277                     throw new IllegalArgumentException("weight=" + w
278                                   + " for language range \"" + r
279                                   + "\". It must be between " + MIN_WEIGHT
280                                   + " and " + MAX_WEIGHT + ".");
281                 }
282             }
283 
284             if (!tempList.contains(r)) {
285                 LanguageRange lr = new LanguageRange(r, w);
286                 index = numOfRanges;
287                 for (int j = 0; j < numOfRanges; j++) {
288                     if (list.get(j).getWeight() < w) {
289                         index = j;
290                         break;
291                     }
292                 }
293                 list.add(index, lr);
294                 numOfRanges++;
295                 tempList.add(r);
296 
297                 // Check if the range has an equivalent using IANA LSR data.
298                 // If yes, add it to the User's Language Priority List as well.
299 
300                 // aa-XX -> aa-YY
301                 String equivalent;
302                 if ((equivalent = getEquivalentForRegionAndVariant(r)) != null
303                     && !tempList.contains(equivalent)) {
304                     list.add(index+1, new LanguageRange(equivalent, w));
305                     numOfRanges++;
306                     tempList.add(equivalent);
307                 }
308 
309                 String[] equivalents;
310                 if ((equivalents = getEquivalentsForLanguage(r)) != null) {
311                     for (String equiv: equivalents) {
312                         // aa-XX -> bb-XX(, cc-XX)
313                         if (!tempList.contains(equiv)) {
314                             list.add(index+1, new LanguageRange(equiv, w));
315                             numOfRanges++;
316                             tempList.add(equiv);
317                         }
318 
319                         // bb-XX -> bb-YY(, cc-YY)
320                         equivalent = getEquivalentForRegionAndVariant(equiv);
321                         if (equivalent != null
322                             && !tempList.contains(equivalent)) {
323                             list.add(index+1, new LanguageRange(equivalent, w));
324                             numOfRanges++;
325                             tempList.add(equivalent);
326                         }
327                     }
328                 }
329             }
330         }
331 
332         return list;
333     }
334 
335     // BEGIN Android-added: backport OpenJDK 9 fix for JDK-8166994.
336     /**
337      * A faster alternative approach to String.replaceFirst(), if the given
338      * string is a literal String, not a regex.
339      */
replaceFirstSubStringMatch(String range, String substr, String replacement)340     private static String replaceFirstSubStringMatch(String range,
341             String substr, String replacement) {
342         int pos = range.indexOf(substr);
343         if (pos == -1) {
344             return range;
345         } else {
346             return range.substring(0, pos) + replacement
347                     + range.substring(pos + substr.length());
348         }
349     }
350     // END Android-added: backport OpenJDK 9 fix for JDK-8166994.
351 
getEquivalentsForLanguage(String range)352     private static String[] getEquivalentsForLanguage(String range) {
353         String r = range;
354 
355         while (r.length() > 0) {
356             if (LocaleEquivalentMaps.singleEquivMap.containsKey(r)) {
357                 String equiv = LocaleEquivalentMaps.singleEquivMap.get(r);
358                 // Return immediately for performance if the first matching
359                 // subtag is found.
360 // BEGIN Android-added: backport OpenJDK 9 fix for JDK-8166994.
361 // Upstream bug: https://bugs.openjdk.java.net/browse/JDK-8166994
362 // Upstream fix: http://hg.openjdk.java.net/jdk9/dev/jdk/rev/60837db5d445
363                 return new String[]{replaceFirstSubStringMatch(range,
364                     r, equiv)};
365             } else if (LocaleEquivalentMaps.multiEquivsMap.containsKey(r)) {
366                 String[] equivs = LocaleEquivalentMaps.multiEquivsMap.get(r);
367                 String[] result = new String[equivs.length];
368                 for (int i = 0; i < equivs.length; i++) {
369                     result[i] = replaceFirstSubStringMatch(range,
370                             r, equivs[i]);
371                 }
372                 return result;
373 // END Android-added: backport OpenJDK 9 fix for JDK-8166994.
374             }
375 
376             // Truncate the last subtag simply.
377             int index = r.lastIndexOf('-');
378             if (index == -1) {
379                 break;
380             }
381             r = r.substring(0, index);
382         }
383 
384         return null;
385     }
386 
getEquivalentForRegionAndVariant(String range)387     private static String getEquivalentForRegionAndVariant(String range) {
388         int extensionKeyIndex = getExtentionKeyIndex(range);
389 
390         for (String subtag : LocaleEquivalentMaps.regionVariantEquivMap.keySet()) {
391             int index;
392             if ((index = range.indexOf(subtag)) != -1) {
393                 // Check if the matching text is a valid region or variant.
394                 if (extensionKeyIndex != Integer.MIN_VALUE
395                     && index > extensionKeyIndex) {
396                     continue;
397                 }
398 
399                 int len = index + subtag.length();
400                 if (range.length() == len || range.charAt(len) == '-') {
401                     return replaceFirstSubStringMatch(range, subtag,
402                             LocaleEquivalentMaps.regionVariantEquivMap
403                                     .get(subtag));
404                 }
405             }
406         }
407 
408         return null;
409     }
410 
getExtentionKeyIndex(String s)411     private static int getExtentionKeyIndex(String s) {
412         char[] c = s.toCharArray();
413         int index = Integer.MIN_VALUE;
414         for (int i = 1; i < c.length; i++) {
415             if (c[i] == '-') {
416                 if (i - index == 2) {
417                     return index;
418                 } else {
419                     index = i;
420                 }
421             }
422         }
423         return Integer.MIN_VALUE;
424     }
425 
mapEquivalents( List<LanguageRange>priorityList, Map<String, List<String>> map)426     public static List<LanguageRange> mapEquivalents(
427                                           List<LanguageRange>priorityList,
428                                           Map<String, List<String>> map) {
429         if (priorityList.isEmpty()) {
430             return new ArrayList<>(); // need to return a empty mutable List
431         }
432         if (map == null || map.isEmpty()) {
433             return new ArrayList<LanguageRange>(priorityList);
434         }
435 
436         // Create a map, key=originalKey.toLowerCaes(), value=originalKey
437         Map<String, String> keyMap = new HashMap<>();
438         for (String key : map.keySet()) {
439             keyMap.put(key.toLowerCase(), key);
440         }
441 
442         List<LanguageRange> list = new ArrayList<>();
443         for (LanguageRange lr : priorityList) {
444             String range = lr.getRange();
445             String r = range;
446             boolean hasEquivalent = false;
447 
448             while (r.length() > 0) {
449                 if (keyMap.containsKey(r)) {
450                     hasEquivalent = true;
451                     List<String> equivalents = map.get(keyMap.get(r));
452                     if (equivalents != null) {
453                         int len = r.length();
454                         for (String equivalent : equivalents) {
455                             list.add(new LanguageRange(equivalent.toLowerCase()
456                                      + range.substring(len),
457                                      lr.getWeight()));
458                         }
459                     }
460                     // Return immediately if the first matching subtag is found.
461                     break;
462                 }
463 
464                 // Truncate the last subtag simply.
465                 int index = r.lastIndexOf('-');
466                 if (index == -1) {
467                     break;
468                 }
469                 r = r.substring(0, index);
470             }
471 
472             if (!hasEquivalent) {
473                 list.add(lr);
474             }
475         }
476 
477         return list;
478     }
479 
LocaleMatcher()480     private LocaleMatcher() {}
481 
482 }
483