1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 package com.ibm.icu.impl.locale;
4 
5 import java.util.Arrays;
6 import java.util.Collection;
7 import java.util.LinkedHashSet;
8 import java.util.Map;
9 import java.util.Map.Entry;
10 import java.util.Set;
11 
12 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
13 import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
14 import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
15 import com.ibm.icu.impl.locale.XCldrStub.Multimap;
16 import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
17 import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
18 import com.ibm.icu.util.LocalePriorityList;
19 import com.ibm.icu.util.Output;
20 import com.ibm.icu.util.ULocale;
21 
22 /**
23  * Immutable class that picks best match between user's desired locales and application's supported locales.
24  * @author markdavis
25  */
26 public class XLocaleMatcher {
27     private static final LSR UND = new LSR("und","","");
28     private static final ULocale UND_LOCALE = new ULocale("und");
29 
30     // Activates debugging output to stderr with details of GetBestMatch.
31     private static final boolean TRACE_MATCHER = false;
32 
33     // normally the default values, but can be set via constructor
34 
35     private final XLocaleDistance localeDistance;
36     private final int thresholdDistance;
37     private final int demotionPerAdditionalDesiredLocale;
38     private final DistanceOption distanceOption;
39 
40     // built based on application's supported languages in constructor
41 
42     private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
43     private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
44     private final ULocale defaultLanguage;
45 
46     public static class Builder {
47         private Set<ULocale> supportedLanguagesList;
48         private int thresholdDistance = -1;
49         private int demotionPerAdditionalDesiredLocale = -1;;
50         private ULocale defaultLanguage;
51         private XLocaleDistance localeDistance;
52         private DistanceOption distanceOption;
53         /**
54          * @param languagePriorityList the languagePriorityList to set
55          * @return this Builder object
56          */
setSupportedLocales(String languagePriorityList)57         public Builder setSupportedLocales(String languagePriorityList) {
58             this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
59             return this;
60         }
setSupportedLocales(LocalePriorityList languagePriorityList)61         public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
62             this.supportedLanguagesList = asSet(languagePriorityList);
63             return this;
64         }
setSupportedLocales(Set<ULocale> languagePriorityList)65         public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
66             Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
67             temp.addAll(languagePriorityList);
68             this.supportedLanguagesList = temp;
69             return this;
70         }
71 
72         /**
73          * @param thresholdDistance the thresholdDistance to set, with -1 = default
74          * @return this Builder object
75          */
setThresholdDistance(int thresholdDistance)76         public Builder setThresholdDistance(int thresholdDistance) {
77             this.thresholdDistance = thresholdDistance;
78             return this;
79         }
80         /**
81          * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
82          * @return this Builder object
83          */
setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale)84         public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
85             this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
86             return this;
87         }
88 
89         /**
90          * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
91          * @return this Builder object
92          */
setLocaleDistance(XLocaleDistance localeDistance)93         public Builder setLocaleDistance(XLocaleDistance localeDistance) {
94             this.localeDistance = localeDistance;
95             return this;
96         }
97 
98         /**
99          * Set the default language, with null = default = first supported language
100          * @param defaultLanguage the default language
101          * @return this Builder object
102          */
setDefaultLanguage(ULocale defaultLanguage)103         public Builder setDefaultLanguage(ULocale defaultLanguage) {
104             this.defaultLanguage = defaultLanguage;
105             return this;
106         }
107 
108         /**
109          * If true, then the language differences are smaller than than script differences.
110          * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
111          * @param distanceOption the distance option
112          * @return this Builder object
113          */
setDistanceOption(DistanceOption distanceOption)114         public Builder setDistanceOption(DistanceOption distanceOption) {
115             this.distanceOption = distanceOption;
116             return this;
117         }
118 
build()119         public XLocaleMatcher build() {
120             return new XLocaleMatcher(this);
121         }
122 
123         @Override
toString()124         public String toString() {
125           StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
126           if (!supportedLanguagesList.isEmpty()) {
127             s.append(" supported={").append(supportedLanguagesList.toString()).append("}");
128           }
129           if (defaultLanguage != null) {
130             s.append(" default=").append(defaultLanguage.toString());
131           }
132           if (thresholdDistance >= 0) {
133             s.append(String.format(" thresholdDistance=%d", thresholdDistance));
134           }
135           s.append(" preference=").append(distanceOption.name());
136           return s.append("}").toString();
137         }
138     }
139 
140     /**
141      * Returns a builder used in chaining parameters for building a Locale Matcher.
142      * @return this Builder object
143      */
builder()144     public static Builder builder() {
145         return new Builder();
146     }
147 
148     /** Convenience method */
XLocaleMatcher(String supportedLocales)149     public XLocaleMatcher(String supportedLocales) {
150         this(builder().setSupportedLocales(supportedLocales));
151     }
152     /** Convenience method */
XLocaleMatcher(LocalePriorityList supportedLocales)153     public XLocaleMatcher(LocalePriorityList supportedLocales) {
154         this(builder().setSupportedLocales(supportedLocales));
155     }
156     /** Convenience method */
XLocaleMatcher(Set<ULocale> supportedLocales)157     public XLocaleMatcher(Set<ULocale> supportedLocales) {
158         this(builder().setSupportedLocales(supportedLocales));
159     }
160 
161     /**
162      * Create a locale matcher with the given parameters.
163      * @param supportedLocales
164      * @param thresholdDistance
165      * @param demotionPerAdditionalDesiredLocale
166      * @param localeDistance
167      * @param likelySubtags
168      */
XLocaleMatcher(Builder builder)169     private XLocaleMatcher(Builder builder) {
170         localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault()
171             : builder.localeDistance;
172         thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance()
173             : builder.thresholdDistance;
174         // only do AFTER above are set
175         Set<LSR> paradigms = extractLsrSet(localeDistance.getParadigms());
176         final Multimap<LSR, ULocale> temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms);
177         supportedLanguages = temp2.asMap();
178         exactSupportedLocales = ImmutableSet.copyOf(temp2.values());
179         defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage
180             : supportedLanguages.isEmpty() ? null
181                 : supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language
182         demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? localeDistance.getDefaultRegionDistance()+1
183             : builder.demotionPerAdditionalDesiredLocale;
184         distanceOption = builder.distanceOption;
185     }
186 
187     // Result is not immutable!
188     private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
189         Set<LSR> result = new LinkedHashSet<LSR>();
190         for (ULocale item : languagePriorityList) {
191             final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
192             result.add(max);
193         }
194         return result;
195     }
196 
197     private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
198         Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
199         for (ULocale item : languagePriorityList) {
200             final LSR max = item.equals(UND_LOCALE) ? UND :
201             LSR.fromMaximalized(item);
202             builder.put(max, item);
203         }
204         if (builder.size() > 1 && priorities != null) {
205             // for the supported list, we put any priorities before all others, except for the first.
206             Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();
207 
208             // copy the long way so the priorities are in the same order as in the original
209             boolean first = true;
210             for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
211                 final LSR key = entry.getKey();
212                 if (first || priorities.contains(key)) {
213                     builder2.putAll(key, entry.getValue());
214                     first = false;
215                 }
216             }
217             // now copy the rest
218             builder2.putAll(builder);
219             if (!builder2.equals(builder)) {
220                 throw new IllegalArgumentException();
221             }
222             builder = builder2;
223         }
224         return ImmutableMultimap.copyOf(builder);
225     }
226 
227 
228     /** Convenience method */
229     public ULocale getBestMatch(ULocale ulocale) {
230         return getBestMatch(ulocale, null);
231     }
232     /** Convenience method */
233     public ULocale getBestMatch(String languageList) {
234         return getBestMatch(LocalePriorityList.add(languageList).build(), null);
235     }
236     /** Convenience method */
237     public ULocale getBestMatch(ULocale... locales) {
238         return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
239     }
240     /** Convenience method */
241     public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
242         return getBestMatch(desiredLanguages, null);
243     }
244     /** Convenience method */
245     public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
246         return getBestMatch(desiredLanguages, null);
247     }
248     /** Convenience method */
249     public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
250         return getBestMatch(asSet(desiredLanguages), outputBestDesired);
251     }
252 
253     // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
254     private static Set<ULocale> asSet(LocalePriorityList languageList) {
255         Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
256         for (ULocale locale : languageList) {
257             temp.add(locale);
258         };
259         return temp;
260     }
261 
262     /**
263      * Get the best match between the desired languages and supported languages
264      * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
265      * @param outputBestDesired The one of the desired languages that matched best.
266      * Set to null if the best match was not below the threshold distance.
267      * @return the best match.
268      */
269     public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
270         // fast path for singleton
271         if (desiredLanguages.size() == 1) {
272             return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
273         }
274         // TODO produce optimized version for single desired ULocale
275         Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
276         int bestDistance = Integer.MAX_VALUE;
277         ULocale bestDesiredLocale = null;
278         Collection<ULocale> bestSupportedLocales = null;
279         int delta = 0;
280     mainLoop:
281         for (final Entry<LSR, Set<ULocale>> desiredLsrAndLocales : desiredLSRs.asMap().entrySet()) {
282           LSR desiredLSR = desiredLsrAndLocales.getKey();
283           for (ULocale desiredLocale : desiredLsrAndLocales.getValue()) {
284             // quick check for exact match
285             if (delta < bestDistance) {
286               if (exactSupportedLocales.contains(desiredLocale)) {
287                 if (outputBestDesired != null) {
288                   outputBestDesired.value = desiredLocale;
289                 }
290                 if (TRACE_MATCHER) {
291                     System.err.printf(
292                               "Returning %s, which is an exact match for a supported language\n",
293                               desiredLocale);
294                  }
295                 return desiredLocale;
296               }
297               // quick check for maximized locale
298               Collection<ULocale> found = supportedLanguages.get(desiredLSR);
299               if (found != null) {
300                 // if we find one in the set, return first (lowest). We already know the exact one isn't
301                 // there.
302                 if (outputBestDesired != null) {
303                   outputBestDesired.value = desiredLocale;
304                 }
305                 ULocale result = found.iterator().next();
306                 if (TRACE_MATCHER) {
307                   System.err.printf("Returning %s\n", result.toString());
308                 }
309                 return result;
310               }
311             }
312             for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
313               int distance =
314                   delta
315                       + localeDistance.distanceRaw(
316                           desiredLSR,
317                           supportedLsrAndLocale.getKey(),
318                           thresholdDistance,
319                           distanceOption);
320               if (distance < bestDistance) {
321                 bestDistance = distance;
322                 bestDesiredLocale = desiredLocale;
323                 bestSupportedLocales = supportedLsrAndLocale.getValue();
324                 if (distance == 0) {
325                   break mainLoop;
326                 }
327               }
328             }
329             delta += demotionPerAdditionalDesiredLocale;
330           }
331         }
332         if (bestDistance >= thresholdDistance) {
333             if (outputBestDesired != null) {
334                 outputBestDesired.value = null;
335             }
336             if (TRACE_MATCHER) {
337               System.err.printf("Returning default %s\n", defaultLanguage.toString());
338             }
339             return defaultLanguage;
340         }
341         if (outputBestDesired != null) {
342             outputBestDesired.value = bestDesiredLocale;
343         }
344         // pick exact match if there is one
345         if (bestSupportedLocales.contains(bestDesiredLocale)) {
346             if (TRACE_MATCHER) {
347               System.err.printf(
348                   "Returning %s which matches a supported language\n", bestDesiredLocale.toString());
349             }
350             return bestDesiredLocale;
351         }
352         // otherwise return first supported, combining variants and extensions from bestDesired
353         ULocale result = bestSupportedLocales.iterator().next();
354         if (TRACE_MATCHER) {
355           System.err.printf("Returning first supported language %s\n", result.toString());
356         }
357         return result;
358     }
359 
360     /**
361      * Get the best match between the desired languages and supported languages
362      * @param desiredLocale the supplied user's language.
363      * @param outputBestDesired The one of the desired languages that matched best.
364      * Set to null if the best match was not below the threshold distance.
365      * @return the best match.
366      */
367     public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
368         int bestDistance = Integer.MAX_VALUE;
369         ULocale bestDesiredLocale = null;
370         Collection<ULocale> bestSupportedLocales = null;
371 
372         // quick check for exact match, with hack for und
373         final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
374 
375         if (exactSupportedLocales.contains(desiredLocale)) {
376             if (outputBestDesired != null) {
377                 outputBestDesired.value = desiredLocale;
378             }
379             if (TRACE_MATCHER) {
380               System.err.printf("Exact match with a supported locale.\n");
381             }
382             return desiredLocale;
383         }
384         // quick check for maximized locale
385         if (distanceOption == DistanceOption.REGION_FIRST) {
386             Collection<ULocale> found = supportedLanguages.get(desiredLSR);
387             if (found != null) {
388                 // if we find one in the set, return first (lowest). We already know the exact one isn't there.
389                 if (outputBestDesired != null) {
390                     outputBestDesired.value = desiredLocale;
391                 }
392                 ULocale result = found.iterator().next();
393                 if (TRACE_MATCHER) {
394                   System.err.printf("Matches a maximized supported locale: %s\n", result);
395                 }
396                 return result;
397             }
398         }
399         for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
400             int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
401                 thresholdDistance, distanceOption);
402             if (distance < bestDistance) {
403                 bestDistance = distance;
404                 bestDesiredLocale = desiredLocale;
405                 bestSupportedLocales = supportedLsrAndLocale.getValue();
406                 if (distance == 0) {
407                     break;
408                 }
409             }
410         }
411         if (bestDistance >= thresholdDistance) {
412             if (outputBestDesired != null) {
413                 outputBestDesired.value = null;
414             }
415             if (TRACE_MATCHER) {
416               System.err.printf(
417                   "Returning default %s because everything exceeded the threshold of %d.\n",
418                   defaultLanguage, thresholdDistance);
419             }
420             return defaultLanguage;
421         }
422         if (outputBestDesired != null) {
423             outputBestDesired.value = bestDesiredLocale;
424         }
425         // pick exact match if there is one
426         if (bestSupportedLocales.contains(bestDesiredLocale)) {
427             return bestDesiredLocale;
428         }
429         // otherwise return first supported, combining variants and extensions from bestDesired
430         ULocale result = bestSupportedLocales.iterator().next();
431         if (TRACE_MATCHER) {
432           System.err.printf("First in the list of supported locales: %s\n", result);
433         }
434         return result;
435     }
436 
437     /** Combine features of the desired locale into those of the supported, and return result. */
438     public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
439         // for examples of extensions, variants, see
440         //  http://unicode.org/repos/cldr/tags/latest/common/bcp47/
441         //  http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
442 
443         if (!bestSupported.equals(bestDesired) && bestDesired != null) {
444             // add region, variants, extensions
445             ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);
446 
447             // copy the region from the desired, if there is one
448             String region = bestDesired.getCountry();
449             if (!region.isEmpty()) {
450                 b.setRegion(region);
451             }
452 
453             // copy the variants from desired, if there is one
454             // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
455             String variants = bestDesired.getVariant();
456             if (!variants.isEmpty()) {
457                 b.setVariant(variants);
458             }
459 
460             // copy the extensions from desired, if there are any
461             // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
462             for (char extensionKey : bestDesired.getExtensionKeys()) {
463                 b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
464             }
465             bestSupported = b.build();
466         }
467         return bestSupported;
468     }
469 
470     /** Returns the distance between the two languages. The values are not necessarily symmetric.
471      * @param desired A locale desired by the user
472      * @param supported A locale supported by a program.
473      * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
474      * A language is first maximized with add likely subtags, then compared.
475      */
476     public int distance(ULocale desired, ULocale supported) {
477         return localeDistance.distanceRaw(
478             LSR.fromMaximalized(desired),
479             LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
480     }
481 
482     /** Convenience method */
483     public int distance(String desiredLanguage, String supportedLanguage) {
484         return localeDistance.distanceRaw(
485             LSR.fromMaximalized(new ULocale(desiredLanguage)),
486             LSR.fromMaximalized(new ULocale(supportedLanguage)),
487             thresholdDistance, distanceOption);
488     }
489 
490     @Override
491     public String toString() {
492         return exactSupportedLocales.toString();
493     }
494 
495     /** Return the inverse of the distance: that is, 1-distance(desired, supported) */
496     public double match(ULocale desired, ULocale supported) {
497         return (100-distance(desired, supported))/100.0;
498     }
499 
500     /**
501      * Returns a fraction between 0 and 1, where 1 means that the languages are a
502      * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
503      * <br>Note that
504      * the precise values may change over time; no code should be made dependent
505      * on the values remaining constant.
506      * @param desired Desired locale
507      * @param desiredMax Maximized locale (using likely subtags)
508      * @param supported Supported locale
509      * @param supportedMax Maximized locale (using likely subtags)
510      * @return value between 0 and 1, inclusive.
511      * @deprecated Use the form with 2 parameters instead.
512      */
513     @Deprecated
514     public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
515         return match(desired, supported);
516     }
517 
518     /**
519      * Canonicalize a locale (language). Note that for now, it is canonicalizing
520      * according to CLDR conventions (he vs iw, etc), since that is what is needed
521      * for likelySubtags.
522      * @param ulocale language/locale code
523      * @return ULocale with remapped subtags.
524      * @stable ICU 4.4
525      */
526     public ULocale canonicalize(ULocale ulocale) {
527         // TODO
528         return null;
529     }
530 
531     /**
532      * @return the thresholdDistance. Any distance above this value is treated as a match failure.
533      */
534     public int getThresholdDistance() {
535         return thresholdDistance;
536     }
537 }
538