1 /*
2  ****************************************************************************************
3  * Copyright (C) 2009-2015, Google, Inc.; International Business Machines Corporation   *
4  * and others. All Rights Reserved.                                                     *
5  ****************************************************************************************
6  */
7 package com.ibm.icu.util;
8 
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.LinkedHashMap;
13 import java.util.LinkedHashSet;
14 import java.util.Map;
15 import java.util.Map.Entry;
16 import java.util.Objects;
17 import java.util.Set;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import com.ibm.icu.impl.ICUData;
22 import com.ibm.icu.impl.ICUResourceBundle;
23 import com.ibm.icu.impl.Relation;
24 import com.ibm.icu.impl.Row;
25 import com.ibm.icu.impl.Row.R3;
26 
27 /**
28  * Provides a way to match the languages (locales) supported by a product to the
29  * languages (locales) acceptable to a user, and get the best match. For
30  * example:
31  *
32  * <pre>
33  * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
34  *
35  * // afterwards:
36  * matcher.getBestMatch("en-US").toLanguageTag() => "en"
37  * </pre>
38  *
39  * It takes into account when languages are close to one another, such as fil
40  * and tl, and when language regional variants are close, like en-GB and en-AU.
41  * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
42  * file.
 * <p>Instances of this class are intended to be immutable. Often a
 * product will just need one static instance, built with the languages
 * that it supports. However, it may want multiple instances with different
 * default languages based on additional information, such as the domain.
47  *
48  * @author markdavis@google.com
49  * @stable ICU 4.4
50  */
51 public class LocaleMatcher {
52 
    /** Compile-time switch; when true the matching code traces its steps to {@code System.out}. */
    public static final boolean DEBUG = false;

    /** The undetermined locale "und"; {@code addLikelySubtags} returns it as-is instead of maximizing it. */
    private static final ULocale UNKNOWN_LOCALE = new ULocale("und");

    /**
     * Threshold for falling back to the default (first) language. May make this
     * a parameter in the future.
     */
    private static final double DEFAULT_THRESHOLD = 0.5;

    /**
     * The default language, in case the threshold is not met.
     */
    private final ULocale defaultLanguage;

    /**
     * The minimum weight a match must reach; when the best weight is below
     * this value, {@link #defaultLanguage} is returned instead.
     */
    private final double threshold;
72 
    /**
     * Create a new language matcher. The highest-weighted language is the
     * default. That means that if no other language matches closer than a given
     * threshold, that default language is chosen. Typically the default is English,
     * but it could be different based on additional information, such as the domain
     * of the page.
     * <p>Delegates to the two-argument constructor, passing {@code defaultWritten}
     * as the matching data.
     *
     * @param languagePriorityList weighted list
     * @stable ICU 4.4
     */
    public LocaleMatcher(LocalePriorityList languagePriorityList) {
        this(languagePriorityList, defaultWritten);
    }
86 
    /**
     * Create a new language matcher from a String form. The highest-weighted
     * language is the default.
     * <p>The string is converted with {@code LocalePriorityList.add(...).build()}
     * and handed to {@link #LocaleMatcher(LocalePriorityList)}.
     *
     * @param languagePriorityListString String form of LanguagePriorityList
     * @stable ICU 4.4
     */
    public LocaleMatcher(String languagePriorityListString) {
        this(LocalePriorityList.add(languagePriorityListString).build());
    }
97 
    /**
     * Internal testing function; may expose API later.
     * <p>Delegates to the three-argument constructor with
     * {@code DEFAULT_THRESHOLD} as the fallback threshold.
     *
     * @param languagePriorityList LocalePriorityList to match
     * @param matcherData Internal matching data
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
        this(languagePriorityList, matcherData, DEFAULT_THRESHOLD);
    }
109 
110     /**
111      * Internal testing function; may expose API later.
112      * @param languagePriorityList LocalePriorityList to match
113      * @param matcherData Internal matching data
114      * @internal
115      * @deprecated This API is ICU internal only.
116      */
117     @Deprecated
LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold)118     public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
119         this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
120         for (final ULocale language : languagePriorityList) {
121             add(language, languagePriorityList.getWeight(language));
122         }
123         processMapping();
124         Iterator<ULocale> it = languagePriorityList.iterator();
125         defaultLanguage = it.hasNext() ? it.next() : null;
126         this.threshold = threshold;
127     }
128 
129     /**
130      * Returns a fraction between 0 and 1, where 1 means that the languages are a
131      * perfect match, and 0 means that they are completely different. Note that
132      * the precise values may change over time; no code should be made dependent
133      * on the values remaining constant.
134      * @param desired Desired locale
135      * @param desiredMax Maximized locale (using likely subtags)
136      * @param supported Supported locale
137      * @param supportedMax Maximized locale (using likely subtags)
138      * @return value between 0 and 1, inclusive.
139      * @stable ICU 4.4
140      */
match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax)141     public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
142         return matcherData.match(desired, desiredMax, supported, supportedMax);
143     }
144 
145     /**
146      * Canonicalize a locale (language). Note that for now, it is canonicalizing
147      * according to CLDR conventions (he vs iw, etc), since that is what is needed
148      * for likelySubtags.
149      * @param ulocale language/locale code
150      * @return ULocale with remapped subtags.
151      * @stable ICU 4.4
152      */
canonicalize(ULocale ulocale)153     public ULocale canonicalize(ULocale ulocale) {
154         // TODO Get the data from CLDR, use Java conventions.
155         String lang = ulocale.getLanguage();
156         String lang2 = canonicalMap.get(lang);
157         String script = ulocale.getScript();
158         String script2 = canonicalMap.get(script);
159         String region = ulocale.getCountry();
160         String region2 = canonicalMap.get(region);
161         if (lang2 != null || script2 != null || region2 != null) {
162             return new ULocale(
163                 lang2 == null ? lang : lang2,
164                     script2 == null ? script : script2,
165                         region2 == null ? region : region2);
166         }
167         return ulocale;
168     }
169 
170     /**
171      * Get the best match for a LanguagePriorityList
172      *
173      * @param languageList list to match
174      * @return best matching language code
175      * @stable ICU 4.4
176      */
getBestMatch(LocalePriorityList languageList)177     public ULocale getBestMatch(LocalePriorityList languageList) {
178         double bestWeight = 0;
179         ULocale bestTableMatch = null;
180         double penalty = 0;
181         OutputDouble matchWeight = new OutputDouble();
182         for (final ULocale language : languageList) {
183             final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
184             final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
185             if (weight > bestWeight) {
186                 bestWeight = weight;
187                 bestTableMatch = matchLocale;
188             }
189             penalty += 0.07000001;
190         }
191         if (bestWeight < threshold) {
192             bestTableMatch = defaultLanguage;
193         }
194         return bestTableMatch;
195     }
196 
197     /**
198      * Convenience method: Get the best match for a LanguagePriorityList
199      *
200      * @param languageList String form of language priority list
201      * @return best matching language code
202      * @stable ICU 4.4
203      */
getBestMatch(String languageList)204     public ULocale getBestMatch(String languageList) {
205         return getBestMatch(LocalePriorityList.add(languageList).build());
206     }
207 
    /**
     * Get the best match for an individual language code.
     * <p>Delegates to {@code getBestMatchInternal} without requesting the
     * match weight (a null output holder is passed).
     *
     * @param ulocale locale/language code to match
     * @return best matching language code
     * @stable ICU 4.4
     */
    public ULocale getBestMatch(ULocale ulocale) {
        return getBestMatchInternal(ulocale, null);
    }
218 
219     /**
220      * @internal
221      * @deprecated This API is ICU internal only.
222      */
223     @Deprecated
getBestMatch(ULocale... ulocales)224     public ULocale getBestMatch(ULocale... ulocales) {
225         return getBestMatch(LocalePriorityList.add(ulocales).build());
226     }
227 
228     /**
229      * {@inheritDoc}
230      * @stable ICU 4.4
231      */
232     @Override
toString()233     public String toString() {
234         return "{" + defaultLanguage + ", "
235             + localeToMaxLocaleAndWeight + "}";
236     }
237 
238     // ================= Privates =====================
239 
240     /**
241      * Get the best match for an individual language code.
242      *
243      * @param languageCode
244      * @return best matching language code and weight (as per
245      *         {@link #match(ULocale, ULocale)})
246      */
getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight)247     private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
248         languageCode = canonicalize(languageCode);
249         final ULocale maximized = addLikelySubtags(languageCode);
250         if (DEBUG) {
251             System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
252         }
253         double bestWeight = 0;
254         ULocale bestTableMatch = null;
255         String baseLanguage = maximized.getLanguage();
256         Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
257         if (searchTable != null) { // we preprocessed the table so as to filter by lanugage
258             if (DEBUG) System.out.println("\tSearching: " + searchTable);
259             for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
260                 ULocale tableKey = tableKeyValue.get0();
261                 ULocale maxLocale = tableKeyValue.get1();
262                 Double matchedWeight = tableKeyValue.get2();
263                 final double match = match(languageCode, maximized, tableKey, maxLocale);
264                 if (DEBUG) {
265                     System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
266                 }
267                 final double weight = match * matchedWeight;
268                 if (weight > bestWeight) {
269                     bestWeight = weight;
270                     bestTableMatch = tableKey;
271                     if (weight > 0.999d) { // bail on good enough match.
272                         break;
273                     }
274                 }
275             }
276         }
277         if (bestWeight < threshold) {
278             bestTableMatch = defaultLanguage;
279         }
280         if (outputWeight != null) {
281             outputWeight.value = bestWeight; // only return the weight when needed
282         }
283         return bestTableMatch;
284     }
285 
    /**
     * Mutable holder for a double, used as an output parameter: the matcher
     * writes the weight of the best match into {@code value}.
     */
    public static class OutputDouble { // TODO, move to where OutputInt is
        /** The value written by the callee. */
        double value;
    }
289 
add(ULocale language, Double weight)290     private void add(ULocale language, Double weight) {
291         language = canonicalize(language);
292         R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
293         row.freeze();
294         localeToMaxLocaleAndWeight.add(row);
295     }
296 
297     /**
298      * We preprocess the data to get just the possible matches for each desired base language.
299      */
processMapping()300     private void processMapping() {
301         for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
302             String desired = desiredToMatchingLanguages.getKey();
303             Set<String> supported = desiredToMatchingLanguages.getValue();
304             for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
305                 final ULocale key = localeToMaxAndWeight.get0();
306                 String lang = key.getLanguage();
307                 if (supported.contains(lang)) {
308                     addFiltered(desired, localeToMaxAndWeight);
309                 }
310             }
311         }
312         // now put in the values directly, since languages always map to themselves
313         for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
314             final ULocale key = localeToMaxAndWeight.get0();
315             String lang = key.getLanguage();
316             addFiltered(lang, localeToMaxAndWeight);
317         }
318     }
319 
addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight)320     private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
321         Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
322         if (map == null) {
323             desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<R3<ULocale, ULocale, Double>>());
324         }
325         map.add(localeToMaxAndWeight);
326         if (DEBUG) {
327             System.out.println(desired + ", " + localeToMaxAndWeight);
328         }
329     }
330 
    /** Supported locales in registration order; each row is (canonical locale, maximized locale, weight). */
    Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();
    /** Lookup from a desired base language to the supported rows that are candidates for it. */
    Map<String, Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData = new LinkedHashMap<String, Set<Row.R3<ULocale, ULocale, Double>>>();
333 
334     // =============== Special Mapping Information ==============
335 
336     /**
337      * We need to add another method to addLikelySubtags that doesn't return
338      * null, but instead substitutes Zzzz and ZZ if unknown. There are also
339      * a few cases where addLikelySubtags needs to have expanded data, to handle
340      * all deprecated codes.
341      * @param languageCode
342      * @return "fixed" addLikelySubtags
343      */
addLikelySubtags(ULocale languageCode)344     private ULocale addLikelySubtags(ULocale languageCode) {
345         // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
346         // language would normally match English.  But that would produce the counterintuitive results
347         // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
348         // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
349         //
350         // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
351         // so that max("und")="und". That produces the following, more desirable results:
352         if (languageCode.equals(UNKNOWN_LOCALE)) {
353             return UNKNOWN_LOCALE;
354         }
355         final ULocale result = ULocale.addLikelySubtags(languageCode);
356         // should have method on getLikelySubtags for this
357         if (result == null || result.equals(languageCode)) {
358             final String language = languageCode.getLanguage();
359             final String script = languageCode.getScript();
360             final String region = languageCode.getCountry();
361             return new ULocale((language.length() == 0 ? "und"
362                 : language)
363                 + "_"
364                 + (script.length() == 0 ? "Zzzz" : script)
365                 + "_"
366                 + (region.length() == 0 ? "ZZ" : region));
367         }
368         return result;
369     }
370 
371     private static class LocalePatternMatcher {
372         // a value of null means a wildcard; matches any.
373         private String lang;
374         private String script;
375         private String region;
376         private Level level;
377         static Pattern pattern = Pattern.compile(
378             "([a-z]{1,8}|\\*)"
379                 + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
380                 + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");
381 
LocalePatternMatcher(String toMatch)382         public LocalePatternMatcher(String toMatch) {
383             Matcher matcher = pattern.matcher(toMatch);
384             if (!matcher.matches()) {
385                 throw new IllegalArgumentException("Bad pattern: " + toMatch);
386             }
387             lang = matcher.group(1);
388             script = matcher.group(2);
389             region = matcher.group(3);
390             level = region != null ? Level.region : script != null ? Level.script : Level.language;
391 
392             if (lang.equals("*")) {
393                 lang = null;
394             }
395             if (script != null && script.equals("*")) {
396                 script = null;
397             }
398             if (region != null && region.equals("*")) {
399                 region = null;
400             }
401         }
402 
matches(ULocale ulocale)403         boolean matches(ULocale ulocale) {
404             if (lang != null && !lang.equals(ulocale.getLanguage())) {
405                 return false;
406             }
407             if (script != null && !script.equals(ulocale.getScript())) {
408                 return false;
409             }
410             if (region != null && !region.equals(ulocale.getCountry())) {
411                 return false;
412             }
413             return true;
414         }
415 
getLevel()416         public Level getLevel() {
417             return level;
418         }
419 
getLanguage()420         public String getLanguage() {
421             return (lang == null ? "*" : lang);
422         }
423 
getScript()424         public String getScript() {
425             return (script == null ? "*" : script);
426         }
427 
getRegion()428         public String getRegion() {
429             return (region == null ? "*" : region);
430         }
431 
toString()432         public String toString() {
433             String result = getLanguage();
434             if (level != Level.language) {
435                 result += "-" + getScript();
436                 if (level != Level.script) {
437                     result += "-" + getRegion();
438                 }
439             }
440             return result;
441         }
442 
443         /* (non-Javadoc)
444          * @see java.lang.Object#equals(java.lang.Object)
445          */
446         @Override
equals(Object obj)447         public boolean equals(Object obj) {
448             LocalePatternMatcher other = (LocalePatternMatcher) obj;
449             return Objects.equals(level, other.level)
450                 && Objects.equals(lang, other.lang)
451                 && Objects.equals(script, other.script)
452                 && Objects.equals(region, other.region);
453         }
454 
455         /* (non-Javadoc)
456          * @see java.lang.Object#hashCode()
457          */
458         @Override
hashCode()459         public int hashCode() {
460             return level.ordinal()
461                 ^ (lang == null ? 0 : lang.hashCode())
462                 ^ (script == null ? 0 : script.hashCode())
463                 ^ (region == null ? 0 : region.hashCode());
464         }
465     }
466 
467     enum Level {
468         language(0.99),
469         script(0.2),
470         region(0.04);
471 
472         final double worst;
473 
Level(double d)474         Level(double d) {
475             worst = d;
476         }
477     }
478 
479     private static class ScoreData implements Freezable<ScoreData> {
480         @SuppressWarnings("unused")
481         private static final double maxUnequal_changeD_sameS = 0.5;
482 
483         @SuppressWarnings("unused")
484         private static final double maxUnequal_changeEqual = 0.75;
485 
486         LinkedHashSet<Row.R3<LocalePatternMatcher, LocalePatternMatcher, Double>> scores = new LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>();
487         final Level level;
488 
ScoreData(Level level)489         public ScoreData(Level level) {
490             this.level = level;
491         }
492 
addDataToScores(String desired, String supported, R3<LocalePatternMatcher, LocalePatternMatcher, Double> data)493         void addDataToScores(String desired, String supported, R3<LocalePatternMatcher, LocalePatternMatcher, Double> data) {
494             //            Map<String, Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>>> lang_result = scores.get(desired);
495             //            if (lang_result == null) {
496             //                scores.put(desired, lang_result = new HashMap());
497             //            }
498             //            Set<R3<LocalePatternMatcher,LocalePatternMatcher,Double>> result = lang_result.get(supported);
499             //            if (result == null) {
500             //                lang_result.put(supported, result = new LinkedHashSet());
501             //            }
502             //            result.add(data);
503             boolean added = scores.add(data);
504             if (!added) {
505                 throw new ICUException("trying to add duplicate data: " + data);
506             }
507         }
508 
getScore(ULocale dMax, String desiredRaw, String desiredMax, ULocale sMax, String supportedRaw, String supportedMax)509         double getScore(ULocale dMax, String desiredRaw, String desiredMax,
510             ULocale sMax, String supportedRaw, String supportedMax) {
511             double distance = 0;
512             if (!desiredMax.equals(supportedMax)) {
513                 distance = getRawScore(dMax, sMax);
514             } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
515                 distance += 0.001;
516             }
517             return distance;
518         }
519 
getRawScore(ULocale desiredLocale, ULocale supportedLocale)520         private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
521             if (DEBUG) {
522                 System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
523             }
524             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> datum : scores) { // : result
525                 if (datum.get0().matches(desiredLocale)
526                     && datum.get1().matches(supportedLocale)) {
527                     if (DEBUG) {
528                         System.out.println("\t\t\t\tFOUND\t" + datum);
529                     }
530                     return datum.get2();
531                 }
532             }
533             if (DEBUG) {
534                 System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
535             }
536             return level.worst;
537         }
538 
toString()539         public String toString() {
540             StringBuilder result = new StringBuilder().append(level);
541             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
542                 result.append("\n\t\t").append(score);
543             }
544             return result.toString();
545         }
546 
547         @SuppressWarnings("unchecked")
cloneAsThawed()548         public ScoreData cloneAsThawed() {
549             try {
550                 ScoreData result = (ScoreData) clone();
551                 result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone();
552                 result.frozen = false;
553                 return result;
554             } catch (CloneNotSupportedException e) {
555                 throw new ICUCloneNotSupportedException(e); // will never happen
556             }
557 
558         }
559 
560         private volatile boolean frozen = false;
561 
freeze()562         public ScoreData freeze() {
563             return this;
564         }
565 
isFrozen()566         public boolean isFrozen() {
567             return frozen;
568         }
569 
getMatchingLanguages()570         public Relation<String, String> getMatchingLanguages() {
571             Relation<String, String> desiredToSupported = Relation.of(new LinkedHashMap<String, Set<String>>(), HashSet.class);
572             for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
573                 LocalePatternMatcher desired = item.get0();
574                 LocalePatternMatcher supported = item.get1();
575                 if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
576                     desiredToSupported.put(desired.lang, supported.lang);
577                 }
578             }
579             desiredToSupported.freeze();
580             return desiredToSupported;
581         }
582     }
583 
584     /**
585      * Only for testing and use by tools. Interface may change!!
586      * @internal
587      * @deprecated This API is ICU internal only.
588      */
589     @Deprecated
590     public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
        /** Rules applied to the language field. */
        private ScoreData languageScores = new ScoreData(Level.language);
        /** Rules applied to the script field. */
        private ScoreData scriptScores = new ScoreData(Level.script);
        /** Rules applied to the region field. */
        private ScoreData regionScores = new ScoreData(Level.region);
        /** Value returned by matchingLanguages(); NOTE(review): assigned outside the code visible here. */
        private Relation<String, String> matchingLanguages;
        /** Frozen flag; NOTE(review): written outside the code visible here. */
        private volatile boolean frozen = false;
596 
        /**
         * Creates matcher data with empty language, script and region rule sets.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public LanguageMatcherData() {
        }
604 
        /**
         * Returns the stored relation of matching languages. The enclosing
         * matcher reads it as desired language -> set of supported languages.
         * NOTE(review): the field is assigned outside the code visible here;
         * confirm whether it can still be null when this is called.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public Relation<String, String> matchingLanguages() {
            return matchingLanguages;
        }
613 
614         /**
615          * @internal
616          * @deprecated This API is ICU internal only.
617          */
618         @Deprecated
toString()619         public String toString() {
620             return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
621         }
622 
623         /**
624          * @internal
625          * @deprecated This API is ICU internal only.
626          */
627         @Deprecated
match(ULocale a, ULocale aMax, ULocale b, ULocale bMax)628         public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
629             double diff = 0;
630             diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
631             if (diff > 0.999d) { // with no language match, we bail
632                 return 0.0d;
633             }
634             diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
635             diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());
636 
637             if (!a.getVariant().equals(b.getVariant())) {
638                 diff += 0.01;
639             }
640             if (diff < 0.0d) {
641                 diff = 0.0d;
642             } else if (diff > 1.0d) {
643                 diff = 1.0d;
644             }
645             if (DEBUG) {
646                 System.out.println("\t\t\tTotal Distance\t" + diff);
647             }
648             return 1.0 - diff;
649         }
650 
        /**
         * Add an exceptional distance between languages, typically because regional
         * dialects were given their own language codes. At this point the code is
         * symmetric. We don't bother producing an equivalence class because there are
         * so few cases; this function depends on the other permutations being
         * added specifically.
         * <p>Delegates to the five-argument overload with {@code oneway = false}
         * and no comment.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @SuppressWarnings("unused")
        @Deprecated
        private LanguageMatcherData addDistance(String desired, String supported, int percent) {
            return addDistance(desired, supported, percent, false, null);
        }
665 
        /**
         * Adds a two-way rule between the {@code desired} and {@code supported}
         * patterns; the comment is only used in the debug trace. Delegates to the
         * five-argument overload with {@code oneway = false}.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) {
            return addDistance(desired, supported, percent, false, comment);
        }
674 
        /**
         * Adds a rule between the {@code desired} and {@code supported} patterns;
         * when {@code oneway} is true only the desired-to-supported direction is
         * added. Delegates to the five-argument overload with no comment.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) {
            return addDistance(desired, supported, percent, oneway, null);
        }
683 
addDistance(String desired, String supported, int percent, boolean oneway, String comment)684         private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
685             if (DEBUG) {
686                 System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
687                     " supported=\"" + supported + "\"" +
688                     " percent=\"" + percent + "\""
689                     + (oneway ? " oneway=\"true\"" : "")
690                     + "/>"
691                     + (comment == null ? "" : "\t<!-- " + comment + " -->"));
692                 //                    //     .addDistance("nn", "nb", 4, true)
693                 //                        System.out.println(".addDistance(\"" + desired + "\"" +
694                 //                                ", \"" + supported + "\"" +
695                 //                                ", " + percent + ""
696                 //                                + (oneway ? "" : ", true")
697                 //                                + (comment == null ? "" : ", \"" + comment + "\"")
698                 //                                + ")"
699                 //                        );
700 
701             }
702             double score = 1 - percent / 100.0; // convert from percentage
703             LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired);
704             Level desiredLen = desiredMatcher.getLevel();
705             LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
706             Level supportedLen = supportedMatcher.getLevel();
707             if (desiredLen != supportedLen) {
708                 throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
709             }
710             R3<LocalePatternMatcher, LocalePatternMatcher, Double> data = Row.of(desiredMatcher, supportedMatcher, score);
711             R3<LocalePatternMatcher, LocalePatternMatcher, Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
712             boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
713             switch (desiredLen) {
714             case language:
715                 String dlanguage = desiredMatcher.getLanguage();
716                 String slanguage = supportedMatcher.getLanguage();
717                 languageScores.addDataToScores(dlanguage, slanguage, data);
718                 if (!oneway && !desiredEqualsSupported) {
719                     languageScores.addDataToScores(slanguage, dlanguage, data2);
720                 }
721                 break;
722             case script:
723                 String dscript = desiredMatcher.getScript();
724                 String sscript = supportedMatcher.getScript();
725                 scriptScores.addDataToScores(dscript, sscript, data);
726                 if (!oneway && !desiredEqualsSupported) {
727                     scriptScores.addDataToScores(sscript, dscript, data2);
728                 }
729                 break;
730             case region:
731                 String dregion = desiredMatcher.getRegion();
732                 String sregion = supportedMatcher.getRegion();
733                 regionScores.addDataToScores(dregion, sregion, data);
734                 if (!oneway && !desiredEqualsSupported) {
735                     regionScores.addDataToScores(sregion, dregion, data2);
736                 }
737                 break;
738             }
739             return this;
740         }
741 
742         /**
743          * {@inheritDoc}
744          * @internal
745          * @deprecated This API is ICU internal only.
746          */
747         @Deprecated
cloneAsThawed()748         public LanguageMatcherData cloneAsThawed() {
749             LanguageMatcherData result;
750             try {
751                 result = (LanguageMatcherData) clone();
752                 result.languageScores = languageScores.cloneAsThawed();
753                 result.scriptScores = scriptScores.cloneAsThawed();
754                 result.regionScores = regionScores.cloneAsThawed();
755                 result.frozen = false;
756                 return result;
757             } catch (CloneNotSupportedException e) {
758                 throw new ICUCloneNotSupportedException(e); // will never happen
759             }
760         }
761 
762         /**
763          * {@inheritDoc}
764          * @internal
765          * @deprecated This API is ICU internal only.
766          */
767         @Deprecated
freeze()768         public LanguageMatcherData freeze() {
769             languageScores.freeze();
770             regionScores.freeze();
771             scriptScores.freeze();
772             matchingLanguages = languageScores.getMatchingLanguages();
773             frozen = true;
774             return this;
775         }
776 
777         /**
778          * {@inheritDoc}
779          * @internal
780          * @deprecated This API is ICU internal only.
781          */
782         @Deprecated
isFrozen()783         public boolean isFrozen() {
784             return frozen;
785         }
786     }
787 
    // Match data held by this matcher instance (package-private).
    // NOTE(review): it is assigned outside this section of the file - confirm where.
    LanguageMatcherData matcherData;

    // Default match rules: filled in by the static initializer below from the
    // "written" table under "languageMatching" in the supplemental data, then frozen.
    private static final LanguageMatcherData defaultWritten;

    // Language-code replacements. The static initializer below seeds it with
    // iw -> he, mo -> ro and tl -> fil.
    private static HashMap<String, String> canonicalMap = new HashMap<String, String>();
793 
794     static {
795         canonicalMap.put("iw", "he");
796         canonicalMap.put("mo", "ro");
797         canonicalMap.put("tl", "fil");
798 
799         ICUResourceBundle suppData = getICUSupplementalData();
800         ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
801         ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
802         defaultWritten = new LanguageMatcherData();
803 
804         for (UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
805             ICUResourceBundle item = (ICUResourceBundle) iter.next();
806             /*
807             "*_*_*",
808             "*_*_*",
809             "96",
810              */
811             // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
812             boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
813             defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
814         }
defaultWritten.freeze()815         defaultWritten.freeze();
816     }
817 
818     /**
819      * @internal
820      * @deprecated This API is ICU internal only.
821      */
822     @Deprecated
getICUSupplementalData()823     public static ICUResourceBundle getICUSupplementalData() {
824         ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
825             ICUData.ICU_BASE_NAME,
826             "supplementalData",
827             ICUResourceBundle.ICU_DATA_CLASS_LOADER);
828         return suppData;
829     }
830 
831     /**
832      * @internal
833      * @deprecated This API is ICU internal only.
834      */
835     @Deprecated
match(ULocale a, ULocale b)836     public static double match(ULocale a, ULocale b) {
837         final LocaleMatcher matcher = new LocaleMatcher("");
838         return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
839     }
840 }
841