1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 package com.ibm.icu.impl.locale;
4 
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.Enumeration;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.LinkedHashMap;
13 import java.util.LinkedHashSet;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Map.Entry;
17 import java.util.Objects;
18 import java.util.Set;
19 import java.util.TreeMap;
20 import java.util.TreeSet;
21 
22 import com.ibm.icu.impl.ICUResourceBundle;
23 import com.ibm.icu.impl.Row;
24 import com.ibm.icu.impl.Row.R4;
25 import com.ibm.icu.impl.locale.XCldrStub.CollectionUtilities;
26 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
27 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
28 import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
29 import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
30 import com.ibm.icu.impl.locale.XCldrStub.Multimap;
31 import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
32 import com.ibm.icu.impl.locale.XCldrStub.Predicate;
33 import com.ibm.icu.impl.locale.XCldrStub.Splitter;
34 import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
35 import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
36 import com.ibm.icu.impl.locale.XLocaleDistance.RegionMapper.Builder;
37 import com.ibm.icu.text.LocaleDisplayNames;
38 import com.ibm.icu.util.LocaleMatcher;
39 import com.ibm.icu.util.Output;
40 import com.ibm.icu.util.ULocale;
41 import com.ibm.icu.util.UResourceBundleIterator;
42 
43 public class XLocaleDistance {
44 
    /**
     * Debugging switch. No use of it is visible in this part of the file
     * (NOTE(review): presumably gates printing of overridden entries - confirm).
     */
    static final boolean PRINT_OVERRIDES = false;

    /**
     * Distance constant with the value 100.
     * NOTE(review): presumably the distance reported when a match is worse than the
     * caller's threshold - confirm against the callers.
     */
    public static final int ABOVE_THRESHOLD = 100;

    // Activates debugging output to stderr with details of GetBestMatch.
    // Be sure to set this to false before checking this in for production!
    private static final boolean TRACE_DISTANCE = false;

    /**
     * Wildcard key used in the distance tables in place of "*": it stands for "any subtag".
     * The value is chosen so that it is above any real subtag.
     */
    @Deprecated
    public static final String ANY = "�"; // matches any character. Uses value above any subtag.
55 
fixAny(String string)56     private static String fixAny(String string) {
57         return "*".equals(string) ? ANY : string;
58     }
59 
    /** Locale display names for English ({@link ULocale#ENGLISH}). */
    static final LocaleDisplayNames english = LocaleDisplayNames.getInstance(ULocale.ENGLISH);
61 
xGetLanguageMatcherData()62     private static List<R4<String, String, Integer, Boolean>> xGetLanguageMatcherData() {
63         List<R4<String, String, Integer, Boolean>> distanceList = new ArrayList<>();
64 
65         ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
66         ICUResourceBundle languageMatchingNew = suppData.findTopLevel("languageMatchingNew");
67         ICUResourceBundle written = (ICUResourceBundle) languageMatchingNew.get("written");
68 
69         for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
70             ICUResourceBundle item = (ICUResourceBundle) iter.next();
71             boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
72             distanceList.add(
73                     (R4<String, String, Integer, Boolean>)            // note: .freeze returning wrong type, so casting.
74                     Row.of(
75                             item.getString(0),
76                             item.getString(1),
77                             Integer.parseInt(item.getString(2)),
78                             oneway)
79                     .freeze());
80         }
81         return Collections.unmodifiableList(distanceList);
82     }
83 
84     @SuppressWarnings("unused")
xGetParadigmLocales()85     private static Set<String> xGetParadigmLocales() {
86         ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
87         ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
88         ICUResourceBundle writtenParadigmLocales = (ICUResourceBundle) languageMatchingInfo.get("written")
89                 .get("paradigmLocales");
90         //      paradigmLocales{ "en", "en-GB",... }
91         HashSet<String> paradigmLocales = new HashSet<>(Arrays.asList(writtenParadigmLocales.getStringArray()));
92         return Collections.unmodifiableSet(paradigmLocales);
93     }
94 
95     @SuppressWarnings("unused")
xGetMatchVariables()96     private static Map<String, String> xGetMatchVariables() {
97         ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
98         ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
99         ICUResourceBundle writtenMatchVariables = (ICUResourceBundle) languageMatchingInfo.get("written")
100                 .get("matchVariable");
101         //        matchVariable{ americas{"019"} cnsar{"HK+MO"} ...}
102 
103         HashMap<String,String> matchVariables = new HashMap<>();
104         for (Enumeration<String> enumer = writtenMatchVariables.getKeys(); enumer.hasMoreElements(); ) {
105             String key = enumer.nextElement();
106             matchVariables.put(key, writtenMatchVariables.getString(key));
107         }
108         return Collections.unmodifiableMap(matchVariables);
109     }
110 
    /**
     * Builds the region containment relation used by this class.
     *
     * <p>The direct containment (container code followed by its directly contained codes) is
     * hard-coded below. It is then resolved transitively by {@code fill}, starting at "001",
     * so that each container reached from "001" maps to everything it contains, directly or
     * indirectly.
     *
     * @return an immutable copy of the resolved container-to-contained multimap
     */
    private static Multimap<String, String> xGetContainment() {
        TreeMultimap<String,String> containment = TreeMultimap.create();
        // Each putAll call lists a container first, then the codes it directly contains.
        containment
        .putAll("001", "019", "002", "150", "142", "009")
        .putAll("011", "BF", "BJ", "CI", "CV", "GH", "GM", "GN", "GW", "LR", "ML", "MR", "NE", "NG", "SH", "SL", "SN", "TG")
        .putAll("013", "BZ", "CR", "GT", "HN", "MX", "NI", "PA", "SV")
        .putAll("014", "BI", "DJ", "ER", "ET", "KE", "KM", "MG", "MU", "MW", "MZ", "RE", "RW", "SC", "SO", "SS", "TZ", "UG", "YT", "ZM", "ZW")
        .putAll("142", "145", "143", "030", "034", "035")
        .putAll("143", "TM", "TJ", "KG", "KZ", "UZ")
        .putAll("145", "AE", "AM", "AZ", "BH", "CY", "GE", "IL", "IQ", "JO", "KW", "LB", "OM", "PS", "QA", "SA", "SY", "TR", "YE", "NT", "YD")
        .putAll("015", "DZ", "EG", "EH", "LY", "MA", "SD", "TN", "EA", "IC")
        .putAll("150", "154", "155", "151", "039")
        .putAll("151", "BG", "BY", "CZ", "HU", "MD", "PL", "RO", "RU", "SK", "UA", "SU")
        .putAll("154", "GG", "IM", "JE", "AX", "DK", "EE", "FI", "FO", "GB", "IE", "IS", "LT", "LV", "NO", "SE", "SJ")
        .putAll("155", "AT", "BE", "CH", "DE", "FR", "LI", "LU", "MC", "NL", "DD", "FX")
        .putAll("017", "AO", "CD", "CF", "CG", "CM", "GA", "GQ", "ST", "TD", "ZR")
        .putAll("018", "BW", "LS", "NA", "SZ", "ZA")
        .putAll("019", "021", "013", "029", "005", "003", "419")
        .putAll("002", "015", "011", "017", "014", "018")
        .putAll("021", "BM", "CA", "GL", "PM", "US")
        .putAll("029", "AG", "AI", "AW", "BB", "BL", "BQ", "BS", "CU", "CW", "DM", "DO", "GD", "GP", "HT", "JM", "KN", "KY", "LC", "MF", "MQ", "MS", "PR", "SX", "TC", "TT", "VC", "VG", "VI", "AN")
        .putAll("003", "021", "013", "029")
        .putAll("030", "CN", "HK", "JP", "KP", "KR", "MN", "MO", "TW")
        .putAll("035", "BN", "ID", "KH", "LA", "MM", "MY", "PH", "SG", "TH", "TL", "VN", "BU", "TP")
        .putAll("039", "AD", "AL", "BA", "ES", "GI", "GR", "HR", "IT", "ME", "MK", "MT", "RS", "PT", "SI", "SM", "VA", "XK", "CS", "YU")
        .putAll("419", "013", "029", "005")
        .putAll("005", "AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE")
        .putAll("053", "AU", "NF", "NZ")
        .putAll("054", "FJ", "NC", "PG", "SB", "VU")
        .putAll("057", "FM", "GU", "KI", "MH", "MP", "NR", "PW")
        .putAll("061", "AS", "CK", "NU", "PF", "PN", "TK", "TO", "TV", "WF", "WS")
        .putAll("034", "AF", "BD", "BT", "IN", "IR", "LK", "MV", "NP", "PK")
        .putAll("009", "053", "054", "057", "061", "QO")
        .putAll("QO", "AQ", "BV", "CC", "CX", "GS", "HM", "IO", "TF", "UM", "AC", "CP", "DG", "TA")
        ;
        // The supplemental-data lookup below is kept for reference only.
        //Can't use following, because data from CLDR is discarded
        //        ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
        //        UResourceBundle territoryContainment = suppData.get("territoryContainment");
        //        for (int i = 0 ; i < territoryContainment.getSize(); i++) {
        //            UResourceBundle mapping = territoryContainment.get(i);
        //            String parent = mapping.getKey();
        //            for (int j = 0 ; j < mapping.getSize(); j++) {
        //                String child = mapping.getString(j);
        //                containment.put(parent,child);
        //                System.out.println(parent + " => " + child);
        //            }
        //        }
        // Expand the direct relation into the transitive one, starting from "001".
        TreeMultimap<String,String> containmentResolved = TreeMultimap.create();
        fill("001", containment, containmentResolved);
        return ImmutableMultimap.copyOf(containmentResolved);
    }
162 
fill(String region, TreeMultimap<String, String> containment, Multimap<String, String> toAddTo)163     private static Set<String> fill(String region, TreeMultimap<String, String> containment, Multimap<String, String> toAddTo) {
164         Set<String> contained = containment.get(region);
165         if (contained == null) {
166             return Collections.emptySet();
167         }
168         toAddTo.putAll(region, contained); // do top level
169         // then recursively
170         for (String subregion : contained) {
171             toAddTo.putAll(region, fill(subregion, containment, toAddTo));
172         }
173         return toAddTo.get(region);
174     }
175 
176 
177     static final Multimap<String,String> CONTAINER_TO_CONTAINED;
178     static final Multimap<String,String> CONTAINER_TO_CONTAINED_FINAL;
179     static {
180         //         Multimap<String, String> containerToContainedTemp = xGetContainment();
181         //         fill(Region.getInstance("001"), containerToContainedTemp);
182 
183         CONTAINER_TO_CONTAINED = xGetContainment();
184         Multimap<String, String> containerToFinalContainedBuilder = TreeMultimap.create();
185         for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
186             String container = entry.getKey();
187             for (String contained : entry.getValue()) {
188                 if (CONTAINER_TO_CONTAINED.get(contained) == null) {
containerToFinalContainedBuilder.put(container, contained)189                     containerToFinalContainedBuilder.put(container, contained);
190                 }
191             }
192         }
193         CONTAINER_TO_CONTAINED_FINAL = ImmutableMultimap.copyOf(containerToFinalContainedBuilder);
194     }
195 
196     final static private Set<String> ALL_FINAL_REGIONS = ImmutableSet.copyOf(CONTAINER_TO_CONTAINED_FINAL.get("001"));
197 
198     // end of data from CLDR
199 
    /** Top-level distance table, as passed to the constructor. */
    private final DistanceTable languageDesired2Supported;
    /** Region mapper, as passed to the constructor. */
    private final RegionMapper regionMapper;
    /** Distance of the ANY/ANY node at the first level of the table (read in the constructor). */
    private final int defaultLanguageDistance;
    /** Distance of the ANY/ANY node in the table below the first-level ANY/ANY node. */
    private final int defaultScriptDistance;
    /** Distance of the ANY/ANY node in the table below the second-level ANY/ANY node. */
    private final int defaultRegionDistance;
205 
206     @Deprecated
207     public static abstract class DistanceTable {
getDistance(String desiredLang, String supportedlang, Output<DistanceTable> table, boolean starEquals)208         abstract int getDistance(String desiredLang, String supportedlang, Output<DistanceTable> table, boolean starEquals);
getCloser(int threshold)209         abstract Set<String> getCloser(int threshold);
toString(boolean abbreviate)210         abstract String toString(boolean abbreviate);
compact()211         public DistanceTable compact() {
212             return this;
213         }
214         //        public Integer getInternalDistance(String a, String b) {
215         //            return null;
216         //        }
getInternalNode(String any, String any2)217         public DistanceNode getInternalNode(String any, String any2) {
218             return null;
219         }
getInternalMatches()220         public Map<String, Set<String>> getInternalMatches() {
221             return null;
222         }
isEmpty()223         public boolean isEmpty() {
224             return true;
225         }
226     }
227 
228     @Deprecated
229     public static class DistanceNode {
230         final int distance;
231 
DistanceNode(int distance)232         public DistanceNode(int distance) {
233             this.distance = distance;
234         }
235 
getDistanceTable()236         public DistanceTable getDistanceTable() {
237             return null;
238         }
239 
240         @Override
equals(Object obj)241         public boolean equals(Object obj) {
242             return this == obj ||
243                     (obj != null
244                     && obj.getClass() == this.getClass()
245                     && distance == ((DistanceNode) obj).distance);
246         }
247         @Override
hashCode()248         public int hashCode() {
249             return distance;
250         }
251         @Override
toString()252         public String toString() {
253             return "\ndistance: " + distance;
254         }
255     }
256 
257     private interface IdMapper<K,V> {
toId(K source)258         public V toId(K source);
259     }
260 
261     static class IdMakerFull<T> implements IdMapper<T,Integer> {
262         private final Map<T, Integer> objectToInt = new HashMap<>();
263         private final List<T> intToObject = new ArrayList<>();
264         final String name; // for debugging
265 
IdMakerFull(String name)266         IdMakerFull(String name) {
267             this.name = name;
268         }
269 
IdMakerFull()270         IdMakerFull() {
271             this("unnamed");
272         }
273 
IdMakerFull(String name, T zeroValue)274         IdMakerFull(String name, T zeroValue) {
275             this(name);
276             add(zeroValue);
277         }
278 
279         /**
280          * Return an id, making one if there wasn't one already.
281          */
add(T source)282         public Integer add(T source) {
283             Integer result = objectToInt.get(source);
284             if (result == null) {
285                 Integer newResult = intToObject.size();
286                 objectToInt.put(source, newResult);
287                 intToObject.add(source);
288                 return newResult;
289             } else {
290                 return result;
291             }
292         }
293 
294         /**
295          * Return an id, or null if there is none.
296          */
297         @Override
toId(T source)298         public Integer toId(T source) {
299             return objectToInt.get(source);
300             //            return value == null ? 0 : value;
301         }
302 
303         /**
304          * Return the object for the id, or null if there is none.
305          */
fromId(int id)306         public T fromId(int id) {
307             return intToObject.get(id);
308         }
309 
310         /**
311          * Return interned object
312          */
intern(T source)313         public T intern(T source) {
314             return fromId(add(source));
315         }
316 
size()317         public int size() {
318             return intToObject.size();
319         }
320         /**
321          * Same as add, except if the object didn't have an id, return null;
322          */
getOldAndAdd(T source)323         public Integer getOldAndAdd(T source) {
324             Integer result = objectToInt.get(source);
325             if (result == null) {
326                 Integer newResult = intToObject.size();
327                 objectToInt.put(source, newResult);
328                 intToObject.add(source);
329             }
330             return result;
331         }
332 
333         @Override
toString()334         public String toString() {
335             return size() + ": " + intToObject;
336         }
337         @Override
equals(Object obj)338         public boolean equals(Object obj) {
339             return this == obj ||
340                     (obj != null
341                     && obj.getClass() == this.getClass()
342                     && intToObject.equals(((IdMakerFull<?>) obj).intToObject));
343         }
344         @Override
hashCode()345         public int hashCode() {
346             return intToObject.hashCode();
347         }
348     }
349 
350     static class StringDistanceNode extends DistanceNode {
351         final DistanceTable distanceTable;
352 
StringDistanceNode(int distance, DistanceTable distanceTable)353         public StringDistanceNode(int distance, DistanceTable distanceTable) {
354             super(distance);
355             this.distanceTable = distanceTable;
356         }
357 
358         @Override
equals(Object obj)359         public boolean equals(Object obj) {
360             StringDistanceNode other;
361             return this == obj ||
362                     (obj != null
363                     && obj.getClass() == this.getClass()
364                     && distance == (other = (StringDistanceNode) obj).distance
365                     && Objects.equals(distanceTable, other.distanceTable)
366                     && super.equals(other));
367         }
368         @Override
hashCode()369         public int hashCode() {
370             return distance ^ Objects.hashCode(distanceTable);
371         }
372 
StringDistanceNode(int distance)373         StringDistanceNode(int distance) {
374             this(distance, new StringDistanceTable());
375         }
376 
addSubtables(String desiredSub, String supportedSub, CopyIfEmpty r)377         public void addSubtables(String desiredSub, String supportedSub, CopyIfEmpty r) {
378             ((StringDistanceTable) distanceTable).addSubtables(desiredSub, supportedSub, r);
379         }
380         @Override
toString()381         public String toString() {
382             return "distance: " + distance + "\n" + distanceTable;
383         }
384 
copyTables(StringDistanceTable value)385         public void copyTables(StringDistanceTable value) {
386             if (value != null) {
387                 ((StringDistanceTable)distanceTable).copy(value);
388             }
389         }
390 
391         @Override
getDistanceTable()392         public DistanceTable getDistanceTable() {
393             return distanceTable;
394         }
395     }
396 
XLocaleDistance(DistanceTable datadistancetable2, RegionMapper regionMapper)397     public XLocaleDistance(DistanceTable datadistancetable2, RegionMapper regionMapper) {
398         languageDesired2Supported = datadistancetable2;
399         this.regionMapper = regionMapper;
400 
401         StringDistanceNode languageNode = (StringDistanceNode) ((StringDistanceTable) languageDesired2Supported).subtables.get(ANY).get(ANY);
402         defaultLanguageDistance = languageNode.distance;
403         StringDistanceNode scriptNode = (StringDistanceNode) ((StringDistanceTable)languageNode.distanceTable).subtables.get(ANY).get(ANY);
404         defaultScriptDistance = scriptNode.distance;
405         DistanceNode regionNode = ((StringDistanceTable)scriptNode.distanceTable).subtables.get(ANY).get(ANY);
406         defaultRegionDistance = regionNode.distance;
407     }
408 
409     @SuppressWarnings("rawtypes")
newMap()410     private static Map newMap() { // for debugging
411         return new TreeMap();
412     }
413 
414     /**
415      * Internal class
416      */
417     @Deprecated
418     public static class StringDistanceTable extends DistanceTable {
419         final Map<String, Map<String, DistanceNode>> subtables;
420 
StringDistanceTable(Map<String, Map<String, DistanceNode>> tables)421         StringDistanceTable(Map<String, Map<String, DistanceNode>> tables) {
422             subtables = tables;
423         }
424         @SuppressWarnings("unchecked")
StringDistanceTable()425         StringDistanceTable() {
426             this(newMap());
427         }
428 
429         @Override
isEmpty()430         public boolean isEmpty() {
431             return subtables.isEmpty();
432         }
433 
434         @Override
equals(Object obj)435         public boolean equals(Object obj) {
436             return this == obj ||
437                     (obj != null
438                     && obj.getClass() == this.getClass()
439                     && subtables.equals(((StringDistanceTable) obj).subtables));
440         }
441         @Override
hashCode()442         public int hashCode() {
443             return subtables.hashCode();
444         }
445 
446         @Override
getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals)447         public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
448             if (TRACE_DISTANCE) {
449                 System.err.printf("    Entering       getDistance: desired=%s supported=%s starEquals=%s\n",
450                     desired, supported, Boolean.toString(starEquals));
451             }
452             boolean star = false;
453             Map<String, DistanceNode> sub2 = subtables.get(desired);
454             if (sub2 == null) {
455                 sub2 = subtables.get(ANY); // <*, supported>
456                 star = true;
457             }
458             DistanceNode value = sub2.get(supported);   // <*/desired, supported>
459             if (value == null) {
460                 value = sub2.get(ANY);  // <*/desired, *>
461                 if (value == null && !star) {
462                     sub2 = subtables.get(ANY);   // <*, supported>
463                     value = sub2.get(supported);
464                     if (value == null) {
465                         value = sub2.get(ANY);   // <*, *>
466                     }
467                 }
468                 star = true;
469             }
470             if (distanceTable != null) {
471                 distanceTable.value = ((StringDistanceNode) value).distanceTable;
472             }
473             int result = starEquals && star && desired.equals(supported) ? 0 : value.distance;
474             if (TRACE_DISTANCE) {
475                 System.err.printf("    Returning from getDistance: %d\n", result);
476             }
477             return result;
478         }
479 
copy(StringDistanceTable other)480         public void copy(StringDistanceTable other) {
481             for (Entry<String, Map<String, DistanceNode>> e1 : other.subtables.entrySet()) {
482                 for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
483                     DistanceNode value = e2.getValue();
484                     @SuppressWarnings("unused")
485                     DistanceNode subNode = addSubtable(e1.getKey(), e2.getKey(), value.distance);
486                 }
487             }
488         }
489 
490         @SuppressWarnings("unchecked")
addSubtable(String desired, String supported, int distance)491         DistanceNode addSubtable(String desired, String supported, int distance) {
492             Map<String, DistanceNode> sub2 = subtables.get(desired);
493             if (sub2 == null) {
494                 subtables.put(desired, sub2 = newMap());
495             }
496             DistanceNode oldNode = sub2.get(supported);
497             if (oldNode != null) {
498                 return oldNode;
499             }
500 
501             final StringDistanceNode newNode = new StringDistanceNode(distance);
502             sub2.put(supported, newNode);
503             return newNode;
504         }
505 
506         /**
507          * Return null if value doesn't exist
508          */
getNode(String desired, String supported)509         private DistanceNode getNode(String desired, String supported) {
510             Map<String, DistanceNode> sub2 = subtables.get(desired);
511             if (sub2 == null) {
512                 return null;
513             }
514             return sub2.get(supported);
515         }
516 
517 
518         /** add table for each subitem that matches and doesn't have a table already
519          */
addSubtables( String desired, String supported, Predicate<DistanceNode> action)520         public void addSubtables(
521                 String desired, String supported,
522                 Predicate<DistanceNode> action) {
523             DistanceNode node = getNode(desired, supported);
524             if (node == null) {
525                 // get the distance it would have
526                 Output<DistanceTable> node2 = new Output<>();
527                 int distance = getDistance(desired, supported, node2, true);
528                 // now add it
529                 node = addSubtable(desired, supported, distance);
530                 if (node2.value != null) {
531                     ((StringDistanceNode)node).copyTables((StringDistanceTable)(node2.value));
532                 }
533             }
534             action.test(node);
535         }
536 
addSubtables(String desiredLang, String supportedLang, String desiredScript, String supportedScript, int percentage)537         public void addSubtables(String desiredLang, String supportedLang,
538                 String desiredScript, String supportedScript,
539                 int percentage) {
540 
541             // add to all the values that have the matching desiredLang and supportedLang
542             @SuppressWarnings("unused")
543             boolean haveKeys = false;
544             for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
545                 String key1 = e1.getKey();
546                 final boolean desiredIsKey = desiredLang.equals(key1);
547                 if (desiredIsKey || desiredLang.equals(ANY)) {
548                     for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
549                         String key2 = e2.getKey();
550                         final boolean supportedIsKey = supportedLang.equals(key2);
551                         haveKeys |= (desiredIsKey && supportedIsKey);
552                         if (supportedIsKey || supportedLang.equals(ANY)) {
553                             DistanceNode value = e2.getValue();
554                             ((StringDistanceTable)value.getDistanceTable()).addSubtable(desiredScript, supportedScript, percentage);
555                         }
556                     }
557                 }
558             }
559             // now add the sequence explicitly
560             StringDistanceTable dt = new StringDistanceTable();
561             dt.addSubtable(desiredScript, supportedScript, percentage);
562             CopyIfEmpty r = new CopyIfEmpty(dt);
563             addSubtables(desiredLang, supportedLang, r);
564         }
565 
addSubtables(String desiredLang, String supportedLang, String desiredScript, String supportedScript, String desiredRegion, String supportedRegion, int percentage)566         public void addSubtables(String desiredLang, String supportedLang,
567                 String desiredScript, String supportedScript,
568                 String desiredRegion, String supportedRegion,
569                 int percentage) {
570 
571             // add to all the values that have the matching desiredLang and supportedLang
572             @SuppressWarnings("unused")
573             boolean haveKeys = false;
574             for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
575                 String key1 = e1.getKey();
576                 final boolean desiredIsKey = desiredLang.equals(key1);
577                 if (desiredIsKey || desiredLang.equals(ANY)) {
578                     for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
579                         String key2 = e2.getKey();
580                         final boolean supportedIsKey = supportedLang.equals(key2);
581                         haveKeys |= (desiredIsKey && supportedIsKey);
582                         if (supportedIsKey || supportedLang.equals(ANY)) {
583                             StringDistanceNode value = (StringDistanceNode) e2.getValue();
584                             ((StringDistanceTable)value.distanceTable).addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
585                         }
586                     }
587                 }
588             }
589             // now add the sequence explicitly
590 
591             StringDistanceTable dt = new StringDistanceTable();
592             dt.addSubtable(desiredRegion, supportedRegion, percentage);
593             AddSub r = new AddSub(desiredScript, supportedScript, dt);
594             addSubtables(desiredLang,  supportedLang,  r);
595         }
596 
597         @Override
toString()598         public String toString() {
599             return toString(false);
600         }
601 
602         @Override
toString(boolean abbreviate)603         public String toString(boolean abbreviate) {
604             return toString(abbreviate, "", new IdMakerFull<>("interner"), new StringBuilder()).toString();
605         }
606 
toString(boolean abbreviate, String indent, IdMakerFull<Object> intern, StringBuilder buffer)607         public StringBuilder toString(boolean abbreviate, String indent, IdMakerFull<Object> intern, StringBuilder buffer) {
608             String indent2 = indent.isEmpty() ? "" : "\t";
609             Integer id = abbreviate ? intern.getOldAndAdd(subtables) : null;
610             if (id != null) {
611                 buffer.append(indent2).append('#').append(id).append('\n');
612             } else for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
613                 final Map<String, DistanceNode> subsubtable = e1.getValue();
614                 buffer.append(indent2).append(e1.getKey());
615                 String indent3 = "\t";
616                 id = abbreviate ? intern.getOldAndAdd(subsubtable) : null;
617                 if (id != null) {
618                     buffer.append(indent3).append('#').append(id).append('\n');
619                 } else for (Entry<String, DistanceNode> e2 : subsubtable.entrySet()) {
620                     DistanceNode value = e2.getValue();
621                     buffer.append(indent3).append(e2.getKey());
622                     id = abbreviate ? intern.getOldAndAdd(value) : null;
623                     if (id != null) {
624                         buffer.append('\t').append('#').append(id).append('\n');
625                     } else {
626                         buffer.append('\t').append(value.distance);
627                         final DistanceTable distanceTable = value.getDistanceTable();
628                         if (distanceTable != null) {
629                             id = abbreviate ? intern.getOldAndAdd(distanceTable) : null;
630                             if (id != null) {
631                                 buffer.append('\t').append('#').append(id).append('\n');
632                             } else {
633                                 ((StringDistanceTable)distanceTable).toString(abbreviate, indent+"\t\t\t", intern, buffer);
634                                 buffer.append('\n');
635                             }
636                         } else {
637                             buffer.append('\n');
638                         }
639                     }
640                     indent3 = indent+'\t';
641                 }
642                 indent2 = indent;
643             }
644             return buffer;
645         }
646 
647         @Override
compact()648         public StringDistanceTable compact() {
649             return new CompactAndImmutablizer().compact(this);
650         }
651 
652         @Override
getCloser(int threshold)653         public Set<String> getCloser(int threshold) {
654             Set<String> result = new HashSet<>();
655             for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
656                 String desired = e1.getKey();
657                 for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
658                     if (e2.getValue().distance < threshold) {
659                         result.add(desired);
660                         break;
661                     }
662                 }
663             }
664             return result;
665         }
666 
getInternalDistance(String a, String b)667         public Integer getInternalDistance(String a, String b) {
668             Map<String, DistanceNode> subsub = subtables.get(a);
669             if (subsub == null) {
670                 return null;
671             }
672             DistanceNode dnode = subsub.get(b);
673             return dnode == null ? null : dnode.distance;
674         }
675 
676         @Override
getInternalNode(String a, String b)677         public DistanceNode getInternalNode(String a, String b) {
678             Map<String, DistanceNode> subsub = subtables.get(a);
679             if (subsub == null) {
680                 return null;
681             }
682             return subsub.get(b);
683         }
684 
685         @Override
getInternalMatches()686         public Map<String, Set<String>> getInternalMatches() {
687             Map<String, Set<String>> result = new LinkedHashMap<>();
688             for (Entry<String, Map<String, DistanceNode>> entry : subtables.entrySet()) {
689                 result.put(entry.getKey(), new LinkedHashSet<>(entry.getValue().keySet()));
690             }
691             return result;
692         }
693     }
694 
695     static class CopyIfEmpty implements Predicate<DistanceNode> {
696         private final StringDistanceTable toCopy;
CopyIfEmpty(StringDistanceTable resetIfNotNull)697         CopyIfEmpty(StringDistanceTable resetIfNotNull) {
698             this.toCopy = resetIfNotNull;
699         }
700         @Override
test(DistanceNode node)701         public boolean test(DistanceNode node) {
702             final StringDistanceTable subtables = (StringDistanceTable) node.getDistanceTable();
703             if (subtables.subtables.isEmpty()) {
704                 subtables.copy(toCopy);
705             }
706             return true;
707         }
708     }
709 
710     static class AddSub implements Predicate<DistanceNode> {
711         private final String desiredSub;
712         private final String supportedSub;
713         private final CopyIfEmpty r;
714 
AddSub(String desiredSub, String supportedSub, StringDistanceTable distanceTableToCopy)715         AddSub(String desiredSub, String supportedSub, StringDistanceTable distanceTableToCopy) {
716             this.r = new CopyIfEmpty(distanceTableToCopy);
717             this.desiredSub = desiredSub;
718             this.supportedSub = supportedSub;
719         }
720         @Override
test(DistanceNode node)721         public boolean test(DistanceNode node) {
722             if (node == null) {
723                 throw new IllegalArgumentException("bad structure");
724             } else {
725                 ((StringDistanceNode)node).addSubtables(desiredSub, supportedSub, r);
726             }
727             return true;
728         }
729     }
730 
distance(ULocale desired, ULocale supported, int threshold, DistanceOption distanceOption)731     public int distance(ULocale desired, ULocale supported, int threshold, DistanceOption distanceOption) {
732         LSR supportedLSR = LSR.fromMaximalized(supported);
733         LSR desiredLSR = LSR.fromMaximalized(desired);
734         return distanceRaw(desiredLSR, supportedLSR, threshold, distanceOption);
735     }
736 
737     /**
738      * Returns distance, from 0 to ABOVE_THRESHOLD.
739      * ULocales must be in canonical, addLikelySubtags format. Returns distance
740      */
distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption)741     public int distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption) {
742         if (TRACE_DISTANCE) {
743             System.err.printf("  Entering       distanceRaw: desired=%s supported=%s "
744             + "threshold=%d preferred=%s\n",
745             desired, supported, threshold,
746             distanceOption.name());
747         }
748         int result = distanceRaw(desired.language, supported.language,
749                 desired.script, supported.script,
750                 desired.region, supported.region,
751                 threshold, distanceOption);
752         if (TRACE_DISTANCE) {
753             System.err.printf("  Returning from distanceRaw: %d\n", result);
754         }
755         return result;
756     }
757 
758     public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
759     // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
760     // than regions, so they might be considered the "normal" case.
761 
762     /**
763      * Returns distance, from 0 to ABOVE_THRESHOLD.
764      * ULocales must be in canonical, addLikelySubtags format.
765      * (Exception: internal calls may pass any strings. They do this for pseudo-locales.)
766      * Returns distance.
767      */
distanceRaw( String desiredLang, String supportedLang, String desiredScript, String supportedScript, String desiredRegion, String supportedRegion, int threshold, DistanceOption distanceOption)768     public int distanceRaw(
769             String desiredLang, String supportedLang,
770             String desiredScript, String supportedScript,
771             String desiredRegion, String supportedRegion,
772             int threshold,
773             DistanceOption distanceOption) {
774 
775         Output<DistanceTable> subtable = new Output<>();
776 
777         int distance = languageDesired2Supported.getDistance(desiredLang, supportedLang, subtable, true);
778         boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
779         if (scriptFirst) {
780             distance >>= 2;
781         }
782         if (distance < 0) {
783             distance = 0;
784         } else if (distance >= threshold) {
785             return ABOVE_THRESHOLD;
786         }
787 
788         int scriptDistance = subtable.value.getDistance(desiredScript, supportedScript, subtable, true);
789         if (scriptFirst) {
790             scriptDistance >>= 1;
791         }
792         distance += scriptDistance;
793         if (distance >= threshold) {
794             return ABOVE_THRESHOLD;
795         }
796 
797         if (desiredRegion.equals(supportedRegion)) {
798             return distance;
799         }
800 
801         // From here on we know the regions are not equal
802 
803         final String desiredPartition = regionMapper.toId(desiredRegion);
804         final String supportedPartition = regionMapper.toId(supportedRegion);
805         int subdistance;
806 
807         // check for macros. If one is found, we take the maximum distance
808         // this could be optimized by adding some more structure, but probably not worth it.
809 
810         Collection<String> desiredPartitions = desiredPartition.isEmpty() ? regionMapper.macroToPartitions.get(desiredRegion) : null;
811         Collection<String> supportedPartitions = supportedPartition.isEmpty() ? regionMapper.macroToPartitions.get(supportedRegion) : null;
812         if (desiredPartitions != null || supportedPartitions != null) {
813             subdistance = 0;
814             // make the code simple for now
815             if (desiredPartitions == null) {
816                 desiredPartitions = Collections.singleton(desiredPartition);
817             }
818             if (supportedPartitions == null) {
819                 supportedPartitions = Collections.singleton(supportedPartition);
820             }
821 
822             for (String desiredPartition2 : desiredPartitions) {
823                 for (String supportedPartition2 : supportedPartitions) {
824                     int tempSubdistance = subtable.value.getDistance(desiredPartition2, supportedPartition2, null, false);
825                     if (subdistance < tempSubdistance) {
826                         subdistance = tempSubdistance;
827                     }
828                 }
829             }
830         } else {
831             subdistance = subtable.value.getDistance(desiredPartition, supportedPartition, null, false);
832         }
833         distance += subdistance;
834         return distance >= threshold ? ABOVE_THRESHOLD : distance;
835     }
836 
837 
838     private static final XLocaleDistance DEFAULT;
839 
getDefault()840     public static XLocaleDistance getDefault() {
841         return DEFAULT;
842     }
843 
844     static {
845         String[][] variableOverrides = {
846                 {"$enUS", "AS+GU+MH+MP+PR+UM+US+VI"},
847 
848                 {"$cnsar", "HK+MO"},
849 
850                 {"$americas", "019"},
851 
852                 {"$maghreb", "MA+DZ+TN+LY+MR+EH"},
853         };
854         String[] paradigmRegions = {
855                 "en", "en-GB", "es", "es-419", "pt-BR", "pt-PT"
856         };
857         String[][] regionRuleOverrides = {
858                 {"ar_*_$maghreb", "ar_*_$maghreb", "96"},
859                 {"ar_*_$!maghreb", "ar_*_$!maghreb", "96"},
860                 {"ar_*_*", "ar_*_*", "95"},
861 
862                 {"en_*_$enUS", "en_*_$enUS", "96"},
863                 {"en_*_$!enUS", "en_*_$!enUS", "96"},
864                 {"en_*_*", "en_*_*", "95"},
865 
866                 {"es_*_$americas", "es_*_$americas", "96"},
867                 {"es_*_$!americas", "es_*_$!americas", "96"},
868                 {"es_*_*", "es_*_*", "95"},
869 
870                 {"pt_*_$americas", "pt_*_$americas", "96"},
871                 {"pt_*_$!americas", "pt_*_$!americas", "96"},
872                 {"pt_*_*", "pt_*_*", "95"},
873 
874                 {"zh_Hant_$cnsar", "zh_Hant_$cnsar", "96"},
875                 {"zh_Hant_$!cnsar", "zh_Hant_$!cnsar", "96"},
876                 {"zh_Hant_*", "zh_Hant_*", "95"},
877 
878                 {"*_*_*", "*_*_*", "96"},
879         };
880 
881         Builder rmb = new RegionMapper.Builder().addParadigms(paradigmRegions);
882         for (String[] variableRule : variableOverrides) {
rmb.add(variableRule[0], variableRule[1])883             rmb.add(variableRule[0], variableRule[1]);
884         }
885         if (PRINT_OVERRIDES) {
886             System.out.println("\t\t<languageMatches type=\"written\" alt=\"enhanced\">");
887             System.out.println("\t\t\t<paradigmLocales locales=\"" + XCldrStub.join(paradigmRegions, " ")
888             + "\"/>");
889             for (String[] variableRule : variableOverrides) {
890                 System.out.println("\t\t\t<matchVariable id=\"" + variableRule[0]
891                         + "\" value=\""
892                         + variableRule[1]
893                                 + "\"/>");
894             }
895         }
896 
897         final StringDistanceTable defaultDistanceTable = new StringDistanceTable();
898         final RegionMapper defaultRegionMapper = rmb.build();
899 
900         Splitter bar = Splitter.on('_');
901 
902         @SuppressWarnings({"unchecked", "rawtypes"})
903         List<Row.R4<List<String>, List<String>, Integer, Boolean>>[] sorted = new ArrayList[3];
904         sorted[0] = new ArrayList<>();
905         sorted[1] = new ArrayList<>();
906         sorted[2] = new ArrayList<>();
907 
908         // sort the rules so that the language-only are first, then the language-script, and finally the language-script-region.
909         for (R4<String, String, Integer, Boolean> info : xGetLanguageMatcherData()) {
910             String desiredRaw = info.get0();
911             String supportedRaw = info.get1();
912             List<String> desired = bar.splitToList(desiredRaw);
913             List<String> supported = bar.splitToList(supportedRaw);
914             Boolean oneway = info.get3();
915             int distance = desiredRaw.equals("*_*") ? 50 : info.get2();
916             int size = desired.size();
917 
918             // for now, skip size == 3
919             if (size == 3) continue;
920 
Row.of(desired, supported, distance, oneway)921             sorted[size-1].add(Row.of(desired, supported, distance, oneway));
922         }
923 
924         for (List<Row.R4<List<String>, List<String>, Integer, Boolean>> item1 : sorted) {
925             for (Row.R4<List<String>, List<String>, Integer, Boolean> item2 : item1) {
926                 List<String> desired = item2.get0();
927                 List<String> supported = item2.get1();
928                 Integer distance = item2.get2();
929                 Boolean oneway = item2.get3();
add(defaultDistanceTable, desired, supported, distance)930                 add(defaultDistanceTable, desired, supported, distance);
931                 if (oneway != Boolean.TRUE && !desired.equals(supported)) {
add(defaultDistanceTable, supported, desired, distance)932                     add(defaultDistanceTable, supported, desired, distance);
933                 }
printMatchXml(desired, supported, distance, oneway)934                 printMatchXml(desired, supported, distance, oneway);
935             }
936         }
937 
938         // add new size=3
939         for (String[] rule : regionRuleOverrides) {
940             //            if (PRINT_OVERRIDES) System.out.println("\t\t\t<languageMatch desired=\""
941             //                + rule[0]
942             //                    + "\" supported=\""
943             //                    + rule[1]
944             //                        + "\" distance=\""
945             //                        + rule[2]
946             //                            + "\"/>");
947             //            if (rule[0].equals("en_*_*") || rule[1].equals("*_*_*")) {
948             //                int debug = 0;
949             //            }
950             List<String> desiredBase = new ArrayList<>(bar.splitToList(rule[0]));
951             List<String> supportedBase = new ArrayList<>(bar.splitToList(rule[1]));
952             Integer distance = 100-Integer.parseInt(rule[2]);
printMatchXml(desiredBase, supportedBase, distance, false)953             printMatchXml(desiredBase, supportedBase, distance, false);
954 
955             Collection<String> desiredRegions = defaultRegionMapper.getIdsFromVariable(desiredBase.get(2));
956             if (desiredRegions.isEmpty()) {
957                 throw new IllegalArgumentException("Bad region variable: " + desiredBase.get(2));
958             }
959             Collection<String> supportedRegions = defaultRegionMapper.getIdsFromVariable(supportedBase.get(2));
960             if (supportedRegions.isEmpty()) {
961                 throw new IllegalArgumentException("Bad region variable: " + supportedBase.get(2));
962             }
963             for (String desiredRegion2 : desiredRegions) {
964                 desiredBase.set(2, desiredRegion2.toString()); // fix later
965                 for (String supportedRegion2 : supportedRegions) {
966                     supportedBase.set(2, supportedRegion2.toString()); // fix later
add(defaultDistanceTable, desiredBase, supportedBase, distance)967                     add(defaultDistanceTable, desiredBase, supportedBase, distance);
add(defaultDistanceTable, supportedBase, desiredBase, distance)968                     add(defaultDistanceTable, supportedBase, desiredBase, distance);
969                 }
970             }
971         }
972 
973         // Pseudo regions should match no other regions.
974         // {"*-*-XA", "*-*-*", "0"},
975         // {"*-*-XB", "*-*-*", "0"},
976         // {"*-*-XC", "*-*-*", "0"},
977         // {"x1-*-*", "*-*-*", "0"},
978         // {"x2-*-*", "*-*-*", "0"},
979         // ...
980         // {"x8-*-*", "*-*-*", "0"},
981         List<String> supported = Arrays.asList("*", "*", "*");
982         for (String x : Arrays.asList("XA", "XB", "XC")) {
983             List<String> desired = Arrays.asList("*", "*", x);
add(defaultDistanceTable, desired, supported, 100)984             add(defaultDistanceTable, desired, supported, 100);
add(defaultDistanceTable, supported, desired, 100)985             add(defaultDistanceTable, supported, desired, 100);
986         }
987         // See XLikelySubtags.java for the mapping of pseudo-locales to x1 ... x8.
988         for (int i = 1; i <= 8; ++i) {
989             List<String> desired = Arrays.asList("x" + String.valueOf(i), "*", "*");
add(defaultDistanceTable, desired, supported, 100)990             add(defaultDistanceTable, desired, supported, 100);
add(defaultDistanceTable, supported, desired, 100)991             add(defaultDistanceTable, supported, desired, 100);
992         }
993 
994         if (PRINT_OVERRIDES) {
995             System.out.println("\t\t</languageMatches>");
996         }
997 
998         DEFAULT = new XLocaleDistance(defaultDistanceTable.compact(), defaultRegionMapper);
999 
1000         if (PRINT_OVERRIDES) {
1001             System.out.println(defaultRegionMapper);
1002             System.out.println(defaultDistanceTable);
IllegalArgumentException()1003             throw new IllegalArgumentException();
1004         }
1005     }
1006 
printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway)1007     private static void printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway) {
1008         if (PRINT_OVERRIDES) {
1009             String desiredStr = CollectionUtilities.join(desired, "_");
1010             String supportedStr = CollectionUtilities.join(supported, "_");
1011             String desiredName = fixedName(desired);
1012             String supportedName = fixedName(supported);
1013             System.out.println("\t\t\t<languageMatch"
1014                     + " desired=\"" + desiredStr
1015                     + "\"\tsupported=\"" + supportedStr
1016                     + "\"\tdistance=\"" + distance
1017                     + (!oneway ? "" : "\"\toneway=\"true")
1018                     + "\"/>\t<!-- " + desiredName + " ⇒ " + supportedName + " -->");
1019         }
1020     }
1021 
fixedName(List<String> match)1022     private static String fixedName(List<String> match) {
1023         List<String> alt = new ArrayList<>(match);
1024         int size = alt.size();
1025         assert size >= 1 && size <= 3;
1026 
1027         StringBuilder result = new StringBuilder();
1028 
1029         if (size >= 3) {
1030             String region = alt.get(2);
1031             if (region.equals("*") || region.startsWith("$")) {
1032                 result.append(region);
1033             } else {
1034                 result.append(english.regionDisplayName(region));
1035             }
1036         }
1037         if (size >= 2) {
1038             String script = alt.get(1);
1039             if (script.equals("*")) {
1040                 result.insert(0, script);
1041             } else {
1042                 result.insert(0, english.scriptDisplayName(script));
1043             }
1044         }
1045         if (size >= 1) {
1046             String language = alt.get(0);
1047             if (language.equals("*")) {
1048                 result.insert(0, language);
1049             } else {
1050                 result.insert(0, english.languageDisplayName(language));
1051             }
1052         }
1053         return CollectionUtilities.join(alt, "; ");
1054     }
1055 
add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage)1056     static public void add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage) {
1057         int size = desired.size();
1058         if (size != supported.size() || size < 1 || size > 3) {
1059             throw new IllegalArgumentException();
1060         }
1061         final String desiredLang = fixAny(desired.get(0));
1062         final String supportedLang = fixAny(supported.get(0));
1063         if (size == 1) {
1064             languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage);
1065         } else {
1066             final String desiredScript = fixAny(desired.get(1));
1067             final String supportedScript = fixAny(supported.get(1));
1068             if (size == 2) {
1069                 languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, percentage);
1070             } else {
1071                 final String desiredRegion = fixAny(desired.get(2));
1072                 final String supportedRegion = fixAny(supported.get(2));
1073                 languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
1074             }
1075         }
1076     }
1077 
1078     @Override
toString()1079     public String toString() {
1080         return toString(false);
1081     }
1082 
toString(boolean abbreviate)1083     public String toString(boolean abbreviate) {
1084         return regionMapper + "\n" + languageDesired2Supported.toString(abbreviate);
1085     }
1086 
1087 
1088     //    public static XLocaleDistance createDefaultInt() {
1089     //        IntDistanceTable d = new IntDistanceTable(DEFAULT_DISTANCE_TABLE);
1090     //        return new XLocaleDistance(d, DEFAULT_REGION_MAPPER);
1091     //    }
1092 
getContainingMacrosFor(Collection<String> input, Set<String> output)1093     static Set<String> getContainingMacrosFor(Collection<String> input, Set<String> output) {
1094         output.clear();
1095         for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
1096             if (input.containsAll(entry.getValue())) { // example; if all southern Europe are contained, then add S. Europe
1097                 output.add(entry.getKey());
1098             }
1099         }
1100         return output;
1101     }
1102 
1103     static class RegionMapper implements IdMapper<String,String> {
1104         /**
1105          * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX. We generate a mapping from $A1 to a set of partitions {P1, P2}
1106          * When we hit a rule that contains a variable, we replace that rule by multiple rules for the partitions.
1107          */
1108         final Multimap<String,String> variableToPartition;
1109         /**
1110          * Used for executing the rules. We map a region to a partition before processing.
1111          */
1112         final Map<String,String> regionToPartition;
1113         /**
1114          * Used to support es_419 compared to es_AR, etc.
1115          */
1116         final Multimap<String,String> macroToPartitions;
1117         /**
1118          * Used to get the paradigm region for a cluster, if there is one
1119          */
1120         final Set<ULocale> paradigms;
1121 
RegionMapper( Multimap<String, String> variableToPartitionIn, Map<String, String> regionToPartitionIn, Multimap<String,String> macroToPartitionsIn, Set<ULocale> paradigmsIn)1122         private RegionMapper(
1123                 Multimap<String, String> variableToPartitionIn,
1124                 Map<String, String> regionToPartitionIn,
1125                 Multimap<String,String> macroToPartitionsIn,
1126                 Set<ULocale> paradigmsIn) {
1127             variableToPartition = ImmutableMultimap.copyOf(variableToPartitionIn);
1128             regionToPartition = ImmutableMap.copyOf(regionToPartitionIn);
1129             macroToPartitions = ImmutableMultimap.copyOf(macroToPartitionsIn);
1130             paradigms = ImmutableSet.copyOf(paradigmsIn);
1131         }
1132 
1133         @Override
toId(String region)1134         public String toId(String region) {
1135             String result = regionToPartition.get(region);
1136             return result == null ? "" : result;
1137         }
1138 
getIdsFromVariable(String variable)1139         public Collection<String> getIdsFromVariable(String variable) {
1140             if (variable.equals("*")) {
1141                 return Collections.singleton("*");
1142             }
1143             Collection<String> result = variableToPartition.get(variable);
1144             if (result == null || result.isEmpty()) {
1145                 throw new IllegalArgumentException("Variable not defined: " + variable);
1146             }
1147             return result;
1148         }
1149 
regions()1150         public Set<String> regions() {
1151             return regionToPartition.keySet();
1152         }
1153 
variables()1154         public Set<String> variables() {
1155             return variableToPartition.keySet();
1156         }
1157 
1158         @Override
toString()1159         public String toString() {
1160             TreeMultimap<String, String> partitionToVariables = Multimaps.invertFrom(variableToPartition,
1161                     TreeMultimap.<String, String>create());
1162             TreeMultimap<String, String> partitionToRegions = TreeMultimap.create();
1163             for (Entry<String, String> e : regionToPartition.entrySet()) {
1164                 partitionToRegions.put(e.getValue(), e.getKey());
1165             }
1166             StringBuilder buffer = new StringBuilder();
1167             buffer.append("Partition ➠ Variables ➠ Regions (final)");
1168             for (Entry<String, Set<String>> e : partitionToVariables.asMap().entrySet()) {
1169                 buffer.append('\n');
1170                 buffer.append(e.getKey() + "\t" + e.getValue() + "\t" + partitionToRegions.get(e.getKey()));
1171             }
1172             buffer.append("\nMacro ➠ Partitions");
1173             for (Entry<String, Set<String>> e : macroToPartitions.asMap().entrySet()) {
1174                 buffer.append('\n');
1175                 buffer.append(e.getKey() + "\t" + e.getValue());
1176             }
1177 
1178             return buffer.toString();
1179         }
1180 
1181         static class Builder {
1182             final private Multimap<String, String> regionToRawPartition = TreeMultimap.create();
1183             final private RegionSet regionSet = new RegionSet();
1184             final private Set<ULocale> paradigms = new LinkedHashSet<>();
1185 
add(String variable, String barString)1186             void add(String variable, String barString) {
1187                 Set<String> tempRegions = regionSet.parseSet(barString);
1188 
1189                 for (String region : tempRegions) {
1190                     regionToRawPartition.put(region, variable);
1191                 }
1192 
1193                 // now add the inverse variable
1194 
1195                 Set<String> inverse = regionSet.inverse();
1196                 String inverseVariable = "$!" + variable.substring(1);
1197                 for (String region : inverse) {
1198                     regionToRawPartition.put(region, inverseVariable);
1199                 }
1200             }
1201 
addParadigms(String... paradigmRegions)1202             public Builder addParadigms(String... paradigmRegions) {
1203                 for (String paradigm : paradigmRegions) {
1204                     paradigms.add(new ULocale(paradigm));
1205                 }
1206                 return this;
1207             }
1208 
build()1209             RegionMapper build() {
1210                 final IdMakerFull<Collection<String>> id = new IdMakerFull<>("partition");
1211                 Multimap<String,String> variableToPartitions = TreeMultimap.create();
1212                 Map<String,String> regionToPartition = new TreeMap<>();
1213                 Multimap<String,String> partitionToRegions = TreeMultimap.create();
1214 
1215                 for (Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) {
1216                     final String region = e.getKey();
1217                     final Collection<String> rawPartition = e.getValue();
1218                     String partition = String.valueOf((char)('α' + id.add(rawPartition)));
1219 
1220                     regionToPartition.put(region, partition);
1221                     partitionToRegions.put(partition, region);
1222 
1223                     for (String variable : rawPartition) {
1224                         variableToPartitions.put(variable, partition);
1225                     }
1226                 }
1227 
1228                 // we get a mapping of each macro to the partitions it intersects with
1229                 Multimap<String,String> macroToPartitions = TreeMultimap.create();
1230                 for (Entry<String, Set<String>> e : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
1231                     String macro = e.getKey();
1232                     for (Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) {
1233                         String partition = e2.getKey();
1234                         if (!Collections.disjoint(e.getValue(), e2.getValue())) {
1235                             macroToPartitions.put(macro, partition);
1236                         }
1237                     }
1238                 }
1239 
1240                 return new RegionMapper(
1241                         variableToPartitions,
1242                         regionToPartition,
1243                         macroToPartitions,
1244                         paradigms);
1245             }
1246         }
1247     }
1248 
1249     /**
1250      * Parses a string of regions like "US+005-BR" and produces a set of resolved regions.
1251      * All macroregions are fully resolved to sets of non-macro regions.
1252      * <br>Syntax is simple for now:
1253      * <pre>regionSet := region ([-+] region)*</pre>
1254      * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z)
1255      */
1256     private static class RegionSet {
1257         private enum Operation {add, remove}
1258         // temporaries used in processing
1259         final private Set<String> tempRegions = new TreeSet<>();
1260         private Operation operation = null;
1261 
parseSet(String barString)1262         private Set<String> parseSet(String barString) {
1263             operation = Operation.add;
1264             int last = 0;
1265             tempRegions.clear();
1266             int i = 0;
1267             for (; i < barString.length(); ++i) {
1268                 char c = barString.charAt(i); // UTF16 is ok, since syntax is only ascii
1269                 switch(c) {
1270                 case '+':
1271                     add(barString, last, i);
1272                     last = i+1;
1273                     operation = Operation.add;
1274                     break;
1275                 case '-':
1276                     add(barString, last, i);
1277                     last = i+1;
1278                     operation = Operation.remove;
1279                     break;
1280                 }
1281             }
1282             add(barString, last, i);
1283             return tempRegions;
1284         }
1285 
inverse()1286         private Set<String> inverse() {
1287             TreeSet<String> result = new TreeSet<>(ALL_FINAL_REGIONS);
1288             result.removeAll(tempRegions);
1289             return result;
1290         }
1291 
add(String barString, int last, int i)1292         private void add(String barString, int last, int i) {
1293             if (i > last) {
1294                 String region = barString.substring(last,i);
1295                 changeSet(operation, region);
1296             }
1297         }
1298 
changeSet(Operation operation, String region)1299         private void changeSet(Operation operation, String region) {
1300             Collection<String> contained = CONTAINER_TO_CONTAINED_FINAL.get(region);
1301             if (contained != null && !contained.isEmpty()) {
1302                 if (Operation.add == operation) {
1303                     tempRegions.addAll(contained);
1304                 } else {
1305                     tempRegions.removeAll(contained);
1306                 }
1307             } else if (Operation.add == operation) {
1308                 tempRegions.add(region);
1309             } else {
1310                 tempRegions.remove(region);
1311             }
1312         }
1313     }
1314 
invertMap(Map<V,K> map)1315     public static <K,V> Multimap<K,V> invertMap(Map<V,K> map) {
1316         return Multimaps.invertFrom(Multimaps.forMap(map), LinkedHashMultimap.<K,V>create());
1317     }
1318 
getParadigms()1319     public Set<ULocale> getParadigms() {
1320         return regionMapper.paradigms;
1321     }
1322 
getDefaultLanguageDistance()1323     public int getDefaultLanguageDistance() {
1324         return defaultLanguageDistance;
1325     }
1326 
getDefaultScriptDistance()1327     public int getDefaultScriptDistance() {
1328         return defaultScriptDistance;
1329     }
1330 
getDefaultRegionDistance()1331     public int getDefaultRegionDistance() {
1332         return defaultRegionDistance;
1333     }
1334 
1335     static class CompactAndImmutablizer extends IdMakerFull<Object> {
compact(StringDistanceTable item)1336         StringDistanceTable compact(StringDistanceTable item) {
1337             if (toId(item) != null) {
1338                 return (StringDistanceTable) intern(item);
1339             }
1340             return new StringDistanceTable(compact(item.subtables, 0));
1341         }
1342         @SuppressWarnings({ "unchecked", "rawtypes" })
compact(Map<K,T> item, int level)1343         <K,T> Map<K,T> compact(Map<K,T> item, int level) {
1344             if (toId(item) != null) {
1345                 return (Map<K, T>) intern(item);
1346             }
1347             Map<K,T> copy = new LinkedHashMap<>();
1348             for (Entry<K,T> entry : item.entrySet()) {
1349                 T value = entry.getValue();
1350                 if (value instanceof Map) {
1351                     copy.put(entry.getKey(), (T)compact((Map)value, level+1));
1352                 } else {
1353                     copy.put(entry.getKey(), (T)compact((DistanceNode)value));
1354                 }
1355             }
1356             return ImmutableMap.copyOf(copy);
1357         }
compact(DistanceNode item)1358         DistanceNode compact(DistanceNode item) {
1359             if (toId(item) != null) {
1360                 return (DistanceNode) intern(item);
1361             }
1362             final DistanceTable distanceTable = item.getDistanceTable();
1363             if (distanceTable == null || distanceTable.isEmpty()) {
1364                 return new DistanceNode(item.distance);
1365             } else {
1366                 return new StringDistanceNode(item.distance, compact((StringDistanceTable)((StringDistanceNode)item).distanceTable));
1367             }
1368         }
1369     }
1370 
1371     @Deprecated
internalGetDistanceTable()1372     public StringDistanceTable internalGetDistanceTable() {
1373         return (StringDistanceTable) languageDesired2Supported;
1374     }
1375 
main(String[] args)1376     public static void main(String[] args) {
1377         //      for (Entry<String, Collection<String>> entry : containerToContained.asMap().entrySet()) {
1378         //          System.out.println(entry.getKey() + "\t⥢" + entry.getValue() + "; " + containerToFinalContained.get(entry.getKey()));
1379         //      }
1380         //      final Multimap<String,String> regionToMacros = ImmutableMultimap.copyOf(Multimaps.invertFrom(containerToContained, TreeMultimap.create()));
1381         //      for (Entry<String, Collection<String>> entry : regionToMacros.asMap().entrySet()) {
1382         //          System.out.println(entry.getKey() + "\t⥤ " + entry.getValue());
1383         //      }
1384         if (PRINT_OVERRIDES) {
1385             System.out.println(getDefault().toString(true));
1386         }
1387         DistanceTable table = getDefault().languageDesired2Supported;
1388         DistanceTable compactedTable = table.compact();
1389         if (!table.equals(compactedTable)) {
1390             throw new IllegalArgumentException("Compaction isn't equal");
1391         }
1392     }
1393 }
1394