1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 package com.ibm.icu.impl.locale; 4 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.Collection; 8 import java.util.Collections; 9 import java.util.Enumeration; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.LinkedHashMap; 13 import java.util.LinkedHashSet; 14 import java.util.List; 15 import java.util.Map; 16 import java.util.Map.Entry; 17 import java.util.Objects; 18 import java.util.Set; 19 import java.util.TreeMap; 20 import java.util.TreeSet; 21 22 import com.ibm.icu.impl.ICUResourceBundle; 23 import com.ibm.icu.impl.Row; 24 import com.ibm.icu.impl.Row.R4; 25 import com.ibm.icu.impl.locale.XCldrStub.CollectionUtilities; 26 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap; 27 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap; 28 import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet; 29 import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap; 30 import com.ibm.icu.impl.locale.XCldrStub.Multimap; 31 import com.ibm.icu.impl.locale.XCldrStub.Multimaps; 32 import com.ibm.icu.impl.locale.XCldrStub.Predicate; 33 import com.ibm.icu.impl.locale.XCldrStub.Splitter; 34 import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap; 35 import com.ibm.icu.impl.locale.XLikelySubtags.LSR; 36 import com.ibm.icu.impl.locale.XLocaleDistance.RegionMapper.Builder; 37 import com.ibm.icu.text.LocaleDisplayNames; 38 import com.ibm.icu.util.LocaleMatcher; 39 import com.ibm.icu.util.Output; 40 import com.ibm.icu.util.ULocale; 41 import com.ibm.icu.util.UResourceBundleIterator; 42 43 public class XLocaleDistance { 44 45 static final boolean PRINT_OVERRIDES = false; 46 47 public static final int ABOVE_THRESHOLD = 100; 48 49 // Activates debugging output to stderr with details of GetBestMatch. 50 // Be sure to set this to false before checking this in for production! 
private static final boolean TRACE_DISTANCE = false;

/**
 * Wildcard key used inside the distance tables; rule fields written as "*" are
 * mapped to this value by {@link #fixAny(String)}.
 */
@Deprecated
public static final String ANY = "�"; // matches any character. Uses value above any subtag.

/** Maps the rule wildcard "*" to {@link #ANY}; every other string is returned unchanged. */
private static String fixAny(String string) {
    return "*".equals(string) ? ANY : string;
}

static final LocaleDisplayNames english = LocaleDisplayNames.getInstance(ULocale.ENGLISH);

/**
 * Reads the "written" table of the "languageMatchingNew" top-level resource of the
 * ICU supplemental data.
 *
 * @return an unmodifiable list of frozen rows (desired, supported, distance, oneway);
 *         oneway is true when the item has a fourth element equal to "1"
 */
private static List<R4<String, String, Integer, Boolean>> xGetLanguageMatcherData() {
    List<R4<String, String, Integer, Boolean>> distanceList = new ArrayList<>();

    ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
    ICUResourceBundle languageMatchingNew = suppData.findTopLevel("languageMatchingNew");
    ICUResourceBundle written = (ICUResourceBundle) languageMatchingNew.get("written");

    for (UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
        ICUResourceBundle item = (ICUResourceBundle) iter.next();
        boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
        distanceList.add(
                (R4<String, String, Integer, Boolean>) // note: .freeze returning wrong type, so casting.
                Row.of(
                        item.getString(0),
                        item.getString(1),
                        Integer.parseInt(item.getString(2)),
                        oneway)
                .freeze());
    }
    return Collections.unmodifiableList(distanceList);
}

/**
 * Reads languageMatchingInfo/written/paradigmLocales from the ICU supplemental data.
 *
 * @return an unmodifiable set of the paradigm locale strings
 */
@SuppressWarnings("unused")
private static Set<String> xGetParadigmLocales() {
    ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
    ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
    ICUResourceBundle writtenParadigmLocales = (ICUResourceBundle) languageMatchingInfo.get("written")
            .get("paradigmLocales");
    // paradigmLocales{ "en", "en-GB",... }
    HashSet<String> paradigmLocales = new HashSet<>(Arrays.asList(writtenParadigmLocales.getStringArray()));
    return Collections.unmodifiableSet(paradigmLocales);
}

/**
 * Reads languageMatchingInfo/written/matchVariable from the ICU supplemental data.
 *
 * @return an unmodifiable map from variable name to its value string
 */
@SuppressWarnings("unused")
private static Map<String, String> xGetMatchVariables() {
    ICUResourceBundle suppData = LocaleMatcher.getICUSupplementalData();
    ICUResourceBundle languageMatchingInfo = suppData.findTopLevel("languageMatchingInfo");
    ICUResourceBundle writtenMatchVariables = (ICUResourceBundle) languageMatchingInfo.get("written")
            .get("matchVariable");
    // matchVariable{ americas{"019"} cnsar{"HK+MO"} ...}

    HashMap<String, String> matchVariables = new HashMap<>();
    for (Enumeration<String> enumer = writtenMatchVariables.getKeys(); enumer.hasMoreElements(); ) {
        String key = enumer.nextElement();
        matchVariables.put(key, writtenMatchVariables.getString(key));
    }
    return Collections.unmodifiableMap(matchVariables);
}

/**
 * Builds the region containment data from a hard-coded table of direct
 * container-to-contained pairs, then resolves it starting at "001" so that every
 * container reachable from "001" maps to all regions it contains, directly or
 * transitively.
 *
 * @return an immutable copy of the resolved containment
 */
private static Multimap<String, String> xGetContainment() {
    TreeMultimap<String, String> containment = TreeMultimap.create();
    containment
    .putAll("001", "019", "002", "150", "142", "009")
    .putAll("011", "BF", "BJ", "CI", "CV", "GH", "GM", "GN", "GW", "LR", "ML", "MR", "NE", "NG", "SH", "SL", "SN", "TG")
    .putAll("013", "BZ", "CR", "GT", "HN", "MX", "NI", "PA", "SV")
    .putAll("014", "BI", "DJ", "ER", "ET", "KE", "KM", "MG", "MU", "MW", "MZ", "RE", "RW", "SC", "SO", "SS", "TZ", "UG", "YT", "ZM", "ZW")
    .putAll("142", "145", "143", "030", "034", "035")
    .putAll("143", "TM", "TJ", "KG", "KZ", "UZ")
    .putAll("145", "AE", "AM", "AZ", "BH", "CY", "GE", "IL", "IQ", "JO", "KW", "LB", "OM", "PS", "QA", "SA", "SY", "TR", "YE", "NT", "YD")
    .putAll("015", "DZ", "EG", "EH", "LY", "MA", "SD", "TN", "EA", "IC")
    .putAll("150", "154", "155", "151", "039")
    .putAll("151", "BG", "BY", "CZ", "HU", "MD", "PL", "RO", "RU", "SK", "UA", "SU")
    .putAll("154", "GG", "IM", "JE", "AX", "DK", "EE", "FI", "FO", "GB", "IE", "IS", "LT", "LV", "NO", "SE", "SJ")
    .putAll("155", "AT", "BE", "CH", "DE", "FR", "LI", "LU", "MC", "NL", "DD", "FX")
    .putAll("017", "AO", "CD", "CF", "CG", "CM", "GA", "GQ", "ST", "TD", "ZR")
    .putAll("018", "BW", "LS", "NA", "SZ", "ZA")
    .putAll("019", "021", "013", "029", "005", "003", "419")
    .putAll("002", "015", "011", "017", "014", "018")
    .putAll("021", "BM", "CA", "GL", "PM", "US")
    .putAll("029", "AG", "AI", "AW", "BB", "BL", "BQ", "BS", "CU", "CW", "DM", "DO", "GD", "GP", "HT", "JM", "KN", "KY", "LC", "MF", "MQ", "MS", "PR", "SX", "TC", "TT", "VC", "VG", "VI", "AN")
    .putAll("003", "021", "013", "029")
    .putAll("030", "CN", "HK", "JP", "KP", "KR", "MN", "MO", "TW")
    .putAll("035", "BN", "ID", "KH", "LA", "MM", "MY", "PH", "SG", "TH", "TL", "VN", "BU", "TP")
    .putAll("039", "AD", "AL", "BA", "ES", "GI", "GR", "HR", "IT", "ME", "MK", "MT", "RS", "PT", "SI", "SM", "VA", "XK", "CS", "YU")
    .putAll("419", "013", "029", "005")
    .putAll("005", "AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE")
    .putAll("053", "AU", "NF", "NZ")
    .putAll("054", "FJ", "NC", "PG", "SB", "VU")
    .putAll("057", "FM", "GU", "KI", "MH", "MP", "NR", "PW")
    .putAll("061", "AS", "CK", "NU", "PF", "PN", "TK", "TO", "TV", "WF", "WS")
    .putAll("034", "AF", "BD", "BT", "IN", "IR", "LK", "MV", "NP", "PK")
    .putAll("009", "053", "054", "057", "061", "QO")
    .putAll("QO", "AQ", "BV", "CC", "CX", "GS", "HM", "IO", "TF", "UM", "AC", "CP", "DG", "TA")
    ;
    // The "territoryContainment" resource of the ICU supplemental data can't be used
    // instead of the table above, because data from CLDR is discarded.
    TreeMultimap<String, String> containmentResolved = TreeMultimap.create();
    fill("001", containment, containmentResolved);
    return ImmutableMultimap.copyOf(containmentResolved);
}

/**
 * Recursively resolves containment for {@code region}: adds to {@code toAddTo} the
 * regions directly contained in {@code region} and, through the recursive calls,
 * everything contained in those.
 *
 * @param region      the container to resolve
 * @param containment direct containment; a region for which {@code get} yields null
 *                    is treated as having no children
 * @param toAddTo     receives the resolved containment
 * @return the resolved set for {@code region}, or an empty set if it has no children
 */
private static Set<String> fill(String region, TreeMultimap<String, String> containment, Multimap<String, String> toAddTo) {
    Set<String> contained = containment.get(region);
    if (contained == null) {
        return Collections.emptySet();
    }
    toAddTo.putAll(region, contained); // do top level
    // then recursively
    for (String subregion : contained) {
        toAddTo.putAll(region, fill(subregion, containment, toAddTo));
    }
    return toAddTo.get(region);
}


/** Resolved containment: container to every region it contains (see xGetContainment). */
static final Multimap<String, String> CONTAINER_TO_CONTAINED;
/**
 * Like {@link #CONTAINER_TO_CONTAINED}, but keeping only those contained regions that
 * are not themselves containers in {@link #CONTAINER_TO_CONTAINED}.
 */
static final Multimap<String, String> CONTAINER_TO_CONTAINED_FINAL;
static {
    CONTAINER_TO_CONTAINED = xGetContainment();
    Multimap<String, String> containerToFinalContainedBuilder = TreeMultimap.create();
    for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) {
        String container = entry.getKey();
        for (String contained : entry.getValue()) {
            // a null lookup means "contained" has no children of its own
            if (CONTAINER_TO_CONTAINED.get(contained) == null) {
                containerToFinalContainedBuilder.put(container, contained);
            }
        }
    }
    CONTAINER_TO_CONTAINED_FINAL = ImmutableMultimap.copyOf(containerToFinalContainedBuilder);
}

private static final Set<String> ALL_FINAL_REGIONS = ImmutableSet.copyOf(CONTAINER_TO_CONTAINED_FINAL.get("001"));

// end of data from CLDR

private final DistanceTable languageDesired2Supported;
private final RegionMapper regionMapper;
private final int defaultLanguageDistance;
private final int defaultScriptDistance;
private final int defaultRegionDistance;

/**
 * One level of the nested (desired, supported) distance lookup. The non-abstract
 * methods provide "empty table" defaults that subclasses may override.
 */
@Deprecated
public abstract static class DistanceTable {
    /**
     * Looks up the distance for a (desired, supported) pair.
     *
     * @param table if non-null, receives the table for the next level
     * @param starEquals see {@link StringDistanceTable#getDistance}
     */
    abstract int getDistance(String desiredLang, String supportedlang, Output<DistanceTable> table, boolean starEquals);

    abstract Set<String> getCloser(int threshold);

    abstract String toString(boolean abbreviate);

    /** Default: no compaction, returns this table. */
    public DistanceTable compact() {
        return this;
    }

    /** Default: no node, returns null. */
    public DistanceNode getInternalNode(String any, String any2) {
        return null;
    }

    /** Default: returns null. */
    public Map<String, Set<String>> getInternalMatches() {
        return null;
    }

    /** Default: the table is considered empty. */
    public boolean isEmpty() {
        return true;
    }
}

/** A distance value with no sub-table; equality and hash are based on the distance. */
@Deprecated
public static class DistanceNode {
    final int distance;

    public DistanceNode(int distance) {
        this.distance = distance;
    }

    /** Returns the next-level table; this base class has none and returns null. */
    public DistanceTable getDistanceTable() {
        return null;
    }

    @Override
    public boolean equals(Object obj) {
        return this == obj ||
                (obj != null
                && obj.getClass() == this.getClass()
                && distance == ((DistanceNode) obj).distance);
    }

    @Override
    public int hashCode() {
        return distance;
    }

    @Override
    public String toString() {
        return "\ndistance: " + distance;
    }
}

private interface IdMapper<K,V> {
    public V toId(K source);
}

/**
 * Assigns consecutive integer ids (starting at 0) to objects in order of first
 * insertion, and maps in both directions between object and id.
 */
static class IdMakerFull<T> implements IdMapper<T,Integer> {
    private final Map<T, Integer> objectToInt = new HashMap<>();
    private final List<T> intToObject = new ArrayList<>();
    final String name; // for debugging

    IdMakerFull(String name) {
        this.name = name;
    }

    IdMakerFull() {
        this("unnamed");
    }

    /** Creates a maker in which {@code zeroValue} has id 0. */
    IdMakerFull(String name, T zeroValue) {
        this(name);
        add(zeroValue);
    }

    /**
     * Return an id, making one if there wasn't one already.
     */
    public Integer add(T source) {
        Integer result = objectToInt.get(source);
        if (result == null) {
            result = intToObject.size();
            objectToInt.put(source, result);
            intToObject.add(source);
        }
        return result;
    }

    /**
     * Return an id, or null if there is none.
     */
    @Override
    public Integer toId(T source) {
        return objectToInt.get(source);
    }

    /**
     * Return the object for the id, or null if there is none.
     * An id outside the range of assigned ids yields null rather than an
     * IndexOutOfBoundsException, as the contract above promises.
     */
    public T fromId(int id) {
        return (id >= 0 && id < intToObject.size()) ? intToObject.get(id) : null;
    }

    /**
     * Return interned object
     */
    public T intern(T source) {
        return fromId(add(source));
    }

    public int size() {
        return intToObject.size();
    }

    /**
     * Same as add, except if the object didn't have an id, return null;
     */
    public Integer getOldAndAdd(T source) {
        Integer previous = objectToInt.get(source);
        if (previous == null) {
            add(source);
        }
        return previous;
    }

    @Override
    public String toString() {
        return size() + ": " + intToObject;
    }

    @Override
    public boolean equals(Object obj) {
        return this == obj ||
                (obj != null
                && obj.getClass() == this.getClass()
                && intToObject.equals(((IdMakerFull<?>) obj).intToObject));
    }

    @Override
    public int hashCode() {
        return intToObject.hashCode();
    }
}

/** A distance value together with the table for the next level. */
static class StringDistanceNode extends DistanceNode {
    final DistanceTable distanceTable;

    public StringDistanceNode(int distance, DistanceTable distanceTable) {
        super(distance);
        this.distanceTable = distanceTable;
    }

    /** Creates a node with a new, empty {@link StringDistanceTable}. */
    StringDistanceNode(int distance) {
        this(distance, new StringDistanceTable());
    }

    @Override
    public boolean equals(Object obj) {
        // super.equals covers identity, null, exact class and distance
        return super.equals(obj)
                && Objects.equals(distanceTable, ((StringDistanceNode) obj).distanceTable);
    }

    @Override
    public int hashCode() {
        return distance ^ Objects.hashCode(distanceTable);
    }

    /** Delegates to the sub-table, which must be a {@link StringDistanceTable}. */
    public void addSubtables(String desiredSub, String supportedSub, CopyIfEmpty r) {
        ((StringDistanceTable) distanceTable).addSubtables(desiredSub, supportedSub, r);
    }

    @Override
    public String toString() {
        return "distance: " + distance + "\n" + distanceTable;
    }

    /** Copies the entries of {@code value} into this node's sub-table; null is ignored. */
    public void copyTables(StringDistanceTable value) {
        if (value != null) {
            ((StringDistanceTable)distanceTable).copy(value);
        }
    }

    @Override
    public DistanceTable getDistanceTable() {
        return distanceTable;
    }
}

/**
 * Creates a distance calculator over the given table and region mapper, and caches
 * the default language, script and region distances found under the (ANY, ANY)
 * entry at each of the three nesting levels.
 *
 * @param datadistancetable2 top-level table; it is cast to {@link StringDistanceTable}
 *        and must contain (ANY, ANY) entries at all three levels
 * @param regionMapper the region mapper to use
 */
public XLocaleDistance(DistanceTable datadistancetable2, RegionMapper regionMapper) {
    languageDesired2Supported = datadistancetable2;
    this.regionMapper = regionMapper;

    StringDistanceNode languageNode = (StringDistanceNode) ((StringDistanceTable) languageDesired2Supported).subtables.get(ANY).get(ANY);
    defaultLanguageDistance = languageNode.distance;
    StringDistanceNode scriptNode = (StringDistanceNode) ((StringDistanceTable)languageNode.distanceTable).subtables.get(ANY).get(ANY);
    defaultScriptDistance = scriptNode.distance;
    DistanceNode regionNode = ((StringDistanceTable)scriptNode.distanceTable).subtables.get(ANY).get(ANY);
    defaultRegionDistance = regionNode.distance;
}

@SuppressWarnings("rawtypes")
private static Map newMap() { // for debugging
    return new TreeMap();
}

/**
 * Internal class
 */
@Deprecated
public static class StringDistanceTable extends DistanceTable {
    /** desired key to (supported key to node). */
    final Map<String, Map<String, DistanceNode>> subtables;

    StringDistanceTable(Map<String, Map<String, DistanceNode>> tables) {
        subtables = tables;
    }

    @SuppressWarnings("unchecked")
    StringDistanceTable() {
        this(newMap());
    }

    @Override
    public boolean isEmpty() {
        return subtables.isEmpty();
    }

    @Override
    public boolean equals(Object obj) {
        return this == obj ||
                (obj != null
                && obj.getClass() == this.getClass()
                && subtables.equals(((StringDistanceTable) obj).subtables));
    }

    @Override
    public int hashCode() {
        return subtables.hashCode();
    }

    /**
     * Looks up the distance for (desired, supported), falling back to wildcard
     * entries in this order: (desired, supported), (desired, ANY), (ANY, supported),
     * (ANY, ANY). If there is no row for {@code desired}, the ANY row is used from
     * the start.
     *
     * @param distanceTable if non-null, receives the sub-table of the matched node
     *        (the node is then cast to StringDistanceNode)
     * @param starEquals if true, and the match used a wildcard, and
     *        {@code desired.equals(supported)}, the result is 0
     * @return the matched node's distance, or 0 as described for {@code starEquals}
     */
    @Override
    public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
        if (TRACE_DISTANCE) {
            System.err.printf(" Entering getDistance: desired=%s supported=%s starEquals=%s\n",
                    desired, supported, Boolean.toString(starEquals));
        }
        boolean star = false;
        Map<String, DistanceNode> sub2 = subtables.get(desired);
        if (sub2 == null) {
            sub2 = subtables.get(ANY); // <*, supported>
            star = true;
        }
        DistanceNode value = sub2.get(supported); // <*/desired, supported>
        if (value == null) {
            value = sub2.get(ANY); // <*/desired, *>
            if (value == null && !star) {
                sub2 = subtables.get(ANY); // <*, supported>
                value = sub2.get(supported);
                if (value == null) {
                    value = sub2.get(ANY); // <*, *>
                }
            }
            star = true;
        }
        if (distanceTable != null) {
            distanceTable.value = ((StringDistanceNode) value).distanceTable;
        }
        int result = starEquals && star && desired.equals(supported) ? 0 : value.distance;
        if (TRACE_DISTANCE) {
            System.err.printf(" Returning from getDistance: %d\n", result);
        }
        return result;
    }

    /**
     * Adds a node for every (desired, supported) pair of {@code other} that this
     * table lacks, using the distance from {@code other}. Only this one level is
     * copied: new nodes start with an empty sub-table, existing nodes are untouched.
     */
    public void copy(StringDistanceTable other) {
        for (Entry<String, Map<String, DistanceNode>> e1 : other.subtables.entrySet()) {
            for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
                addSubtable(e1.getKey(), e2.getKey(), e2.getValue().distance);
            }
        }
    }

    /**
     * Returns the node at (desired, supported), creating it with the given distance
     * and an empty sub-table if absent. An existing node is returned unchanged.
     */
    @SuppressWarnings("unchecked")
    DistanceNode addSubtable(String desired, String supported, int distance) {
        Map<String, DistanceNode> sub2 = subtables.get(desired);
        if (sub2 == null) {
            subtables.put(desired, sub2 = newMap());
        }
        DistanceNode oldNode = sub2.get(supported);
        if (oldNode != null) {
            return oldNode;
        }

        final StringDistanceNode newNode = new StringDistanceNode(distance);
        sub2.put(supported, newNode);
        return newNode;
    }

    /**
     * Return null if value doesn't exist
     */
    private DistanceNode getNode(String desired, String supported) {
        Map<String, DistanceNode> sub2 = subtables.get(desired);
        if (sub2 == null) {
            return null;
        }
        return sub2.get(supported);
    }


    /**
     * Applies {@code action} to the node at (desired, supported). If the node does
     * not exist yet, it is first created with the distance that
     * {@link #getDistance} currently yields for the pair, and the entries of the
     * sub-table found by that lookup are copied into it.
     */
    public void addSubtables(
            String desired, String supported,
            Predicate<DistanceNode> action) {
        DistanceNode node = getNode(desired, supported);
        if (node == null) {
            // get the distance it would have
            Output<DistanceTable> node2 = new Output<>();
            int distance = getDistance(desired, supported, node2, true);
            // now add it
            node = addSubtable(desired, supported, distance);
            if (node2.value != null) {
                ((StringDistanceNode)node).copyTables((StringDistanceTable)(node2.value));
            }
        }
        action.test(node);
    }

    /**
     * Adds a (desiredScript, supportedScript) entry with the given distance under
     * every existing language pair that matches (desiredLang, supportedLang), where
     * ANY as an argument matches every key, and then under the explicit
     * (desiredLang, supportedLang) node, which is created if necessary.
     */
    public void addSubtables(String desiredLang, String supportedLang,
            String desiredScript, String supportedScript,
            int percentage) {

        // add to all the values that have the matching desiredLang and supportedLang
        final boolean anyDesired = desiredLang.equals(ANY);
        final boolean anySupported = supportedLang.equals(ANY);
        for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
            if (!anyDesired && !desiredLang.equals(e1.getKey())) {
                continue;
            }
            for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
                if (anySupported || supportedLang.equals(e2.getKey())) {
                    DistanceNode value = e2.getValue();
                    ((StringDistanceTable)value.getDistanceTable()).addSubtable(desiredScript, supportedScript, percentage);
                }
            }
        }
        // now add the sequence explicitly
        StringDistanceTable dt = new StringDistanceTable();
        dt.addSubtable(desiredScript, supportedScript, percentage);
        CopyIfEmpty r = new CopyIfEmpty(dt);
        addSubtables(desiredLang, supportedLang, r);
    }

    /**
     * Three-level variant: recurses into the script-level table of every existing
     * language pair that matches (desiredLang, supportedLang), where ANY as an
     * argument matches every key, and then adds the script/region sequence under
     * the explicit (desiredLang, supportedLang) node.
     */
    public void addSubtables(String desiredLang, String supportedLang,
            String desiredScript, String supportedScript,
            String desiredRegion, String supportedRegion,
            int percentage) {

        // add to all the values that have the matching desiredLang and supportedLang
        final boolean anyDesired = desiredLang.equals(ANY);
        final boolean anySupported = supportedLang.equals(ANY);
        for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
            if (!anyDesired && !desiredLang.equals(e1.getKey())) {
                continue;
            }
            for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
                if (anySupported || supportedLang.equals(e2.getKey())) {
                    StringDistanceNode value = (StringDistanceNode) e2.getValue();
                    ((StringDistanceTable)value.distanceTable).addSubtables(desiredScript, supportedScript, desiredRegion, supportedRegion, percentage);
                }
            }
        }
        // now add the sequence explicitly

        StringDistanceTable dt = new StringDistanceTable();
        dt.addSubtable(desiredRegion, supportedRegion, percentage);
        AddSub r = new AddSub(desiredScript, supportedScript, dt);
        addSubtables(desiredLang, supportedLang, r);
    }

    @Override
    public String toString() {
        return toString(false);
    }

    @Override
    public String toString(boolean abbreviate) {
        return toString(abbreviate, "", new IdMakerFull<>("interner"), new StringBuilder()).toString();
    }

    /**
     * Appends a tab-indented dump of this table and its nested tables to
     * {@code buffer}.
     *
     * @param abbreviate if true, a map, node or table that was already registered
     *        in {@code intern} is written as "#" plus its id instead of being
     *        expanded again
     * @param indent indentation prefix for this level
     * @param intern id registry shared across the recursion
     * @return {@code buffer}
     */
    public StringBuilder toString(boolean abbreviate, String indent, IdMakerFull<Object> intern, StringBuilder buffer) {
        String indent2 = indent.isEmpty() ? "" : "\t";
        Integer id = abbreviate ? intern.getOldAndAdd(subtables) : null;
        if (id != null) {
            buffer.append(indent2).append('#').append(id).append('\n');
        } else {
            for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
                final Map<String, DistanceNode> subsubtable = e1.getValue();
                buffer.append(indent2).append(e1.getKey());
                String indent3 = "\t";
                id = abbreviate ? intern.getOldAndAdd(subsubtable) : null;
                if (id != null) {
                    buffer.append(indent3).append('#').append(id).append('\n');
                } else {
                    for (Entry<String, DistanceNode> e2 : subsubtable.entrySet()) {
                        DistanceNode value = e2.getValue();
                        buffer.append(indent3).append(e2.getKey());
                        id = abbreviate ? intern.getOldAndAdd(value) : null;
                        if (id != null) {
                            buffer.append('\t').append('#').append(id).append('\n');
                        } else {
                            buffer.append('\t').append(value.distance);
                            final DistanceTable distanceTable = value.getDistanceTable();
                            if (distanceTable != null) {
                                id = abbreviate ? intern.getOldAndAdd(distanceTable) : null;
                                if (id != null) {
                                    buffer.append('\t').append('#').append(id).append('\n');
                                } else {
                                    ((StringDistanceTable)distanceTable).toString(abbreviate, indent+"\t\t\t", intern, buffer);
                                    buffer.append('\n');
                                }
                            } else {
                                buffer.append('\n');
                            }
                        }
                        // only the first entry shares the line with the desired key
                        indent3 = indent+'\t';
                    }
                }
                indent2 = indent;
            }
        }
        return buffer;
    }

    @Override
    public StringDistanceTable compact() {
        return new CompactAndImmutablizer().compact(this);
    }

    /**
     * Returns the desired keys that have at least one supported entry whose
     * distance is below {@code threshold}.
     */
    @Override
    public Set<String> getCloser(int threshold) {
        Set<String> result = new HashSet<>();
        for (Entry<String, Map<String, DistanceNode>> e1 : subtables.entrySet()) {
            String desired = e1.getKey();
            for (Entry<String, DistanceNode> e2 : e1.getValue().entrySet()) {
                if (e2.getValue().distance < threshold) {
                    result.add(desired);
                    break;
                }
            }
        }
        return result;
    }

    /** Returns the distance stored at exactly (a, b), or null if there is no such node. */
    public Integer getInternalDistance(String a, String b) {
        DistanceNode dnode = getInternalNode(a, b);
        return dnode == null ? null : dnode.distance;
    }

    /** Returns the node stored at exactly (a, b) without wildcard fallback, or null. */
    @Override
    public DistanceNode getInternalNode(String a, String b) {
        Map<String, DistanceNode> subsub = subtables.get(a);
        if (subsub == null) {
            return null;
        }
        return subsub.get(b);
    }

    /** Returns a new map from each desired key to a copy of its supported keys. */
    @Override
    public Map<String, Set<String>> getInternalMatches() {
        Map<String, Set<String>> result = new LinkedHashMap<>();
        for (Entry<String, Map<String, DistanceNode>> entry : subtables.entrySet()) {
            result.put(entry.getKey(), new LinkedHashSet<>(entry.getValue().keySet()));
        }
        return result;
    }
}

/**
 * Action that copies a fixed table into a node's sub-table, but only when that
 * sub-table is still empty. Always returns true.
 */
static class CopyIfEmpty implements Predicate<DistanceNode> {
    private final StringDistanceTable toCopy;

    CopyIfEmpty(StringDistanceTable resetIfNotNull) {
        this.toCopy = resetIfNotNull;
    }

    @Override
    public boolean test(DistanceNode node) {
        final StringDistanceTable subtables = (StringDistanceTable) node.getDistanceTable();
        if (subtables.subtables.isEmpty()) {
            subtables.copy(toCopy);
        }
        return true;
    }
}

/**
 * Action that descends one level: on the given node it runs
 * {@code addSubtables(desiredSub, supportedSub, ...)} with a {@link CopyIfEmpty}
 * for the supplied table. Always returns true.
 */
static class AddSub implements Predicate<DistanceNode> {
    private final String desiredSub;
    private final String supportedSub;
    private final CopyIfEmpty r;

    AddSub(String desiredSub, String supportedSub, StringDistanceTable distanceTableToCopy) {
        this.r = new CopyIfEmpty(distanceTableToCopy);
        this.desiredSub = desiredSub;
        this.supportedSub = supportedSub;
    }

    /**
     * @throws IllegalArgumentException if {@code node} is null
     */
    @Override
    public boolean test(DistanceNode node) {
        if (node == null) {
            throw new IllegalArgumentException("bad structure");
        }
        ((StringDistanceNode)node).addSubtables(desiredSub, supportedSub, r);
        return true;
    }
}

threshold, DistanceOption distanceOption)731 public int distance(ULocale desired, ULocale supported, int threshold, DistanceOption distanceOption) { 732 LSR supportedLSR = LSR.fromMaximalized(supported); 733 LSR desiredLSR = LSR.fromMaximalized(desired); 734 return distanceRaw(desiredLSR, supportedLSR, threshold, distanceOption); 735 } 736 737 /** 738 * Returns distance, from 0 to ABOVE_THRESHOLD. 739 * ULocales must be in canonical, addLikelySubtags format. Returns distance 740 */ distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption)741 public int distanceRaw(LSR desired, LSR supported, int threshold, DistanceOption distanceOption) { 742 if (TRACE_DISTANCE) { 743 System.err.printf(" Entering distanceRaw: desired=%s supported=%s " 744 + "threshold=%d preferred=%s\n", 745 desired, supported, threshold, 746 distanceOption.name()); 747 } 748 int result = distanceRaw(desired.language, supported.language, 749 desired.script, supported.script, 750 desired.region, supported.region, 751 threshold, distanceOption); 752 if (TRACE_DISTANCE) { 753 System.err.printf(" Returning from distanceRaw: %d\n", result); 754 } 755 return result; 756 } 757 758 public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST} 759 // NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight 760 // than regions, so they might be considered the "normal" case. 761 762 /** 763 * Returns distance, from 0 to ABOVE_THRESHOLD. 764 * ULocales must be in canonical, addLikelySubtags format. 765 * (Exception: internal calls may pass any strings. They do this for pseudo-locales.) 766 * Returns distance. 
767 */ distanceRaw( String desiredLang, String supportedLang, String desiredScript, String supportedScript, String desiredRegion, String supportedRegion, int threshold, DistanceOption distanceOption)768 public int distanceRaw( 769 String desiredLang, String supportedLang, 770 String desiredScript, String supportedScript, 771 String desiredRegion, String supportedRegion, 772 int threshold, 773 DistanceOption distanceOption) { 774 775 Output<DistanceTable> subtable = new Output<>(); 776 777 int distance = languageDesired2Supported.getDistance(desiredLang, supportedLang, subtable, true); 778 boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST; 779 if (scriptFirst) { 780 distance >>= 2; 781 } 782 if (distance < 0) { 783 distance = 0; 784 } else if (distance >= threshold) { 785 return ABOVE_THRESHOLD; 786 } 787 788 int scriptDistance = subtable.value.getDistance(desiredScript, supportedScript, subtable, true); 789 if (scriptFirst) { 790 scriptDistance >>= 1; 791 } 792 distance += scriptDistance; 793 if (distance >= threshold) { 794 return ABOVE_THRESHOLD; 795 } 796 797 if (desiredRegion.equals(supportedRegion)) { 798 return distance; 799 } 800 801 // From here on we know the regions are not equal 802 803 final String desiredPartition = regionMapper.toId(desiredRegion); 804 final String supportedPartition = regionMapper.toId(supportedRegion); 805 int subdistance; 806 807 // check for macros. If one is found, we take the maximum distance 808 // this could be optimized by adding some more structure, but probably not worth it. 809 810 Collection<String> desiredPartitions = desiredPartition.isEmpty() ? regionMapper.macroToPartitions.get(desiredRegion) : null; 811 Collection<String> supportedPartitions = supportedPartition.isEmpty() ? 
regionMapper.macroToPartitions.get(supportedRegion) : null; 812 if (desiredPartitions != null || supportedPartitions != null) { 813 subdistance = 0; 814 // make the code simple for now 815 if (desiredPartitions == null) { 816 desiredPartitions = Collections.singleton(desiredPartition); 817 } 818 if (supportedPartitions == null) { 819 supportedPartitions = Collections.singleton(supportedPartition); 820 } 821 822 for (String desiredPartition2 : desiredPartitions) { 823 for (String supportedPartition2 : supportedPartitions) { 824 int tempSubdistance = subtable.value.getDistance(desiredPartition2, supportedPartition2, null, false); 825 if (subdistance < tempSubdistance) { 826 subdistance = tempSubdistance; 827 } 828 } 829 } 830 } else { 831 subdistance = subtable.value.getDistance(desiredPartition, supportedPartition, null, false); 832 } 833 distance += subdistance; 834 return distance >= threshold ? ABOVE_THRESHOLD : distance; 835 } 836 837 838 private static final XLocaleDistance DEFAULT; 839 getDefault()840 public static XLocaleDistance getDefault() { 841 return DEFAULT; 842 } 843 844 static { 845 String[][] variableOverrides = { 846 {"$enUS", "AS+GU+MH+MP+PR+UM+US+VI"}, 847 848 {"$cnsar", "HK+MO"}, 849 850 {"$americas", "019"}, 851 852 {"$maghreb", "MA+DZ+TN+LY+MR+EH"}, 853 }; 854 String[] paradigmRegions = { 855 "en", "en-GB", "es", "es-419", "pt-BR", "pt-PT" 856 }; 857 String[][] regionRuleOverrides = { 858 {"ar_*_$maghreb", "ar_*_$maghreb", "96"}, 859 {"ar_*_$!maghreb", "ar_*_$!maghreb", "96"}, 860 {"ar_*_*", "ar_*_*", "95"}, 861 862 {"en_*_$enUS", "en_*_$enUS", "96"}, 863 {"en_*_$!enUS", "en_*_$!enUS", "96"}, 864 {"en_*_*", "en_*_*", "95"}, 865 866 {"es_*_$americas", "es_*_$americas", "96"}, 867 {"es_*_$!americas", "es_*_$!americas", "96"}, 868 {"es_*_*", "es_*_*", "95"}, 869 870 {"pt_*_$americas", "pt_*_$americas", "96"}, 871 {"pt_*_$!americas", "pt_*_$!americas", "96"}, 872 {"pt_*_*", "pt_*_*", "95"}, 873 874 {"zh_Hant_$cnsar", "zh_Hant_$cnsar", "96"}, 875 
{"zh_Hant_$!cnsar", "zh_Hant_$!cnsar", "96"}, 876 {"zh_Hant_*", "zh_Hant_*", "95"}, 877 878 {"*_*_*", "*_*_*", "96"}, 879 }; 880 881 Builder rmb = new RegionMapper.Builder().addParadigms(paradigmRegions); 882 for (String[] variableRule : variableOverrides) { rmb.add(variableRule[0], variableRule[1])883 rmb.add(variableRule[0], variableRule[1]); 884 } 885 if (PRINT_OVERRIDES) { 886 System.out.println("\t\t<languageMatches type=\"written\" alt=\"enhanced\">"); 887 System.out.println("\t\t\t<paradigmLocales locales=\"" + XCldrStub.join(paradigmRegions, " ") 888 + "\"/>"); 889 for (String[] variableRule : variableOverrides) { 890 System.out.println("\t\t\t<matchVariable id=\"" + variableRule[0] 891 + "\" value=\"" 892 + variableRule[1] 893 + "\"/>"); 894 } 895 } 896 897 final StringDistanceTable defaultDistanceTable = new StringDistanceTable(); 898 final RegionMapper defaultRegionMapper = rmb.build(); 899 900 Splitter bar = Splitter.on('_'); 901 902 @SuppressWarnings({"unchecked", "rawtypes"}) 903 List<Row.R4<List<String>, List<String>, Integer, Boolean>>[] sorted = new ArrayList[3]; 904 sorted[0] = new ArrayList<>(); 905 sorted[1] = new ArrayList<>(); 906 sorted[2] = new ArrayList<>(); 907 908 // sort the rules so that the language-only are first, then the language-script, and finally the language-script-region. 909 for (R4<String, String, Integer, Boolean> info : xGetLanguageMatcherData()) { 910 String desiredRaw = info.get0(); 911 String supportedRaw = info.get1(); 912 List<String> desired = bar.splitToList(desiredRaw); 913 List<String> supported = bar.splitToList(supportedRaw); 914 Boolean oneway = info.get3(); 915 int distance = desiredRaw.equals("*_*") ? 
50 : info.get2(); 916 int size = desired.size(); 917 918 // for now, skip size == 3 919 if (size == 3) continue; 920 Row.of(desired, supported, distance, oneway)921 sorted[size-1].add(Row.of(desired, supported, distance, oneway)); 922 } 923 924 for (List<Row.R4<List<String>, List<String>, Integer, Boolean>> item1 : sorted) { 925 for (Row.R4<List<String>, List<String>, Integer, Boolean> item2 : item1) { 926 List<String> desired = item2.get0(); 927 List<String> supported = item2.get1(); 928 Integer distance = item2.get2(); 929 Boolean oneway = item2.get3(); add(defaultDistanceTable, desired, supported, distance)930 add(defaultDistanceTable, desired, supported, distance); 931 if (oneway != Boolean.TRUE && !desired.equals(supported)) { add(defaultDistanceTable, supported, desired, distance)932 add(defaultDistanceTable, supported, desired, distance); 933 } printMatchXml(desired, supported, distance, oneway)934 printMatchXml(desired, supported, distance, oneway); 935 } 936 } 937 938 // add new size=3 939 for (String[] rule : regionRuleOverrides) { 940 // if (PRINT_OVERRIDES) System.out.println("\t\t\t<languageMatch desired=\"" 941 // + rule[0] 942 // + "\" supported=\"" 943 // + rule[1] 944 // + "\" distance=\"" 945 // + rule[2] 946 // + "\"/>"); 947 // if (rule[0].equals("en_*_*") || rule[1].equals("*_*_*")) { 948 // int debug = 0; 949 // } 950 List<String> desiredBase = new ArrayList<>(bar.splitToList(rule[0])); 951 List<String> supportedBase = new ArrayList<>(bar.splitToList(rule[1])); 952 Integer distance = 100-Integer.parseInt(rule[2]); printMatchXml(desiredBase, supportedBase, distance, false)953 printMatchXml(desiredBase, supportedBase, distance, false); 954 955 Collection<String> desiredRegions = defaultRegionMapper.getIdsFromVariable(desiredBase.get(2)); 956 if (desiredRegions.isEmpty()) { 957 throw new IllegalArgumentException("Bad region variable: " + desiredBase.get(2)); 958 } 959 Collection<String> supportedRegions = 
defaultRegionMapper.getIdsFromVariable(supportedBase.get(2)); 960 if (supportedRegions.isEmpty()) { 961 throw new IllegalArgumentException("Bad region variable: " + supportedBase.get(2)); 962 } 963 for (String desiredRegion2 : desiredRegions) { 964 desiredBase.set(2, desiredRegion2.toString()); // fix later 965 for (String supportedRegion2 : supportedRegions) { 966 supportedBase.set(2, supportedRegion2.toString()); // fix later add(defaultDistanceTable, desiredBase, supportedBase, distance)967 add(defaultDistanceTable, desiredBase, supportedBase, distance); add(defaultDistanceTable, supportedBase, desiredBase, distance)968 add(defaultDistanceTable, supportedBase, desiredBase, distance); 969 } 970 } 971 } 972 973 // Pseudo regions should match no other regions. 974 // {"*-*-XA", "*-*-*", "0"}, 975 // {"*-*-XB", "*-*-*", "0"}, 976 // {"*-*-XC", "*-*-*", "0"}, 977 // {"x1-*-*", "*-*-*", "0"}, 978 // {"x2-*-*", "*-*-*", "0"}, 979 // ... 980 // {"x8-*-*", "*-*-*", "0"}, 981 List<String> supported = Arrays.asList("*", "*", "*"); 982 for (String x : Arrays.asList("XA", "XB", "XC")) { 983 List<String> desired = Arrays.asList("*", "*", x); add(defaultDistanceTable, desired, supported, 100)984 add(defaultDistanceTable, desired, supported, 100); add(defaultDistanceTable, supported, desired, 100)985 add(defaultDistanceTable, supported, desired, 100); 986 } 987 // See XLikelySubtags.java for the mapping of pseudo-locales to x1 ... x8. 
988 for (int i = 1; i <= 8; ++i) { 989 List<String> desired = Arrays.asList("x" + String.valueOf(i), "*", "*"); add(defaultDistanceTable, desired, supported, 100)990 add(defaultDistanceTable, desired, supported, 100); add(defaultDistanceTable, supported, desired, 100)991 add(defaultDistanceTable, supported, desired, 100); 992 } 993 994 if (PRINT_OVERRIDES) { 995 System.out.println("\t\t</languageMatches>"); 996 } 997 998 DEFAULT = new XLocaleDistance(defaultDistanceTable.compact(), defaultRegionMapper); 999 1000 if (PRINT_OVERRIDES) { 1001 System.out.println(defaultRegionMapper); 1002 System.out.println(defaultDistanceTable); IllegalArgumentException()1003 throw new IllegalArgumentException(); 1004 } 1005 } 1006 printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway)1007 private static void printMatchXml(List<String> desired, List<String> supported, Integer distance, Boolean oneway) { 1008 if (PRINT_OVERRIDES) { 1009 String desiredStr = CollectionUtilities.join(desired, "_"); 1010 String supportedStr = CollectionUtilities.join(supported, "_"); 1011 String desiredName = fixedName(desired); 1012 String supportedName = fixedName(supported); 1013 System.out.println("\t\t\t<languageMatch" 1014 + " desired=\"" + desiredStr 1015 + "\"\tsupported=\"" + supportedStr 1016 + "\"\tdistance=\"" + distance 1017 + (!oneway ? 
"" : "\"\toneway=\"true") 1018 + "\"/>\t<!-- " + desiredName + " ⇒ " + supportedName + " -->"); 1019 } 1020 } 1021 fixedName(List<String> match)1022 private static String fixedName(List<String> match) { 1023 List<String> alt = new ArrayList<>(match); 1024 int size = alt.size(); 1025 assert size >= 1 && size <= 3; 1026 1027 StringBuilder result = new StringBuilder(); 1028 1029 if (size >= 3) { 1030 String region = alt.get(2); 1031 if (region.equals("*") || region.startsWith("$")) { 1032 result.append(region); 1033 } else { 1034 result.append(english.regionDisplayName(region)); 1035 } 1036 } 1037 if (size >= 2) { 1038 String script = alt.get(1); 1039 if (script.equals("*")) { 1040 result.insert(0, script); 1041 } else { 1042 result.insert(0, english.scriptDisplayName(script)); 1043 } 1044 } 1045 if (size >= 1) { 1046 String language = alt.get(0); 1047 if (language.equals("*")) { 1048 result.insert(0, language); 1049 } else { 1050 result.insert(0, english.languageDisplayName(language)); 1051 } 1052 } 1053 return CollectionUtilities.join(alt, "; "); 1054 } 1055 add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage)1056 static public void add(StringDistanceTable languageDesired2Supported, List<String> desired, List<String> supported, int percentage) { 1057 int size = desired.size(); 1058 if (size != supported.size() || size < 1 || size > 3) { 1059 throw new IllegalArgumentException(); 1060 } 1061 final String desiredLang = fixAny(desired.get(0)); 1062 final String supportedLang = fixAny(supported.get(0)); 1063 if (size == 1) { 1064 languageDesired2Supported.addSubtable(desiredLang, supportedLang, percentage); 1065 } else { 1066 final String desiredScript = fixAny(desired.get(1)); 1067 final String supportedScript = fixAny(supported.get(1)); 1068 if (size == 2) { 1069 languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, percentage); 1070 } else { 1071 final String 
desiredRegion = fixAny(desired.get(2)); 1072 final String supportedRegion = fixAny(supported.get(2)); 1073 languageDesired2Supported.addSubtables(desiredLang, supportedLang, desiredScript, supportedScript, desiredRegion, supportedRegion, percentage); 1074 } 1075 } 1076 } 1077 1078 @Override toString()1079 public String toString() { 1080 return toString(false); 1081 } 1082 toString(boolean abbreviate)1083 public String toString(boolean abbreviate) { 1084 return regionMapper + "\n" + languageDesired2Supported.toString(abbreviate); 1085 } 1086 1087 1088 // public static XLocaleDistance createDefaultInt() { 1089 // IntDistanceTable d = new IntDistanceTable(DEFAULT_DISTANCE_TABLE); 1090 // return new XLocaleDistance(d, DEFAULT_REGION_MAPPER); 1091 // } 1092 getContainingMacrosFor(Collection<String> input, Set<String> output)1093 static Set<String> getContainingMacrosFor(Collection<String> input, Set<String> output) { 1094 output.clear(); 1095 for (Entry<String, Set<String>> entry : CONTAINER_TO_CONTAINED.asMap().entrySet()) { 1096 if (input.containsAll(entry.getValue())) { // example; if all southern Europe are contained, then add S. Europe 1097 output.add(entry.getKey()); 1098 } 1099 } 1100 return output; 1101 } 1102 1103 static class RegionMapper implements IdMapper<String,String> { 1104 /** 1105 * Used for processing rules. At the start we have a variable setting like $A1=US+CA+MX. We generate a mapping from $A1 to a set of partitions {P1, P2} 1106 * When we hit a rule that contains a variable, we replace that rule by multiple rules for the partitions. 1107 */ 1108 final Multimap<String,String> variableToPartition; 1109 /** 1110 * Used for executing the rules. We map a region to a partition before processing. 1111 */ 1112 final Map<String,String> regionToPartition; 1113 /** 1114 * Used to support es_419 compared to es_AR, etc. 
1115 */ 1116 final Multimap<String,String> macroToPartitions; 1117 /** 1118 * Used to get the paradigm region for a cluster, if there is one 1119 */ 1120 final Set<ULocale> paradigms; 1121 RegionMapper( Multimap<String, String> variableToPartitionIn, Map<String, String> regionToPartitionIn, Multimap<String,String> macroToPartitionsIn, Set<ULocale> paradigmsIn)1122 private RegionMapper( 1123 Multimap<String, String> variableToPartitionIn, 1124 Map<String, String> regionToPartitionIn, 1125 Multimap<String,String> macroToPartitionsIn, 1126 Set<ULocale> paradigmsIn) { 1127 variableToPartition = ImmutableMultimap.copyOf(variableToPartitionIn); 1128 regionToPartition = ImmutableMap.copyOf(regionToPartitionIn); 1129 macroToPartitions = ImmutableMultimap.copyOf(macroToPartitionsIn); 1130 paradigms = ImmutableSet.copyOf(paradigmsIn); 1131 } 1132 1133 @Override toId(String region)1134 public String toId(String region) { 1135 String result = regionToPartition.get(region); 1136 return result == null ? 
"" : result; 1137 } 1138 getIdsFromVariable(String variable)1139 public Collection<String> getIdsFromVariable(String variable) { 1140 if (variable.equals("*")) { 1141 return Collections.singleton("*"); 1142 } 1143 Collection<String> result = variableToPartition.get(variable); 1144 if (result == null || result.isEmpty()) { 1145 throw new IllegalArgumentException("Variable not defined: " + variable); 1146 } 1147 return result; 1148 } 1149 regions()1150 public Set<String> regions() { 1151 return regionToPartition.keySet(); 1152 } 1153 variables()1154 public Set<String> variables() { 1155 return variableToPartition.keySet(); 1156 } 1157 1158 @Override toString()1159 public String toString() { 1160 TreeMultimap<String, String> partitionToVariables = Multimaps.invertFrom(variableToPartition, 1161 TreeMultimap.<String, String>create()); 1162 TreeMultimap<String, String> partitionToRegions = TreeMultimap.create(); 1163 for (Entry<String, String> e : regionToPartition.entrySet()) { 1164 partitionToRegions.put(e.getValue(), e.getKey()); 1165 } 1166 StringBuilder buffer = new StringBuilder(); 1167 buffer.append("Partition ➠ Variables ➠ Regions (final)"); 1168 for (Entry<String, Set<String>> e : partitionToVariables.asMap().entrySet()) { 1169 buffer.append('\n'); 1170 buffer.append(e.getKey() + "\t" + e.getValue() + "\t" + partitionToRegions.get(e.getKey())); 1171 } 1172 buffer.append("\nMacro ➠ Partitions"); 1173 for (Entry<String, Set<String>> e : macroToPartitions.asMap().entrySet()) { 1174 buffer.append('\n'); 1175 buffer.append(e.getKey() + "\t" + e.getValue()); 1176 } 1177 1178 return buffer.toString(); 1179 } 1180 1181 static class Builder { 1182 final private Multimap<String, String> regionToRawPartition = TreeMultimap.create(); 1183 final private RegionSet regionSet = new RegionSet(); 1184 final private Set<ULocale> paradigms = new LinkedHashSet<>(); 1185 add(String variable, String barString)1186 void add(String variable, String barString) { 1187 Set<String> 
tempRegions = regionSet.parseSet(barString); 1188 1189 for (String region : tempRegions) { 1190 regionToRawPartition.put(region, variable); 1191 } 1192 1193 // now add the inverse variable 1194 1195 Set<String> inverse = regionSet.inverse(); 1196 String inverseVariable = "$!" + variable.substring(1); 1197 for (String region : inverse) { 1198 regionToRawPartition.put(region, inverseVariable); 1199 } 1200 } 1201 addParadigms(String... paradigmRegions)1202 public Builder addParadigms(String... paradigmRegions) { 1203 for (String paradigm : paradigmRegions) { 1204 paradigms.add(new ULocale(paradigm)); 1205 } 1206 return this; 1207 } 1208 build()1209 RegionMapper build() { 1210 final IdMakerFull<Collection<String>> id = new IdMakerFull<>("partition"); 1211 Multimap<String,String> variableToPartitions = TreeMultimap.create(); 1212 Map<String,String> regionToPartition = new TreeMap<>(); 1213 Multimap<String,String> partitionToRegions = TreeMultimap.create(); 1214 1215 for (Entry<String, Set<String>> e : regionToRawPartition.asMap().entrySet()) { 1216 final String region = e.getKey(); 1217 final Collection<String> rawPartition = e.getValue(); 1218 String partition = String.valueOf((char)('α' + id.add(rawPartition))); 1219 1220 regionToPartition.put(region, partition); 1221 partitionToRegions.put(partition, region); 1222 1223 for (String variable : rawPartition) { 1224 variableToPartitions.put(variable, partition); 1225 } 1226 } 1227 1228 // we get a mapping of each macro to the partitions it intersects with 1229 Multimap<String,String> macroToPartitions = TreeMultimap.create(); 1230 for (Entry<String, Set<String>> e : CONTAINER_TO_CONTAINED.asMap().entrySet()) { 1231 String macro = e.getKey(); 1232 for (Entry<String, Set<String>> e2 : partitionToRegions.asMap().entrySet()) { 1233 String partition = e2.getKey(); 1234 if (!Collections.disjoint(e.getValue(), e2.getValue())) { 1235 macroToPartitions.put(macro, partition); 1236 } 1237 } 1238 } 1239 1240 return new RegionMapper( 
1241 variableToPartitions, 1242 regionToPartition, 1243 macroToPartitions, 1244 paradigms); 1245 } 1246 } 1247 } 1248 1249 /** 1250 * Parses a string of regions like "US+005-BR" and produces a set of resolved regions. 1251 * All macroregions are fully resolved to sets of non-macro regions. 1252 * <br>Syntax is simple for now: 1253 * <pre>regionSet := region ([-+] region)*</pre> 1254 * No precedence, so "x+y-y+z" is (((x+y)-y)+z) NOT (x+y)-(y+z) 1255 */ 1256 private static class RegionSet { 1257 private enum Operation {add, remove} 1258 // temporaries used in processing 1259 final private Set<String> tempRegions = new TreeSet<>(); 1260 private Operation operation = null; 1261 parseSet(String barString)1262 private Set<String> parseSet(String barString) { 1263 operation = Operation.add; 1264 int last = 0; 1265 tempRegions.clear(); 1266 int i = 0; 1267 for (; i < barString.length(); ++i) { 1268 char c = barString.charAt(i); // UTF16 is ok, since syntax is only ascii 1269 switch(c) { 1270 case '+': 1271 add(barString, last, i); 1272 last = i+1; 1273 operation = Operation.add; 1274 break; 1275 case '-': 1276 add(barString, last, i); 1277 last = i+1; 1278 operation = Operation.remove; 1279 break; 1280 } 1281 } 1282 add(barString, last, i); 1283 return tempRegions; 1284 } 1285 inverse()1286 private Set<String> inverse() { 1287 TreeSet<String> result = new TreeSet<>(ALL_FINAL_REGIONS); 1288 result.removeAll(tempRegions); 1289 return result; 1290 } 1291 add(String barString, int last, int i)1292 private void add(String barString, int last, int i) { 1293 if (i > last) { 1294 String region = barString.substring(last,i); 1295 changeSet(operation, region); 1296 } 1297 } 1298 changeSet(Operation operation, String region)1299 private void changeSet(Operation operation, String region) { 1300 Collection<String> contained = CONTAINER_TO_CONTAINED_FINAL.get(region); 1301 if (contained != null && !contained.isEmpty()) { 1302 if (Operation.add == operation) { 1303 
tempRegions.addAll(contained); 1304 } else { 1305 tempRegions.removeAll(contained); 1306 } 1307 } else if (Operation.add == operation) { 1308 tempRegions.add(region); 1309 } else { 1310 tempRegions.remove(region); 1311 } 1312 } 1313 } 1314 invertMap(Map<V,K> map)1315 public static <K,V> Multimap<K,V> invertMap(Map<V,K> map) { 1316 return Multimaps.invertFrom(Multimaps.forMap(map), LinkedHashMultimap.<K,V>create()); 1317 } 1318 getParadigms()1319 public Set<ULocale> getParadigms() { 1320 return regionMapper.paradigms; 1321 } 1322 getDefaultLanguageDistance()1323 public int getDefaultLanguageDistance() { 1324 return defaultLanguageDistance; 1325 } 1326 getDefaultScriptDistance()1327 public int getDefaultScriptDistance() { 1328 return defaultScriptDistance; 1329 } 1330 getDefaultRegionDistance()1331 public int getDefaultRegionDistance() { 1332 return defaultRegionDistance; 1333 } 1334 1335 static class CompactAndImmutablizer extends IdMakerFull<Object> { compact(StringDistanceTable item)1336 StringDistanceTable compact(StringDistanceTable item) { 1337 if (toId(item) != null) { 1338 return (StringDistanceTable) intern(item); 1339 } 1340 return new StringDistanceTable(compact(item.subtables, 0)); 1341 } 1342 @SuppressWarnings({ "unchecked", "rawtypes" }) compact(Map<K,T> item, int level)1343 <K,T> Map<K,T> compact(Map<K,T> item, int level) { 1344 if (toId(item) != null) { 1345 return (Map<K, T>) intern(item); 1346 } 1347 Map<K,T> copy = new LinkedHashMap<>(); 1348 for (Entry<K,T> entry : item.entrySet()) { 1349 T value = entry.getValue(); 1350 if (value instanceof Map) { 1351 copy.put(entry.getKey(), (T)compact((Map)value, level+1)); 1352 } else { 1353 copy.put(entry.getKey(), (T)compact((DistanceNode)value)); 1354 } 1355 } 1356 return ImmutableMap.copyOf(copy); 1357 } compact(DistanceNode item)1358 DistanceNode compact(DistanceNode item) { 1359 if (toId(item) != null) { 1360 return (DistanceNode) intern(item); 1361 } 1362 final DistanceTable distanceTable = 
item.getDistanceTable(); 1363 if (distanceTable == null || distanceTable.isEmpty()) { 1364 return new DistanceNode(item.distance); 1365 } else { 1366 return new StringDistanceNode(item.distance, compact((StringDistanceTable)((StringDistanceNode)item).distanceTable)); 1367 } 1368 } 1369 } 1370 1371 @Deprecated internalGetDistanceTable()1372 public StringDistanceTable internalGetDistanceTable() { 1373 return (StringDistanceTable) languageDesired2Supported; 1374 } 1375 main(String[] args)1376 public static void main(String[] args) { 1377 // for (Entry<String, Collection<String>> entry : containerToContained.asMap().entrySet()) { 1378 // System.out.println(entry.getKey() + "\t⥢" + entry.getValue() + "; " + containerToFinalContained.get(entry.getKey())); 1379 // } 1380 // final Multimap<String,String> regionToMacros = ImmutableMultimap.copyOf(Multimaps.invertFrom(containerToContained, TreeMultimap.create())); 1381 // for (Entry<String, Collection<String>> entry : regionToMacros.asMap().entrySet()) { 1382 // System.out.println(entry.getKey() + "\t⥤ " + entry.getValue()); 1383 // } 1384 if (PRINT_OVERRIDES) { 1385 System.out.println(getDefault().toString(true)); 1386 } 1387 DistanceTable table = getDefault().languageDesired2Supported; 1388 DistanceTable compactedTable = table.compact(); 1389 if (!table.equals(compactedTable)) { 1390 throw new IllegalArgumentException("Compaction isn't equal"); 1391 } 1392 } 1393 } 1394