1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 package com.ibm.icu.impl.locale; 4 5 import java.util.Arrays; 6 import java.util.Collection; 7 import java.util.LinkedHashSet; 8 import java.util.Map; 9 import java.util.Map.Entry; 10 import java.util.Set; 11 12 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap; 13 import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet; 14 import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap; 15 import com.ibm.icu.impl.locale.XCldrStub.Multimap; 16 import com.ibm.icu.impl.locale.XLikelySubtags.LSR; 17 import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption; 18 import com.ibm.icu.util.LocalePriorityList; 19 import com.ibm.icu.util.Output; 20 import com.ibm.icu.util.ULocale; 21 22 /** 23 * Immutable class that picks best match between user's desired locales and application's supported locales. 24 * @author markdavis 25 */ 26 public class XLocaleMatcher { 27 private static final LSR UND = new LSR("und","",""); 28 private static final ULocale UND_LOCALE = new ULocale("und"); 29 30 // Activates debugging output to stderr with details of GetBestMatch. 31 private static final boolean TRACE_MATCHER = false; 32 33 // normally the default values, but can be set via constructor 34 35 private final XLocaleDistance localeDistance; 36 private final int thresholdDistance; 37 private final int demotionPerAdditionalDesiredLocale; 38 private final DistanceOption distanceOption; 39 40 // built based on application's supported languages in constructor 41 42 private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered! 43 private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered! 44 private final ULocale defaultLanguage; 45 46 public static class Builder { 47 private Set<ULocale> supportedLanguagesList; 48 private int thresholdDistance = -1; 49 private int demotionPerAdditionalDesiredLocale = -1;; 50 private ULocale defaultLanguage; 51 private XLocaleDistance localeDistance; 52 private DistanceOption distanceOption; 53 /** 54 * @param languagePriorityList the languagePriorityList to set 55 * @return this Builder object 56 */ setSupportedLocales(String languagePriorityList)57 public Builder setSupportedLocales(String languagePriorityList) { 58 this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build()); 59 return this; 60 } setSupportedLocales(LocalePriorityList languagePriorityList)61 public Builder setSupportedLocales(LocalePriorityList languagePriorityList) { 62 this.supportedLanguagesList = asSet(languagePriorityList); 63 return this; 64 } setSupportedLocales(Set<ULocale> languagePriorityList)65 public Builder setSupportedLocales(Set<ULocale> languagePriorityList) { 66 Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order 67 temp.addAll(languagePriorityList); 68 this.supportedLanguagesList = temp; 69 return this; 70 } 71 72 /** 73 * @param thresholdDistance the thresholdDistance to set, with -1 = default 74 * @return this Builder object 75 */ setThresholdDistance(int thresholdDistance)76 public Builder setThresholdDistance(int thresholdDistance) { 77 this.thresholdDistance = thresholdDistance; 78 return this; 79 } 80 /** 81 * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default 82 * @return this Builder object 83 */ setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale)84 public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) { 85 this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale; 86 return this; 87 } 88 89 /** 90 * @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault(). 91 * @return this Builder object 92 */ setLocaleDistance(XLocaleDistance localeDistance)93 public Builder setLocaleDistance(XLocaleDistance localeDistance) { 94 this.localeDistance = localeDistance; 95 return this; 96 } 97 98 /** 99 * Set the default language, with null = default = first supported language 100 * @param defaultLanguage the default language 101 * @return this Builder object 102 */ setDefaultLanguage(ULocale defaultLanguage)103 public Builder setDefaultLanguage(ULocale defaultLanguage) { 104 this.defaultLanguage = defaultLanguage; 105 return this; 106 } 107 108 /** 109 * If true, then the language differences are smaller than than script differences. 110 * This is used in situations (such as maps) where it is better to fall back to the same script than a similar language. 111 * @param distanceOption the distance option 112 * @return this Builder object 113 */ setDistanceOption(DistanceOption distanceOption)114 public Builder setDistanceOption(DistanceOption distanceOption) { 115 this.distanceOption = distanceOption; 116 return this; 117 } 118 build()119 public XLocaleMatcher build() { 120 return new XLocaleMatcher(this); 121 } 122 123 @Override toString()124 public String toString() { 125 StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder"); 126 if (!supportedLanguagesList.isEmpty()) { 127 s.append(" supported={").append(supportedLanguagesList.toString()).append("}"); 128 } 129 if (defaultLanguage != null) { 130 s.append(" default=").append(defaultLanguage.toString()); 131 } 132 if (thresholdDistance >= 0) { 133 s.append(String.format(" thresholdDistance=%d", thresholdDistance)); 134 } 135 s.append(" preference=").append(distanceOption.name()); 136 return s.append("}").toString(); 137 } 138 } 139 140 /** 141 * Returns a builder used in chaining parameters for building a Locale Matcher. 142 * @return this Builder object 143 */ builder()144 public static Builder builder() { 145 return new Builder(); 146 } 147 148 /** Convenience method */ XLocaleMatcher(String supportedLocales)149 public XLocaleMatcher(String supportedLocales) { 150 this(builder().setSupportedLocales(supportedLocales)); 151 } 152 /** Convenience method */ XLocaleMatcher(LocalePriorityList supportedLocales)153 public XLocaleMatcher(LocalePriorityList supportedLocales) { 154 this(builder().setSupportedLocales(supportedLocales)); 155 } 156 /** Convenience method */ XLocaleMatcher(Set<ULocale> supportedLocales)157 public XLocaleMatcher(Set<ULocale> supportedLocales) { 158 this(builder().setSupportedLocales(supportedLocales)); 159 } 160 161 /** 162 * Create a locale matcher with the given parameters. 163 * @param supportedLocales 164 * @param thresholdDistance 165 * @param demotionPerAdditionalDesiredLocale 166 * @param localeDistance 167 * @param likelySubtags 168 */ XLocaleMatcher(Builder builder)169 private XLocaleMatcher(Builder builder) { 170 localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault() 171 : builder.localeDistance; 172 thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance() 173 : builder.thresholdDistance; 174 // only do AFTER above are set 175 Set<LSR> paradigms = extractLsrSet(localeDistance.getParadigms()); 176 final Multimap<LSR, ULocale> temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms); 177 supportedLanguages = temp2.asMap(); 178 exactSupportedLocales = ImmutableSet.copyOf(temp2.values()); 179 defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage 180 : supportedLanguages.isEmpty() ? null 181 : supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language 182 demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? localeDistance.getDefaultRegionDistance()+1 183 : builder.demotionPerAdditionalDesiredLocale; 184 distanceOption = builder.distanceOption; 185 } 186 187 // Result is not immutable! 188 private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) { 189 Set<LSR> result = new LinkedHashSet<LSR>(); 190 for (ULocale item : languagePriorityList) { 191 final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item); 192 result.add(max); 193 } 194 return result; 195 } 196 197 private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) { 198 Multimap<LSR, ULocale> builder = LinkedHashMultimap.create(); 199 for (ULocale item : languagePriorityList) { 200 final LSR max = item.equals(UND_LOCALE) ? UND : 201 LSR.fromMaximalized(item); 202 builder.put(max, item); 203 } 204 if (builder.size() > 1 && priorities != null) { 205 // for the supported list, we put any priorities before all others, except for the first. 206 Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create(); 207 208 // copy the long way so the priorities are in the same order as in the original 209 boolean first = true; 210 for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) { 211 final LSR key = entry.getKey(); 212 if (first || priorities.contains(key)) { 213 builder2.putAll(key, entry.getValue()); 214 first = false; 215 } 216 } 217 // now copy the rest 218 builder2.putAll(builder); 219 if (!builder2.equals(builder)) { 220 throw new IllegalArgumentException(); 221 } 222 builder = builder2; 223 } 224 return ImmutableMultimap.copyOf(builder); 225 } 226 227 228 /** Convenience method */ 229 public ULocale getBestMatch(ULocale ulocale) { 230 return getBestMatch(ulocale, null); 231 } 232 /** Convenience method */ 233 public ULocale getBestMatch(String languageList) { 234 return getBestMatch(LocalePriorityList.add(languageList).build(), null); 235 } 236 /** Convenience method */ 237 public ULocale getBestMatch(ULocale... locales) { 238 return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null); 239 } 240 /** Convenience method */ 241 public ULocale getBestMatch(Set<ULocale> desiredLanguages) { 242 return getBestMatch(desiredLanguages, null); 243 } 244 /** Convenience method */ 245 public ULocale getBestMatch(LocalePriorityList desiredLanguages) { 246 return getBestMatch(desiredLanguages, null); 247 } 248 /** Convenience method */ 249 public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) { 250 return getBestMatch(asSet(desiredLanguages), outputBestDesired); 251 } 252 253 // TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList 254 private static Set<ULocale> asSet(LocalePriorityList languageList) { 255 Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order 256 for (ULocale locale : languageList) { 257 temp.add(locale); 258 }; 259 return temp; 260 } 261 262 /** 263 * Get the best match between the desired languages and supported languages 264 * @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first. 265 * @param outputBestDesired The one of the desired languages that matched best. 266 * Set to null if the best match was not below the threshold distance. 267 * @return the best match. 268 */ 269 public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) { 270 // fast path for singleton 271 if (desiredLanguages.size() == 1) { 272 return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired); 273 } 274 // TODO produce optimized version for single desired ULocale 275 Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null); 276 int bestDistance = Integer.MAX_VALUE; 277 ULocale bestDesiredLocale = null; 278 Collection<ULocale> bestSupportedLocales = null; 279 int delta = 0; 280 mainLoop: 281 for (final Entry<LSR, Set<ULocale>> desiredLsrAndLocales : desiredLSRs.asMap().entrySet()) { 282 LSR desiredLSR = desiredLsrAndLocales.getKey(); 283 for (ULocale desiredLocale : desiredLsrAndLocales.getValue()) { 284 // quick check for exact match 285 if (delta < bestDistance) { 286 if (exactSupportedLocales.contains(desiredLocale)) { 287 if (outputBestDesired != null) { 288 outputBestDesired.value = desiredLocale; 289 } 290 if (TRACE_MATCHER) { 291 System.err.printf( 292 "Returning %s, which is an exact match for a supported language\n", 293 desiredLocale); 294 } 295 return desiredLocale; 296 } 297 // quick check for maximized locale 298 Collection<ULocale> found = supportedLanguages.get(desiredLSR); 299 if (found != null) { 300 // if we find one in the set, return first (lowest). We already know the exact one isn't 301 // there. 302 if (outputBestDesired != null) { 303 outputBestDesired.value = desiredLocale; 304 } 305 ULocale result = found.iterator().next(); 306 if (TRACE_MATCHER) { 307 System.err.printf("Returning %s\n", result.toString()); 308 } 309 return result; 310 } 311 } 312 for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) { 313 int distance = 314 delta 315 + localeDistance.distanceRaw( 316 desiredLSR, 317 supportedLsrAndLocale.getKey(), 318 thresholdDistance, 319 distanceOption); 320 if (distance < bestDistance) { 321 bestDistance = distance; 322 bestDesiredLocale = desiredLocale; 323 bestSupportedLocales = supportedLsrAndLocale.getValue(); 324 if (distance == 0) { 325 break mainLoop; 326 } 327 } 328 } 329 delta += demotionPerAdditionalDesiredLocale; 330 } 331 } 332 if (bestDistance >= thresholdDistance) { 333 if (outputBestDesired != null) { 334 outputBestDesired.value = null; 335 } 336 if (TRACE_MATCHER) { 337 System.err.printf("Returning default %s\n", defaultLanguage.toString()); 338 } 339 return defaultLanguage; 340 } 341 if (outputBestDesired != null) { 342 outputBestDesired.value = bestDesiredLocale; 343 } 344 // pick exact match if there is one 345 if (bestSupportedLocales.contains(bestDesiredLocale)) { 346 if (TRACE_MATCHER) { 347 System.err.printf( 348 "Returning %s which matches a supported language\n", bestDesiredLocale.toString()); 349 } 350 return bestDesiredLocale; 351 } 352 // otherwise return first supported, combining variants and extensions from bestDesired 353 ULocale result = bestSupportedLocales.iterator().next(); 354 if (TRACE_MATCHER) { 355 System.err.printf("Returning first supported language %s\n", result.toString()); 356 } 357 return result; 358 } 359 360 /** 361 * Get the best match between the desired languages and supported languages 362 * @param desiredLocale the supplied user's language. 363 * @param outputBestDesired The one of the desired languages that matched best. 364 * Set to null if the best match was not below the threshold distance. 365 * @return the best match. 366 */ 367 public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) { 368 int bestDistance = Integer.MAX_VALUE; 369 ULocale bestDesiredLocale = null; 370 Collection<ULocale> bestSupportedLocales = null; 371 372 // quick check for exact match, with hack for und 373 final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale); 374 375 if (exactSupportedLocales.contains(desiredLocale)) { 376 if (outputBestDesired != null) { 377 outputBestDesired.value = desiredLocale; 378 } 379 if (TRACE_MATCHER) { 380 System.err.printf("Exact match with a supported locale.\n"); 381 } 382 return desiredLocale; 383 } 384 // quick check for maximized locale 385 if (distanceOption == DistanceOption.REGION_FIRST) { 386 Collection<ULocale> found = supportedLanguages.get(desiredLSR); 387 if (found != null) { 388 // if we find one in the set, return first (lowest). We already know the exact one isn't there. 389 if (outputBestDesired != null) { 390 outputBestDesired.value = desiredLocale; 391 } 392 ULocale result = found.iterator().next(); 393 if (TRACE_MATCHER) { 394 System.err.printf("Matches a maximized supported locale: %s\n", result); 395 } 396 return result; 397 } 398 } 399 for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) { 400 int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(), 401 thresholdDistance, distanceOption); 402 if (distance < bestDistance) { 403 bestDistance = distance; 404 bestDesiredLocale = desiredLocale; 405 bestSupportedLocales = supportedLsrAndLocale.getValue(); 406 if (distance == 0) { 407 break; 408 } 409 } 410 } 411 if (bestDistance >= thresholdDistance) { 412 if (outputBestDesired != null) { 413 outputBestDesired.value = null; 414 } 415 if (TRACE_MATCHER) { 416 System.err.printf( 417 "Returning default %s because everything exceeded the threshold of %d.\n", 418 defaultLanguage, thresholdDistance); 419 } 420 return defaultLanguage; 421 } 422 if (outputBestDesired != null) { 423 outputBestDesired.value = bestDesiredLocale; 424 } 425 // pick exact match if there is one 426 if (bestSupportedLocales.contains(bestDesiredLocale)) { 427 return bestDesiredLocale; 428 } 429 // otherwise return first supported, combining variants and extensions from bestDesired 430 ULocale result = bestSupportedLocales.iterator().next(); 431 if (TRACE_MATCHER) { 432 System.err.printf("First in the list of supported locales: %s\n", result); 433 } 434 return result; 435 } 436 437 /** Combine features of the desired locale into those of the supported, and return result. */ 438 public static ULocale combine(ULocale bestSupported, ULocale bestDesired) { 439 // for examples of extensions, variants, see 440 // http://unicode.org/repos/cldr/tags/latest/common/bcp47/ 441 // http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml 442 443 if (!bestSupported.equals(bestDesired) && bestDesired != null) { 444 // add region, variants, extensions 445 ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported); 446 447 // copy the region from the desired, if there is one 448 String region = bestDesired.getCountry(); 449 if (!region.isEmpty()) { 450 b.setRegion(region); 451 } 452 453 // copy the variants from desired, if there is one 454 // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster) 455 String variants = bestDesired.getVariant(); 456 if (!variants.isEmpty()) { 457 b.setVariant(variants); 458 } 459 460 // copy the extensions from desired, if there are any 461 // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar) 462 for (char extensionKey : bestDesired.getExtensionKeys()) { 463 b.setExtension(extensionKey, bestDesired.getExtension(extensionKey)); 464 } 465 bestSupported = b.build(); 466 } 467 return bestSupported; 468 } 469 470 /** Returns the distance between the two languages. The values are not necessarily symmetric. 471 * @param desired A locale desired by the user 472 * @param supported A locale supported by a program. 473 * @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance). 474 * A language is first maximized with add likely subtags, then compared. 475 */ 476 public int distance(ULocale desired, ULocale supported) { 477 return localeDistance.distanceRaw( 478 LSR.fromMaximalized(desired), 479 LSR.fromMaximalized(supported), thresholdDistance, distanceOption); 480 } 481 482 /** Convenience method */ 483 public int distance(String desiredLanguage, String supportedLanguage) { 484 return localeDistance.distanceRaw( 485 LSR.fromMaximalized(new ULocale(desiredLanguage)), 486 LSR.fromMaximalized(new ULocale(supportedLanguage)), 487 thresholdDistance, distanceOption); 488 } 489 490 @Override 491 public String toString() { 492 return exactSupportedLocales.toString(); 493 } 494 495 /** Return the inverse of the distance: that is, 1-distance(desired, supported) */ 496 public double match(ULocale desired, ULocale supported) { 497 return (100-distance(desired, supported))/100.0; 498 } 499 500 /** 501 * Returns a fraction between 0 and 1, where 1 means that the languages are a 502 * perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0. 503 * <br>Note that 504 * the precise values may change over time; no code should be made dependent 505 * on the values remaining constant. 506 * @param desired Desired locale 507 * @param desiredMax Maximized locale (using likely subtags) 508 * @param supported Supported locale 509 * @param supportedMax Maximized locale (using likely subtags) 510 * @return value between 0 and 1, inclusive. 511 * @deprecated Use the form with 2 parameters instead. 512 */ 513 @Deprecated 514 public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) { 515 return match(desired, supported); 516 } 517 518 /** 519 * Canonicalize a locale (language). Note that for now, it is canonicalizing 520 * according to CLDR conventions (he vs iw, etc), since that is what is needed 521 * for likelySubtags. 522 * @param ulocale language/locale code 523 * @return ULocale with remapped subtags. 524 * @stable ICU 4.4 525 */ 526 public ULocale canonicalize(ULocale ulocale) { 527 // TODO 528 return null; 529 } 530 531 /** 532 * @return the thresholdDistance. Any distance above this value is treated as a match failure. 533 */ 534 public int getThresholdDistance() { 535 return thresholdDistance; 536 } 537 } 538