/*
 ****************************************************************************************
 * Copyright (C) 2009-2015, Google, Inc.; International Business Machines Corporation   *
 * and others. All Rights Reserved.                                                     *
 ****************************************************************************************
 */
package com.ibm.icu.util;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;

/**
 * Provides a way to match the languages (locales) supported by a product to the
 * languages (locales) acceptable to a user, and get the best match. For
 * example:
 *
 * <pre>
 * LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
 *
 * // afterwards:
 * matcher.getBestMatch("en-US").toLanguageTag() =&gt; "en"
 * </pre>
 *
 * It takes into account when languages are close to one another, such as fil
 * and tl, and when language regional variants are close, like en-GB and en-AU.
 * It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
 * file.
 * <p>All classes implementing this interface should be immutable. Often a
 * product will just need one static instance, built with the languages
 * that it supports. However, it may want multiple instances with different
 * default languages based on additional information, such as the domain.
 *
 * @author markdavis@google.com
 * @stable ICU 4.4
 */
public class LocaleMatcher {

    public static final boolean DEBUG = false;

    private static final ULocale UNKNOWN_LOCALE = new ULocale("und");

    /**
     * Threshold for falling back to the default (first) language. May make this
     * a parameter in the future.
     */
    private static final double DEFAULT_THRESHOLD = 0.5;

    /**
     * Additional penalty applied to each successive entry of a desired-language
     * list in {@link #getBestMatch(LocalePriorityList)}: the first entry is not
     * penalized, the second is penalized once, the third twice, and so on.
     */
    private static final double PRIORITY_PENALTY_STEP = 0.07000001;

    /**
     * The default language, in case the threshold is not met.
     */
    private final ULocale defaultLanguage;

    /**
     * The minimum weight a match must reach; when the best weight is below this
     * value, {@link #defaultLanguage} is returned instead.
     */
    private final double threshold;

    /**
     * Create a new language matcher. The highest-weighted language is the
     * default. That means that if no other language matches closer than a given
     * threshold, that default language is chosen. Typically the default is English,
     * but it could be different based on additional information, such as the domain
     * of the page.
     *
     * @param languagePriorityList weighted list
     * @stable ICU 4.4
     */
    public LocaleMatcher(LocalePriorityList languagePriorityList) {
        this(languagePriorityList, defaultWritten);
    }

    /**
     * Create a new language matcher from a String form. The highest-weighted
     * language is the default.
     *
     * @param languagePriorityListString String form of LanguagePriorityList
     * @stable ICU 4.4
     */
    public LocaleMatcher(String languagePriorityListString) {
        this(LocalePriorityList.add(languagePriorityListString).build());
    }

    /**
     * Internal testing function; may expose API later.
     * @param languagePriorityList LocalePriorityList to match
     * @param matcherData Internal matching data
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData) {
        this(languagePriorityList, matcherData, DEFAULT_THRESHOLD);
    }

    /**
     * Internal testing function; may expose API later.
     * @param languagePriorityList LocalePriorityList to match
     * @param matcherData Internal matching data; when {@code null} the default
     *            data loaded by the static initializer is used, otherwise the
     *            supplied data is frozen
     * @param threshold minimum weight below which the default language is returned
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
        this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
        for (final ULocale language : languagePriorityList) {
            add(language, languagePriorityList.getWeight(language));
        }
        processMapping();
        // The first entry produced by the list's iterator becomes the default language.
        Iterator<ULocale> it = languagePriorityList.iterator();
        defaultLanguage = it.hasNext() ? it.next() : null;
        this.threshold = threshold;
    }

    /**
     * Returns a fraction between 0 and 1, where 1 means that the languages are a
     * perfect match, and 0 means that they are completely different. Note that
     * the precise values may change over time; no code should be made dependent
     * on the values remaining constant.
     * @param desired Desired locale
     * @param desiredMax Maximized locale (using likely subtags)
     * @param supported Supported locale
     * @param supportedMax Maximized locale (using likely subtags)
     * @return value between 0 and 1, inclusive.
     * @stable ICU 4.4
     */
    public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
        return matcherData.match(desired, desiredMax, supported, supportedMax);
    }

    /**
     * Canonicalize a locale (language). Note that for now, it is canonicalizing
     * according to CLDR conventions (he vs iw, etc), since that is what is needed
     * for likelySubtags.
     * @param ulocale language/locale code
     * @return ULocale with remapped subtags; the argument itself when nothing
     *         needed remapping.
     * @stable ICU 4.4
     */
    public ULocale canonicalize(ULocale ulocale) {
        // TODO Get the data from CLDR, use Java conventions.
        String lang = ulocale.getLanguage();
        String lang2 = canonicalMap.get(lang);
        String script = ulocale.getScript();
        String script2 = canonicalMap.get(script);
        String region = ulocale.getCountry();
        String region2 = canonicalMap.get(region);
        if (lang2 != null || script2 != null || region2 != null) {
            return new ULocale(
                    lang2 == null ? lang : lang2,
                    script2 == null ? script : script2,
                    region2 == null ? region : region2);
        }
        return ulocale;
    }

    /**
     * Get the best match for a LanguagePriorityList
     *
     * @param languageList list to match
     * @return best matching language code, or the default language when no
     *         candidate reaches the threshold
     * @stable ICU 4.4
     */
    public ULocale getBestMatch(LocalePriorityList languageList) {
        double bestWeight = 0;
        ULocale bestTableMatch = null;
        double penalty = 0;
        OutputDouble matchWeight = new OutputDouble();
        for (final ULocale language : languageList) {
            final ULocale matchLocale = getBestMatchInternal(language, matchWeight);
            final double weight = matchWeight.value * languageList.getWeight(language) - penalty;
            if (weight > bestWeight) {
                bestWeight = weight;
                bestTableMatch = matchLocale;
            }
            // Later entries in the desired list are progressively penalized.
            penalty += PRIORITY_PENALTY_STEP;
        }
        if (bestWeight < threshold) {
            bestTableMatch = defaultLanguage;
        }
        return bestTableMatch;
    }

    /**
     * Convenience method: Get the best match for a LanguagePriorityList
     *
     * @param languageList String form of language priority list
     * @return best matching language code
     * @stable ICU 4.4
     */
    public ULocale getBestMatch(String languageList) {
        return getBestMatch(LocalePriorityList.add(languageList).build());
    }

    /**
     * Get the best match for an individual language code.
     *
     * @param ulocale locale/language code to match
     * @return best matching language code
     * @stable ICU 4.4
     */
    public ULocale getBestMatch(ULocale ulocale) {
        return getBestMatchInternal(ulocale, null);
    }

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public ULocale getBestMatch(ULocale... ulocales) {
        return getBestMatch(LocalePriorityList.add(ulocales).build());
    }

    /**
     * {@inheritDoc}
     * @stable ICU 4.4
     */
    @Override
    public String toString() {
        return "{" + defaultLanguage + ", "
                + localeToMaxLocaleAndWeight + "}";
    }

    // ================= Privates =====================

    /**
     * Get the best match for an individual language code.
     *
     * @param languageCode locale to match; it is canonicalized and maximized first
     * @param outputWeight if non-null, receives the best weight found (even when
     *            the default language is returned because the threshold was not met)
     * @return best matching language code and weight (as per
     *         {@link #match(ULocale, ULocale)})
     */
    private ULocale getBestMatchInternal(ULocale languageCode, OutputDouble outputWeight) {
        languageCode = canonicalize(languageCode);
        final ULocale maximized = addLikelySubtags(languageCode);
        if (DEBUG) {
            System.out.println("\ngetBestMatchInternal: " + languageCode + ";\t" + maximized);
        }
        double bestWeight = 0;
        ULocale bestTableMatch = null;
        String baseLanguage = maximized.getLanguage();
        Set<R3<ULocale, ULocale, Double>> searchTable = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(baseLanguage);
        if (searchTable != null) { // we preprocessed the table so as to filter by language
            if (DEBUG) {
                System.out.println("\tSearching: " + searchTable);
            }
            for (final R3<ULocale, ULocale, Double> tableKeyValue : searchTable) {
                ULocale tableKey = tableKeyValue.get0();
                ULocale maxLocale = tableKeyValue.get1();
                Double matchedWeight = tableKeyValue.get2();
                final double match = match(languageCode, maximized, tableKey, maxLocale);
                if (DEBUG) {
                    System.out.println("\t" + tableKeyValue + ";\t" + match + "\n");
                }
                final double weight = match * matchedWeight;
                if (weight > bestWeight) {
                    bestWeight = weight;
                    bestTableMatch = tableKey;
                    if (weight > 0.999d) { // bail on good enough match.
                        break;
                    }
                }
            }
        }
        if (bestWeight < threshold) {
            bestTableMatch = defaultLanguage;
        }
        if (outputWeight != null) {
            outputWeight.value = bestWeight; // only return the weight when needed
        }
        return bestTableMatch;
    }

    public static class OutputDouble { // TODO, move to where OutputInt is
        double value;
    }

    /**
     * Records a supported locale: canonicalizes it, pairs it with its maximized
     * form and weight, and stores the frozen row.
     */
    private void add(ULocale language, Double weight) {
        language = canonicalize(language);
        R3<ULocale, ULocale, Double> row = Row.of(language, addLikelySubtags(language), weight);
        row.freeze();
        localeToMaxLocaleAndWeight.add(row);
    }

    /**
     * We preprocess the data to get just the possible matches for each desired base language.
     */
    private void processMapping() {
        for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
            String desired = desiredToMatchingLanguages.getKey();
            Set<String> supported = desiredToMatchingLanguages.getValue();
            for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
                final ULocale key = localeToMaxAndWeight.get0();
                String lang = key.getLanguage();
                if (supported.contains(lang)) {
                    addFiltered(desired, localeToMaxAndWeight);
                }
            }
        }
        // now put in the values directly, since languages always map to themselves
        for (R3<ULocale, ULocale, Double> localeToMaxAndWeight : localeToMaxLocaleAndWeight) {
            final ULocale key = localeToMaxAndWeight.get0();
            String lang = key.getLanguage();
            addFiltered(lang, localeToMaxAndWeight);
        }
    }

    /**
     * Adds a supported-locale row to the candidate set for the given desired
     * language, creating that set on first use.
     */
    private void addFiltered(String desired, R3<ULocale, ULocale, Double> localeToMaxAndWeight) {
        Set<R3<ULocale, ULocale, Double>> map = desiredLanguageToPossibleLocalesToMaxLocaleToData.get(desired);
        if (map == null) {
            desiredLanguageToPossibleLocalesToMaxLocaleToData.put(desired, map = new LinkedHashSet<R3<ULocale, ULocale, Double>>());
        }
        map.add(localeToMaxAndWeight);
        if (DEBUG) {
            System.out.println(desired + ", " + localeToMaxAndWeight);
        }
    }

    /** Rows of (supported locale, maximized supported locale, weight), in insertion order. */
    Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();

    /** For each desired base language, the supported-locale rows that are candidates for it. */
    Map<String, Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData = new LinkedHashMap<String, Set<Row.R3<ULocale, ULocale, Double>>>();

    // =============== Special Mapping Information ==============

    /**
     * We need to add another method to addLikelySubtags that doesn't return
     * null, but instead substitutes Zzzz and ZZ if unknown. There are also
     * a few cases where addLikelySubtags needs to have expanded data, to handle
     * all deprecated codes.
     * @param languageCode locale to maximize
     * @return "fixed" addLikelySubtags
     */
    private ULocale addLikelySubtags(ULocale languageCode) {
        // max("und") = "en_Latn_US", and since matching is based on maximized tags, the undefined
        // language would normally match English. But that would produce the counterintuitive results
        // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
        // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
        //
        // To avoid that, we change the matcher's definitions of max (AddLikelySubtagsWithDefaults)
        // so that max("und")="und". That produces the following, more desirable results:
        if (languageCode.equals(UNKNOWN_LOCALE)) {
            return UNKNOWN_LOCALE;
        }
        final ULocale result = ULocale.addLikelySubtags(languageCode);
        // should have method on getLikelySubtags for this
        if (result == null || result.equals(languageCode)) {
            final String language = languageCode.getLanguage();
            final String script = languageCode.getScript();
            final String region = languageCode.getCountry();
            return new ULocale((language.length() == 0 ? "und"
                    : language)
                    + "_"
                    + (script.length() == 0 ? "Zzzz" : script)
                    + "_"
                    + (region.length() == 0 ? "ZZ" : region));
        }
        return result;
    }

    /**
     * A parsed {@code language[-script[-region]]} pattern in which any field may
     * be the wildcard {@code *}.
     */
    private static class LocalePatternMatcher {
        // a value of null means a wildcard; matches any.
        private final String lang;
        private final String script;
        private final String region;
        private final Level level;

        private static final Pattern PATTERN = Pattern.compile(
                "([a-z]{1,8}|\\*)"
                        + "(?:[_-]([A-Z][a-z]{3}|\\*))?"
                        + "(?:[_-]([A-Z]{2}|[0-9]{3}|\\*))?");

        /**
         * @param toMatch pattern text to parse
         * @throws IllegalArgumentException if the text does not match the pattern syntax
         */
        public LocalePatternMatcher(String toMatch) {
            Matcher matcher = PATTERN.matcher(toMatch);
            if (!matcher.matches()) {
                throw new IllegalArgumentException("Bad pattern: " + toMatch);
            }
            String langPart = matcher.group(1);
            String scriptPart = matcher.group(2);
            String regionPart = matcher.group(3);
            // The level reflects which fields were written, wildcards included,
            // so it is computed before wildcards are mapped to null.
            level = regionPart != null ? Level.region : scriptPart != null ? Level.script : Level.language;

            lang = "*".equals(langPart) ? null : langPart;
            script = "*".equals(scriptPart) ? null : scriptPart;
            region = "*".equals(regionPart) ? null : regionPart;
        }

        /** Returns true when every non-wildcard field equals the locale's corresponding subtag. */
        boolean matches(ULocale ulocale) {
            if (lang != null && !lang.equals(ulocale.getLanguage())) {
                return false;
            }
            if (script != null && !script.equals(ulocale.getScript())) {
                return false;
            }
            if (region != null && !region.equals(ulocale.getCountry())) {
                return false;
            }
            return true;
        }

        public Level getLevel() {
            return level;
        }

        public String getLanguage() {
            return (lang == null ? "*" : lang);
        }

        public String getScript() {
            return (script == null ? "*" : script);
        }

        public String getRegion() {
            return (region == null ? "*" : region);
        }

        @Override
        public String toString() {
            String result = getLanguage();
            if (level != Level.language) {
                result += "-" + getScript();
                if (level != Level.script) {
                    result += "-" + getRegion();
                }
            }
            return result;
        }

        /* (non-Javadoc)
         * @see java.lang.Object#equals(java.lang.Object)
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            // Guard against null and foreign types instead of failing on the cast.
            if (!(obj instanceof LocalePatternMatcher)) {
                return false;
            }
            LocalePatternMatcher other = (LocalePatternMatcher) obj;
            return Objects.equals(level, other.level)
                    && Objects.equals(lang, other.lang)
                    && Objects.equals(script, other.script)
                    && Objects.equals(region, other.region);
        }

        /* (non-Javadoc)
         * @see java.lang.Object#hashCode()
         */
        @Override
        public int hashCode() {
            return level.ordinal()
                    ^ (lang == null ? 0 : lang.hashCode())
                    ^ (script == null ? 0 : script.hashCode())
                    ^ (region == null ? 0 : region.hashCode());
        }
    }

    /**
     * Granularity of a pattern; {@code worst} is the distance returned when no
     * explicit rule matches at that level.
     */
    enum Level {
        language(0.99),
        script(0.2),
        region(0.04);

        final double worst;

        Level(double d) {
            worst = d;
        }
    }

    /** The ordered distance rules for one {@link Level}. */
    private static class ScoreData implements Freezable<ScoreData> {
        @SuppressWarnings("unused")
        private static final double maxUnequal_changeD_sameS = 0.5;

        @SuppressWarnings("unused")
        private static final double maxUnequal_changeEqual = 0.75;

        LinkedHashSet<Row.R3<LocalePatternMatcher, LocalePatternMatcher, Double>> scores = new LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>();
        final Level level;

        public ScoreData(Level level) {
            this.level = level;
        }

        /**
         * Appends a rule. The {@code desired} and {@code supported} arguments are
         * currently not used; only {@code data} is stored.
         *
         * @throws ICUException if an equal rule was already present
         */
        void addDataToScores(String desired, String supported, R3<LocalePatternMatcher, LocalePatternMatcher, Double> data) {
            boolean added = scores.add(data);
            if (!added) {
                throw new ICUException("trying to add duplicate data: " + data);
            }
        }

        /**
         * Returns the distance contributed by this level: the rule-based distance
         * when the maximized fields differ, 0.001 when only the raw fields
         * differ, and 0 otherwise.
         */
        double getScore(ULocale dMax, String desiredRaw, String desiredMax,
                ULocale sMax, String supportedRaw, String supportedMax) {
            double distance = 0;
            if (!desiredMax.equals(supportedMax)) {
                distance = getRawScore(dMax, sMax);
            } else if (!desiredRaw.equals(supportedRaw)) { // maxes are equal, changes are equal
                distance += 0.001;
            }
            return distance;
        }

        /**
         * Returns the value of the first rule (in insertion order) whose patterns
         * match both locales, or {@code level.worst} when none does.
         */
        private double getRawScore(ULocale desiredLocale, ULocale supportedLocale) {
            if (DEBUG) {
                System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
            }
            for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> datum : scores) {
                if (datum.get0().matches(desiredLocale)
                        && datum.get1().matches(supportedLocale)) {
                    if (DEBUG) {
                        System.out.println("\t\t\t\tFOUND\t" + datum);
                    }
                    return datum.get2();
                }
            }
            if (DEBUG) {
                System.out.println("\t\t\t\tNOTFOUND\t" + level.worst);
            }
            return level.worst;
        }

        @Override
        public String toString() {
            StringBuilder result = new StringBuilder().append(level);
            for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
                result.append("\n\t\t").append(score);
            }
            return result.toString();
        }

        @Override
        @SuppressWarnings("unchecked")
        public ScoreData cloneAsThawed() {
            try {
                ScoreData result = (ScoreData) clone();
                result.scores = (LinkedHashSet<R3<LocalePatternMatcher, LocalePatternMatcher, Double>>) result.scores.clone();
                result.frozen = false;
                return result;
            } catch (CloneNotSupportedException e) {
                throw new ICUCloneNotSupportedException(e); // will never happen
            }

        }

        private volatile boolean frozen = false;

        /**
         * Marks this object as frozen so that {@link #isFrozen()} reports it.
         * Note that {@link #addDataToScores} does not itself check the flag.
         */
        @Override
        public ScoreData freeze() {
            frozen = true;
            return this;
        }

        @Override
        public boolean isFrozen() {
            return frozen;
        }

        /**
         * Builds a frozen relation from each explicitly named desired language to
         * the explicitly named supported languages it has a rule for; rules with
         * a wildcard language on either side are skipped.
         */
        public Relation<String, String> getMatchingLanguages() {
            Relation<String, String> desiredToSupported = Relation.of(new LinkedHashMap<String, Set<String>>(), HashSet.class);
            for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> item : scores) {
                LocalePatternMatcher desired = item.get0();
                LocalePatternMatcher supported = item.get1();
                if (desired.lang != null && supported.lang != null) { // explicitly mentioned languages must have reasonable distance
                    desiredToSupported.put(desired.lang, supported.lang);
                }
            }
            desiredToSupported.freeze();
            return desiredToSupported;
        }
    }

    /**
     * Only for testing and use by tools. Interface may change!!
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static class LanguageMatcherData implements Freezable<LanguageMatcherData> {
        private ScoreData languageScores = new ScoreData(Level.language);
        private ScoreData scriptScores = new ScoreData(Level.script);
        private ScoreData regionScores = new ScoreData(Level.region);
        // Computed by freeze(); null until then.
        private Relation<String, String> matchingLanguages;
        private volatile boolean frozen = false;

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public LanguageMatcherData() {
        }

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public Relation<String, String> matchingLanguages() {
            return matchingLanguages;
        }

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Override
        @Deprecated
        public String toString() {
            return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
        }

        /**
         * Returns 1 minus the accumulated language, script, region and variant
         * distance between the two locales, clamped to the range 0..1.
         *
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public double match(ULocale a, ULocale aMax, ULocale b, ULocale bMax) {
            double diff = 0;
            diff += languageScores.getScore(aMax, a.getLanguage(), aMax.getLanguage(), bMax, b.getLanguage(), bMax.getLanguage());
            if (diff > 0.999d) { // with no language match, we bail
                return 0.0d;
            }
            diff += scriptScores.getScore(aMax, a.getScript(), aMax.getScript(), bMax, b.getScript(), bMax.getScript());
            diff += regionScores.getScore(aMax, a.getCountry(), aMax.getCountry(), bMax, b.getCountry(), bMax.getCountry());

            if (!a.getVariant().equals(b.getVariant())) {
                diff += 0.01;
            }
            if (diff < 0.0d) {
                diff = 0.0d;
            } else if (diff > 1.0d) {
                diff = 1.0d;
            }
            if (DEBUG) {
                System.out.println("\t\t\tTotal Distance\t" + diff);
            }
            return 1.0 - diff;
        }

        /**
         * Add an exceptional distance between languages, typically because regional
         * dialects were given their own language codes. At this point the code is
         * symmetric. We don't bother producing an equivalence class because there are
         * so few cases; this function depends on the other permutations being
         * added specifically.
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @SuppressWarnings("unused")
        @Deprecated
        private LanguageMatcherData addDistance(String desired, String supported, int percent) {
            return addDistance(desired, supported, percent, false, null);
        }

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public LanguageMatcherData addDistance(String desired, String supported, int percent, String comment) {
            return addDistance(desired, supported, percent, false, comment);
        }

        /**
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Deprecated
        public LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway) {
            return addDistance(desired, supported, percent, oneway, null);
        }

        /**
         * Parses both patterns, converts {@code percent} to a distance
         * ({@code 1 - percent / 100.0}) and adds the rule to the score table for
         * the patterns' level. Unless {@code oneway} is set or the two patterns
         * are equal, the reversed rule is added as well.
         *
         * @throws IllegalArgumentException if a pattern is malformed or the two
         *             patterns are at different levels
         */
        private LanguageMatcherData addDistance(String desired, String supported, int percent, boolean oneway, String comment) {
            if (DEBUG) {
                System.out.println("\t<languageMatch desired=\"" + desired + "\"" +
                        " supported=\"" + supported + "\"" +
                        " percent=\"" + percent + "\""
                        + (oneway ? " oneway=\"true\"" : "")
                        + "/>"
                        + (comment == null ? "" : "\t<!-- " + comment + " -->"));
            }
            double score = 1 - percent / 100.0; // convert from percentage
            LocalePatternMatcher desiredMatcher = new LocalePatternMatcher(desired);
            Level desiredLen = desiredMatcher.getLevel();
            LocalePatternMatcher supportedMatcher = new LocalePatternMatcher(supported);
            Level supportedLen = supportedMatcher.getLevel();
            if (desiredLen != supportedLen) {
                throw new IllegalArgumentException("Lengths unequal: " + desired + ", " + supported);
            }
            R3<LocalePatternMatcher, LocalePatternMatcher, Double> data = Row.of(desiredMatcher, supportedMatcher, score);
            R3<LocalePatternMatcher, LocalePatternMatcher, Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
            boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
            switch (desiredLen) {
            case language:
                String dlanguage = desiredMatcher.getLanguage();
                String slanguage = supportedMatcher.getLanguage();
                languageScores.addDataToScores(dlanguage, slanguage, data);
                if (!oneway && !desiredEqualsSupported) {
                    languageScores.addDataToScores(slanguage, dlanguage, data2);
                }
                break;
            case script:
                String dscript = desiredMatcher.getScript();
                String sscript = supportedMatcher.getScript();
                scriptScores.addDataToScores(dscript, sscript, data);
                if (!oneway && !desiredEqualsSupported) {
                    scriptScores.addDataToScores(sscript, dscript, data2);
                }
                break;
            case region:
                String dregion = desiredMatcher.getRegion();
                String sregion = supportedMatcher.getRegion();
                regionScores.addDataToScores(dregion, sregion, data);
                if (!oneway && !desiredEqualsSupported) {
                    regionScores.addDataToScores(sregion, dregion, data2);
                }
                break;
            }
            return this;
        }

        /**
         * {@inheritDoc}
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Override
        @Deprecated
        public LanguageMatcherData cloneAsThawed() {
            LanguageMatcherData result;
            try {
                result = (LanguageMatcherData) clone();
                result.languageScores = languageScores.cloneAsThawed();
                result.scriptScores = scriptScores.cloneAsThawed();
                result.regionScores = regionScores.cloneAsThawed();
                result.frozen = false;
                return result;
            } catch (CloneNotSupportedException e) {
                throw new ICUCloneNotSupportedException(e); // will never happen
            }
        }

        /**
         * {@inheritDoc}
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Override
        @Deprecated
        public LanguageMatcherData freeze() {
            languageScores.freeze();
            regionScores.freeze();
            scriptScores.freeze();
            matchingLanguages = languageScores.getMatchingLanguages();
            frozen = true;
            return this;
        }

        /**
         * {@inheritDoc}
         * @internal
         * @deprecated This API is ICU internal only.
         */
        @Override
        @Deprecated
        public boolean isFrozen() {
            return frozen;
        }
    }

    LanguageMatcherData matcherData;

    private static final LanguageMatcherData defaultWritten;

    // Must be declared before the static initializer below, which populates it.
    private static final HashMap<String, String> canonicalMap = new HashMap<String, String>();

    static {
        canonicalMap.put("iw", "he");
        canonicalMap.put("mo", "ro");
        canonicalMap.put("tl", "fil");

        ICUResourceBundle suppData = getICUSupplementalData();
        ICUResourceBundle languageMatching = suppData.findTopLevel("languageMatching");
        ICUResourceBundle written = (ICUResourceBundle) languageMatching.get("written");
        defaultWritten = new LanguageMatcherData();

        for (UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
            ICUResourceBundle item = (ICUResourceBundle) iter.next();
            /*
            "*_*_*",
            "*_*_*",
            "96",
             */
            // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
            boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
            defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
        }
        defaultWritten.freeze();
    }

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static ICUResourceBundle getICUSupplementalData() {
        ICUResourceBundle suppData = (ICUResourceBundle) UResourceBundle.getBundleInstance(
                ICUData.ICU_BASE_NAME,
                "supplementalData",
                ICUResourceBundle.ICU_DATA_CLASS_LOADER);
        return suppData;
    }

    /**
     * Matches two locales using a throwaway matcher built from an empty
     * priority list and the default matching data.
     *
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static double match(ULocale a, ULocale b) {
        final LocaleMatcher matcher = new LocaleMatcher("");
        return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
    }
}