1 package org.unicode.cldr.unittest; 2 3 import java.util.HashSet; 4 import java.util.LinkedHashMap; 5 import java.util.Map; 6 import java.util.Set; 7 8 import org.unicode.cldr.util.CLDRConfig; 9 import org.unicode.cldr.util.Pair; 10 11 import com.ibm.icu.dev.test.TestFmwk; 12 import com.ibm.icu.impl.Row.R4; 13 import com.ibm.icu.util.LocaleMatcher; 14 import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData; 15 import com.ibm.icu.util.LocalePriorityList; 16 import com.ibm.icu.util.ULocale; 17 18 public class LanguageInfoTest extends TestFmwk { 19 static CLDRConfig testInfo = CLDRConfig.getInstance(); 20 static LanguageMatcherData data = LocaleMatcherTest.LANGUAGE_MATCHER_DATA; 21 static Map<ULocale, ULocale> FALLBACKS = new LinkedHashMap<>(); 22 23 // @Override 24 // protected void init() throws Exception { 25 // super.init(); 26 // SupplementalDataInfo supp = testInfo.getSupplementalDataInfo(); 27 // List<R4<String, String, Integer, Boolean>> languageData = supp 28 // .getLanguageMatcherData("written"); 29 // for (R4<String, String, Integer, Boolean> item : languageData) { 30 // data.addDistance(item.get0().replace('_', '-'), item.get1() 31 // .replace('_', '-'), item.get2(), item.get3()); 32 // logln(item.get0() + "\t" + getName(item.get0()) + "\t" 33 // + item.get1() + "\t" + getName(item.get1()) + "\t" 34 // + item.get2() + "\t" + item.get3()); 35 // if (item.get2() == 10) { 36 // FALLBACKS.put(new ULocale(item.get0()), 37 // new ULocale(item.get1())); 38 // } 39 // } 40 // data.freeze(); 41 // } 42 testGetData()43 public void testGetData() { 44 Set<Pair<String, String>> alreadySeen = new HashSet<>(); 45 for (R4<String, String, Integer, Boolean> foo : testInfo.getSupplementalDataInfo().getLanguageMatcherData("written")) { 46 // assertTrue("check bounds", foo.get2() >= 0 && foo.get2() <= 100); 47 48 String desired = foo.get0(); 49 String supported = foo.get1(); 50 Integer score = foo.get2(); 51 Boolean oneway = foo.get3(); 52 assertEquals("Same number of fields", count('_', desired), count('_', supported)); 53 54 Pair<String, String> source = Pair.of(desired, supported); 55 if (alreadySeen.contains(source)) { 56 errln("Duplicate entry for " + source); 57 continue; 58 } 59 alreadySeen.add(source); 60 logln(score 61 + "\t" + desired + "\t" + getName(desired) 62 + "\t" + supported + "\t" + getName(supported) 63 + "\t" + oneway); 64 } 65 } 66 count(char c, String string)67 private int count(char c, String string) { 68 int count = 0; 69 int pos = string.indexOf(c); 70 while (pos >= 0) { 71 ++count; 72 pos = string.indexOf(c, pos + 1); 73 } 74 return count; 75 } 76 getName(String item)77 public static String getName(String item) { 78 return item.contains("*") ? "n/a" : testInfo.getEnglish().getName(item); 79 } 80 main(String[] args)81 public static void main(String[] args) { 82 new LanguageInfoTest().run(args); 83 } 84 testBasics()85 public void testBasics() { 86 final LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList 87 .add(ULocale.FRENCH).add(ULocale.UK).add(ULocale.ENGLISH) 88 .build(), data); 89 logln(matcher.toString()); 90 91 assertEquals("UK in FR, UK, EN", ULocale.UK, 92 matcher.getBestMatch(ULocale.UK)); 93 assertEquals("US in FR, UK, EN", ULocale.ENGLISH, 94 matcher.getBestMatch(ULocale.US)); 95 assertEquals("FR in FR, UK, EN", ULocale.FRENCH, 96 matcher.getBestMatch(ULocale.FRANCE)); 97 assertEquals("JA in FR, UK, EN", ULocale.FRENCH, 98 matcher.getBestMatch(ULocale.JAPAN)); 99 } 100 TestChinese()101 public void TestChinese() { 102 // if (logKnownIssue("Cldrbug:8169", 103 // "Problems with language matcher TestChinese.")) { 104 // return; 105 // } 106 LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList.add( 107 "zh_CN, zh_TW, iw").build(), data); 108 ULocale taiwanChinese = new ULocale("zh_TW"); 109 ULocale chinaChinese = new ULocale("zh_CN"); 110 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 111 matcher.getBestMatch("zh_Hant_HK")); 112 113 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 114 matcher.getBestMatch("zh_Hant_TW")); 115 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 116 matcher.getBestMatch("zh_Hant")); 117 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 118 matcher.getBestMatch("zh_TW")); 119 assertEquals("zh_CN, zh_TW, iw;", chinaChinese, 120 matcher.getBestMatch("zh_Hans_CN")); 121 assertEquals("zh_CN, zh_TW, iw;", chinaChinese, 122 matcher.getBestMatch("zh_CN")); 123 assertEquals("zh_CN, zh_TW, iw;", chinaChinese, 124 matcher.getBestMatch("zh")); 125 } 126 127 static final ULocale MUL = new ULocale("mul"); 128 testFallbacks()129 public void testFallbacks() { 130 131 for (R4<String, String, Integer, Boolean> foo : testInfo.getSupplementalDataInfo().getLanguageMatcherData("written")) { 132 String rawDesired = foo.get0(); 133 if (rawDesired.contains("*")) { 134 continue; 135 } 136 if (rawDesired.equals("tlh")) { 137 if (logKnownIssue("cldrbug:8919", "Hack until tlh has likely subtags")) { 138 continue; 139 } 140 } 141 ULocale desired = new ULocale(rawDesired); 142 ULocale supported = new ULocale(foo.get1()); 143 Integer score = foo.get2(); 144 Boolean oneway = foo.get3(); 145 if (!oneway) { 146 continue; 147 } 148 149 // we put "mul" first in the list, to verify that the fallback works enough to be better than the default. 150 151 @SuppressWarnings("deprecation") 152 final LocaleMatcher matcher = new LocaleMatcher( 153 LocalePriorityList 154 .add(MUL).add(supported) 155 .build(), 156 data); 157 158 ULocale bestMatch = matcher.getBestMatch(desired); 159 if (!assertEquals("fallback for " + desired + ", " + score, supported, bestMatch)) { 160 ULocale max = ULocale.addLikelySubtags(desired); 161 warnln("Might be missing something like\n" 162 + "<languageMatch desired=\"" 163 + desired.getLanguage() + "_" + max.getScript() 164 + "\" supported=\"en_Latn\" percent=\"90\" oneway=\"true\" />"); 165 bestMatch = matcher.getBestMatch(desired); // for debugging 166 } 167 } 168 } 169 } 170