1 package org.unicode.cldr.unittest;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.Date;
8 import java.util.EnumMap;
9 import java.util.EnumSet;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24 
25 import org.unicode.cldr.draft.ScriptMetadata;
26 import org.unicode.cldr.test.CoverageLevel2;
27 import org.unicode.cldr.tool.LikelySubtags;
28 import org.unicode.cldr.tool.PluralMinimalPairs;
29 import org.unicode.cldr.tool.PluralRulesFactory;
30 import org.unicode.cldr.util.Builder;
31 import org.unicode.cldr.util.CLDRConfig;
32 import org.unicode.cldr.util.CLDRFile;
33 import org.unicode.cldr.util.CLDRFile.WinningChoice;
34 import org.unicode.cldr.util.CLDRLocale;
35 import org.unicode.cldr.util.CldrUtility;
36 import org.unicode.cldr.util.Iso639Data;
37 import org.unicode.cldr.util.Iso639Data.Scope;
38 import org.unicode.cldr.util.IsoCurrencyParser;
39 import org.unicode.cldr.util.LanguageTagCanonicalizer;
40 import org.unicode.cldr.util.LanguageTagParser;
41 import org.unicode.cldr.util.Level;
42 import org.unicode.cldr.util.Organization;
43 import org.unicode.cldr.util.Pair;
44 import org.unicode.cldr.util.PluralRanges;
45 import org.unicode.cldr.util.PreferredAndAllowedHour;
46 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle;
47 import org.unicode.cldr.util.StandardCodes;
48 import org.unicode.cldr.util.StandardCodes.CodeType;
49 import org.unicode.cldr.util.StandardCodes.LstrType;
50 import org.unicode.cldr.util.SupplementalDataInfo;
51 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
52 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
53 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle;
54 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
55 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo;
56 import org.unicode.cldr.util.SupplementalDataInfo.DateRange;
57 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
58 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
59 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
61 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
62 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
63 import org.unicode.cldr.util.SupplementalDataInfo.SampleList;
64 import org.unicode.cldr.util.Validity;
65 import org.unicode.cldr.util.Validity.Status;
66 
67 import com.google.common.collect.ImmutableSet;
68 import com.google.common.collect.Multimap;
69 import com.google.common.collect.TreeMultimap;
70 import com.ibm.icu.dev.util.CollectionUtilities;
71 import com.ibm.icu.impl.Relation;
72 import com.ibm.icu.impl.Row;
73 import com.ibm.icu.impl.Row.R2;
74 import com.ibm.icu.impl.Row.R3;
75 import com.ibm.icu.impl.Utility;
76 import com.ibm.icu.lang.UCharacter;
77 import com.ibm.icu.lang.UCharacterEnums;
78 import com.ibm.icu.lang.UScript;
79 import com.ibm.icu.text.PluralRules;
80 import com.ibm.icu.text.PluralRules.FixedDecimal;
81 import com.ibm.icu.text.PluralRules.FixedDecimalRange;
82 import com.ibm.icu.text.PluralRules.FixedDecimalSamples;
83 import com.ibm.icu.text.PluralRules.SampleType;
84 import com.ibm.icu.text.StringTransform;
85 import com.ibm.icu.text.UnicodeSet;
86 import com.ibm.icu.util.Output;
87 import com.ibm.icu.util.TimeZone;
88 import com.ibm.icu.util.ULocale;
89 
90 public class TestSupplementalInfo extends TestFmwkPlus {
91     static CLDRConfig testInfo = CLDRConfig.getInstance();
92 
93     private static final StandardCodes STANDARD_CODES = testInfo
94         .getStandardCodes();
95 
96     private static final SupplementalDataInfo SUPPLEMENTAL = testInfo
97         .getSupplementalDataInfo();
98 
main(String[] args)99     public static void main(String[] args) {
100         new TestSupplementalInfo().run(args);
101     }
102 
TestPluralSampleOrder()103     public void TestPluralSampleOrder() {
104         HashSet<PluralInfo> seen = new HashSet<PluralInfo>();
105         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
106             if (locale.equals("root")) {
107                 continue;
108             }
109             PluralInfo pi = SUPPLEMENTAL.getPlurals(locale);
110             if (seen.contains(pi)) {
111                 continue;
112             }
113             seen.add(pi);
114             for (SampleType s : SampleType.values()) {
115                 for (Count c : pi.getCounts(s)) {
116                     FixedDecimalSamples sSamples = pi.getPluralRules()
117                         .getDecimalSamples(c.toString(), s);
118                     if (sSamples == null) {
119                         errln(locale + " no sample for " + c);
120                         continue;
121                     }
122                     if (s == SampleType.DECIMAL) {
123                         continue; // skip
124                     }
125                     FixedDecimalRange lastSample = null;
126                     for (FixedDecimalRange sample : sSamples.samples) {
127                         if (lastSample != null) {
128                             if (lastSample.start.compareTo(sample.start) > 0) {
129                                 errln(locale + ":" + c + ": out of order with "
130                                     + lastSample + " > " + sample);
131                             } else if (false) {
132                                 logln(locale + ":" + c + ": in order with "
133                                     + lastSample + " < " + sample);
134                             }
135                         }
136                         lastSample = sample;
137                     }
138                 }
139             }
140         }
141     }
142 
TestPluralRanges()143     public void TestPluralRanges() {
144         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
145         Set<String> localesToTest = new TreeSet<String>(
146             SUPPLEMENTAL.getPluralRangesLocales());
147         for (String locale : StandardCodes.make().getLocaleCoverageLocales(
148             "google")) { // superset
149             if (locale.equals("*") || locale.contains("_")) {
150                 continue;
151             }
152             localesToTest.add(locale);
153         }
154         Set<String> modernLocales = testInfo.getStandardCodes()
155             .getLocaleCoverageLocales(Organization.cldr,
156                 EnumSet.of(Level.MODERN));
157 
158         Output<FixedDecimal> maxSample = new Output<FixedDecimal>();
159         Output<FixedDecimal> minSample = new Output<FixedDecimal>();
160 
161         for (String locale : localesToTest) {
162             final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:";
163             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
164             Set<Count> counts = pluralInfo.getCounts();
165 
166             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString());
167 
168             // check that there are no null values
169             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
170             if (pluralRanges == null) {
171                 if (!modernLocales.contains(locale)) {
172                     logln("Missing plural ranges for " + locale);
173                 } else {
174                     errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales");
175                     StringBuilder failureCases = new StringBuilder(templateLine);
176                     for (Count start : counts) {
177                         for (Count end : counts) {
178                             pluralInfo.rangeExists(start, end, minSample, maxSample);
179                             final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns);
180                             failureCases.append("\n" + locale + "\t" + rangeLine);
181                         }
182                     }
183                     errOrLog(CoverageIssue.warn, failureCases.toString());
184                 }
185                 continue;
186             }
187             EnumSet<Count> found = EnumSet.noneOf(Count.class);
188             for (Count count : Count.values()) {
189                 if (pluralRanges.isExplicitlySet(count)
190                     && !counts.contains(count)) {
191                     assertTrue(
192                         locale
193                             + "\t pluralRanges categories must be valid for locale:\t"
194                             + count + " must be in " + counts,
195                         !pluralRanges.isExplicitlySet(count));
196                 }
197                 for (Count end : Count.values()) {
198                     Count result = pluralRanges.getExplicit(count, end);
199                     if (result != null) {
200                         found.add(result);
201                     }
202                 }
203             }
204 
205             // check empty range results
206             if (found.isEmpty()) {
207                 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales");
208             } else {
209                 if (samplePatterns == null) {
210                     errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales");
211                 } else {
212                     for (Count result : found) {
213                         String samplePattern = samplePatterns.get(
214                             PluralRules.PluralType.CARDINAL, result);
215                         if (samplePattern != null && !samplePattern.contains("{0}")) {
216                             errln("Plural Ranges cannot have results that don't use {0} in samples: "
217                                 + locale
218                                 + ", "
219                                 + result
220                                 + "\t«"
221                                 + samplePattern + "»");
222                         }
223                     }
224                 }
225                 if (isVerbose()) {
226                     logln("Range results for " + locale + ":\t" + found);
227                 }
228             }
229 
230             // check for missing values
231             boolean failure = false;
232             StringBuilder failureCases = new StringBuilder(templateLine);
233             for (Count start : counts) {
234                 for (Count end : counts) {
235                     boolean needsValue = pluralInfo.rangeExists(start, end,
236                         minSample, maxSample);
237                     Count explicitValue = pluralRanges.getExplicit(start, end);
238                     final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns);
239                     failureCases.append("\n" + locale + "\t" + rangeLine);
240                     if (needsValue && explicitValue == null) {
241                         errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: "
242                             + rangeLine,
243                             "Cldrbug:7839", "Missing plural data for modern locales");
244                         failure = true;
245                         failureCases.append("\tError — need explicit result");
246                     } else if (!needsValue && explicitValue != null) {
247                         errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: "
248                             + PluralRanges.showRange(start, end, explicitValue),
249                             "Cldrbug:7839", "Missing plural data for modern locales");
250                         failureCases.append("\tUnnecessary");
251                         failure = true;
252                     } else {
253                         failureCases.append("\tOK");
254                     }
255                 }
256             }
257             if (failure) {
258                 errOrLog(CoverageIssue.warn, failureCases.toString());
259             }
260         }
261     }
262 
getRangeLine(Count start, Count end, Count result, Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, PluralMinimalPairs samplePatterns)263     private String getRangeLine(Count start, Count end, Count result,
264         Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample,
265         PluralMinimalPairs samplePatterns) {
266         final String range = minSample + "–" + maxSample;
267         String example = range;
268         if (samplePatterns != null) {
269             example = "";
270             if (result != null) {
271                 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result);
272                 example += "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»";
273             } else {
274                 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) {
275                     String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c);
276                     example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR ";
277                 }
278                 example += " …";
279             }
280         }
281         return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example;
282     }
283 
getRangeLine(Count count, PluralRules pluralRules, String pattern)284     private String getRangeLine(Count count, PluralRules pluralRules, String pattern) {
285         String sample = "?";
286         FixedDecimalSamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER);
287         if (exampleList == null) {
288             exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL);
289         }
290         FixedDecimal sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList);
291         sample = sampleDecimal.toString();
292 
293         String example = pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»";
294         return count + "\t" + example;
295     }
296 
TestPluralSamples()297     public void TestPluralSamples() {
298         String[][] test = { { "en", "ordinal", "1", "one" },
299             { "en", "ordinal", "2", "two" },
300             { "en", "ordinal", "3", "few" },
301             { "en", "ordinal", "4", "other" },
302             { "sl", "cardinal", "2", "two" }, };
303         for (String[] row : test) {
304             checkPluralSamples(row);
305         }
306     }
307 
TestPluralSamples2()308     public void TestPluralSamples2() {
309         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
310         for (String locale : prf.getLocales()) {
311             if (locale.equals("und")) {
312                 continue;
313             }
314             if (locale.equals("pl")) {
315                 int debug = 0;
316             }
317             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale);
318             for (PluralRules.PluralType type : PluralRules.PluralType.values()) {
319                 PluralInfo rules = SUPPLEMENTAL.getPlurals(
320                     SupplementalDataInfo.PluralType.fromStandardType(type),
321                     locale.toString());
322                 if (rules.getCounts().size() == 1) {
323                     continue; // don't require rules for unary cases
324                 }
325                 Multimap<String, Count> sampleToCount = TreeMultimap.create();
326 
327                 for (Count count : rules.getCounts()) {
328                     String sample = samplePatterns.get(type, count);
329                     if (sample == null) {
330                         errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075",
331                             "Missing ordinal minimal pairs");
332                     } else {
333                         sampleToCount.put(sample, count);
334                         PluralRules pRules = rules.getPluralRules();
335                         double unique = pRules.getUniqueKeywordValue(count
336                             .toString());
337                         if (unique == PluralRules.NO_UNIQUE_VALUE
338                             && !sample.contains("{0}")) {
339                             errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»");
340                         }
341                     }
342                 }
343                 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) {
344                     if (entry.getValue().size() > 1) {
345                         errln("Colliding minimal pair samples: " + locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»");
346                     }
347                 }
348             }
349         }
350     }
351 
TestCldrScriptCodes()352     public void TestCldrScriptCodes() {
353         Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes();
354 
355         Set<String> unicodeScripts = ScriptMetadata.getScripts();
356         assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts);
357 
358         ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz");
359         assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials);
360 
361         ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore");
362         assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos);
363 
364         Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script);
365         for (Entry<Status, Set<String>> e : scripts.entrySet()) {
366             switch (e.getKey()) {
367             case regular:
368             case special:
369             case unknown:
370                 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue());
371                 break;
372             default:
373                 break; // do nothin
374             }
375         }
376 
377         ImmutableSet<String> variants = ImmutableSet.of("Aran", "Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn");
378         assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants);
379     }
380 
checkPluralSamples(String... row)381     public void checkPluralSamples(String... row) {
382         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(
383             PluralType.valueOf(row[1]), row[0]);
384         Count count = pluralInfo.getCount(new FixedDecimal(row[2]));
385         assertEquals(CollectionUtilities.join(row, ", "),
386             Count.valueOf(row[3]), count);
387     }
388 
TestPluralLocales()389     public void TestPluralLocales() {
390         // get the unique rules
391         for (PluralType type : PluralType.values()) {
392             Relation<PluralInfo, String> pluralsToLocale = Relation.of(
393                 new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
394             for (String locale : new TreeSet<String>(
395                 SUPPLEMENTAL.getPluralLocales(type))) {
396                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale);
397                 pluralsToLocale.put(pluralInfo, locale);
398             }
399 
400             String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" },
401                 { "he", "iw" }, { "in", "id" }, { "jw", "jv" },
402                 { "ji", "yi" }, { "sh", "sr" }, };
403             for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale
404                 .keyValuesSet()) {
405                 PluralInfo pluralInfo2 = pluralInfoEntry.getKey();
406                 Set<String> locales = pluralInfoEntry.getValue();
407                 // check that equivalent locales are either both in or both out
408                 for (String[] row : equivalents) {
409                     assertEquals(
410                         type + " must be equivalent: " + Arrays.asList(row),
411                         locales.contains(row[0]), locales.contains(row[1]));
412                 }
413                 // check that no rules contain 'within'
414                 for (Count count : pluralInfo2.getCounts()) {
415                     String rule = pluralInfo2.getRule(count);
416                     if (rule == null) {
417                         continue;
418                     }
419                     assertFalse(
420                         "Rule '" + rule + "' for " + Arrays.asList(locales)
421                             + " doesn't contain 'within'",
422                         rule.contains("within"));
423                 }
424             }
425         }
426     }
427 
TestDigitPluralCases()428     public void TestDigitPluralCases() {
429         String[][] tests = {
430             { "en", "one", "1", "1" },
431             { "en", "one", "2", "" },
432             { "en", "one", "3", "" },
433             { "en", "one", "4", "" },
434             { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" },
435             { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, …" },
436             { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" },
437             { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" },
438             { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" },
439             { "hr", "one", "2",
440                 "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" },
441             { "hr", "one", "3",
442                 "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" },
443             { "hr", "one", "4",
444                 "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" },
445             { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" },
446             { "hr", "few", "2",
447                 "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" },
448             { "hr", "few", "3",
449                 "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" },
450             { "hr", "few", "4",
451                 "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" },
452             { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" },
453             { "hr", "other", "2",
454                 "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" },
455             { "hr", "other", "3",
456                 "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" },
457             { "hr", "other", "4",
458                 "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, };
459         for (String[] row : tests) {
460             PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
461             SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]),
462                 Integer.parseInt(row[2]));
463             assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3],
464                 uset.toString());
465         }
466     }
467 
TestDigitPluralCompleteness()468     public void TestDigitPluralCompleteness() {
469         String[][] exceptionStrings = {
470             // defaults
471             { "*", "zero", "0,00,000,0000" }, { "*", "one", "0" },
472             { "*", "two", "0,00,000,0000" },
473             { "*", "few", "0,00,000,0000" },
474             { "*", "many", "0,00,000,0000" },
475             { "*", "other", "0,00,000,0000" },
476             // others
477             { "mo", "other", "00,000,0000" }, //
478             { "ro", "other", "00,000,0000" }, //
479             { "cs", "few", "0" }, // j in 2..4
480             { "sk", "few", "0" }, // j in 2..4
481             { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2
482             { "is", "one", "0,00,000,0000" }, // j is 1 or f is 1
483             { "sv", "one", "0" }, // j is 1
484             { "he", "two", "0" }, // j is 2
485             { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
486             // is not 11
487             { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
488             // is not 11
489             { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
490             // is not 11 or f mod 10 is
491             // 1 and f mod 100 is not 11
492             { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
493             // is not 11 or f mod 10 is
494             // 1 and f mod 100 is not 11
495             { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
496             // is not 11 or f mod 10 is
497             // 1 and f mod 100 is not 11
498             { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
499             // is not 11 or f mod 10 is
500             // 1 and f mod 100 is not 11
501             { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10
502             // is 1
503             { "sl", "one", "0,000,0000" }, // j mod 100 is 1
504             { "sl", "two", "0,000,0000" }, // j mod 100 is 2
505             { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10
506             // is 0
507             { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99
508             { "gd", "one", "0,00" }, // n in 1,11
509             { "gd", "two", "0,00" }, // n in 2,12
510             { "shi", "few", "0,00" }, // n in 2..10
511             { "gd", "few", "0,00" }, // n in 3..10,13..19
512             { "ga", "few", "0" }, // n in 3..6
513             { "ga", "many", "0,00" }, // n in 7..10
514             { "ar", "zero", "0" }, // n is 0
515             { "cy", "zero", "0" }, // n is 0
516             { "ksh", "zero", "0" }, // n is 0
517             { "lag", "zero", "0" }, // n is 0
518             { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1
519             { "pt_PT", "one", "0" }, // n = 1 and v = 0
520             { "ar", "two", "0" }, // n is 2
521             { "cy", "two", "0" }, // n is 2
522             { "ga", "two", "0" }, // n is 2
523             { "iu", "two", "0" }, // n is 2
524             { "kw", "two", "0" }, // n is 2
525             { "naq", "two", "0" }, // n is 2
526             { "se", "two", "0" }, // n is 2
527             { "sma", "two", "0" }, // n is 2
528             { "smi", "two", "0" }, // n is 2
529             { "smj", "two", "0" }, // n is 2
530             { "smn", "two", "0" }, // n is 2
531             { "sms", "two", "0" }, // n is 2
532             { "cy", "few", "0" }, // n is 3
533             { "cy", "many", "0" }, // n is 6
534             { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0
535             { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1
536             { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
537             // is not 11
538             { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
539             // is not 11 or v is 2 and f
540             // mod 10 is 1 and f mod 100
541             // is not 11 or v is not 2
542             // and f mod 10 is 1
543             { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
544             // not in 11,71,91
545             { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
546             // not in 11..19
547             { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
548             // 0 and i % 10 != 4,6,9 or
549             // v != 0 and f % 10 !=
550             // 4,6,9
551             { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
552             // 0 and i % 10 != 4,6,9 or
553             // v != 0 and f % 10 !=
554             // 4,6,9
555             { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f
556             // % 100 = 1
557         };
558         // parse out the exceptions
559         Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<PluralInfo, Relation<Count, Integer>>();
560         Relation<Count, Integer> fallback = Relation.of(
561             new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class);
562         for (String[] row : exceptionStrings) {
563             Relation<Count, Integer> countToDigits;
564             if (row[0].equals("*")) {
565                 countToDigits = fallback;
566             } else {
567                 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
568                 countToDigits = exceptions.get(plurals);
569                 if (countToDigits == null) {
570                     exceptions.put(
571                         plurals,
572                         countToDigits = Relation.of(
573                             new EnumMap<Count, Set<Integer>>(
574                                 Count.class),
575                             TreeSet.class));
576                 }
577             }
578             Count c = Count.valueOf(row[1]);
579             for (String digit : row[2].split(",")) {
580                 // "99" is special, just to have the result be non-empty
581                 countToDigits.put(c, digit.length());
582             }
583         }
584         Set<PluralInfo> seen = new HashSet<PluralInfo>();
585         Set<String> sorted = new TreeSet<String>(
586             SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
587         Relation<String, String> ruleToExceptions = Relation.of(
588             new TreeMap<String, Set<String>>(), TreeSet.class);
589 
590         for (String locale : sorted) {
591             PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale);
592             if (seen.contains(plurals)) { // skip identicals
593                 continue;
594             }
595             Relation<Count, Integer> countToDigits = exceptions.get(plurals);
596             if (countToDigits == null) {
597                 countToDigits = fallback;
598             }
599             for (Count c : plurals.getCounts()) {
600                 List<String> compose = new ArrayList<String>();
601                 boolean needLine = false;
602                 Set<Integer> digitSet = countToDigits.get(c);
603                 if (digitSet == null) {
604                     digitSet = fallback.get(c);
605                 }
606                 for (int digits = 1; digits < 5; ++digits) {
607                     boolean expected = digitSet.contains(digits);
608                     boolean hasSamples = plurals.hasSamples(c, digits);
609                     if (hasSamples) {
610                         compose.add(Utility.repeat("0", digits));
611                     }
612                     if (!assertEquals(locale + ", " + digits + ", " + c,
613                         expected, hasSamples)) {
614                         needLine = true;
615                     }
616                 }
617                 if (needLine) {
618                     String countRules = plurals.getPluralRules().getRules(
619                         c.toString());
620                     ruleToExceptions.put(countRules == null ? "" : countRules,
621                         "{\"" + locale + "\", \"" + c + "\", \""
622                             + CollectionUtilities.join(compose, ",")
623                             + "\"},");
624                 }
625             }
626         }
627         if (!ruleToExceptions.isEmpty()) {
628             System.out
629                 .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness");
630             for (Entry<String, String> entry : ruleToExceptions.entrySet()) {
631                 System.out.println(entry.getValue() + "\t// " + entry.getKey());
632             }
633         }
634     }
635 
TestLikelyCode()636     public void TestLikelyCode() {
637         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
638         String[][] tests = { { "it_AQ", "it_Latn_AQ" },
639             { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, };
640         for (String[] pair : tests) {
641             String newMax = LikelySubtags.maximize(pair[0], likely);
642             assertEquals("Likely", pair[1], newMax);
643         }
644 
645     }
646 
TestLikelySubtagCompleteness()647     public void TestLikelySubtagCompleteness() {
648         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
649 
650         for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) {
651             if (!likely.containsKey(language)) {
652                 logln("WARNING: No likely subtag for CLDR language code ("
653                     + language + ")");
654             }
655         }
656         for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) {
657             if (!likely.containsKey("und_" + script)
658                 && !script.equals("Latn")
659                 && !script.equals("Zinh")
660                 && !script.equals("Zyyy")
661                 && ScriptMetadata.getInfo(script) != null
662                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION
663                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) {
664                 errln("No likely subtag for CLDR script code (und_" + script
665                     + ")");
666             }
667         }
668 
669     }
670 
TestEquivalentLocales()671     public void TestEquivalentLocales() {
672         Set<Set<String>> seen = new HashSet<Set<String>>();
673         Set<String> toTest = new TreeSet<String>(testInfo.getCldrFactory()
674             .getAvailable());
675         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet());
676         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values());
677         toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales());
678         LanguageTagParser ltp = new LanguageTagParser();
679         main: for (String locale : toTest) {
680             if (locale.startsWith("und") || locale.equals("root")) {
681                 continue;
682             }
683             Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale);
684             if (seen.contains(s)) {
685                 continue;
686             }
687             // System.out.println(s + " => " + VettingViewer.gatherCodes(s));
688 
689             List<String> ss = new ArrayList<String>(s);
690             String last = ss.get(ss.size() - 1);
691             ltp.set(last);
692             if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) {
693                 continue; // skip variants for now.
694             }
695             String language = ltp.getLanguage();
696             String script = ltp.getScript();
697             String region = ltp.getRegion();
698             if (!script.isEmpty() && !region.isEmpty()) {
699                 String noScript = ltp.setScript("").toString();
700                 String noRegion = ltp.setScript(script).setRegion("")
701                     .toString();
702                 switch (s.size()) {
703                 case 1: // ok if already maximized and strange script/country,
704                     // eg it_Arab_JA
705                     continue main;
706                 case 2: // ok if adds default country/script, eg {en_Cyrl,
707                     // en_Cyrl_US} or {en_GB, en_Latn_GB}
708                     String first = ss.get(0);
709                     if (first.equals(noScript) || first.equals(noRegion)) {
710                         continue main;
711                     }
712                     break;
713                 case 3: // ok if different script in different country, eg
714                     // {az_IR, az_Arab, az_Arab_IR}
715                     if (noScript.equals(ss.get(0))
716                         && noRegion.equals(ss.get(1))) {
717                         continue main;
718                     }
719                     break;
720                 case 4: // ok if all combinations, eg {en, en_US, en_Latn,
721                     // en_Latn_US}
722                     if (language.equals(ss.get(0))
723                         && noScript.equals(ss.get(1))
724                         && noRegion.equals(ss.get(2))) {
725                         continue main;
726                     }
727                     break;
728                 }
729             }
730             errln("Strange size or composition:\t" + s + " \t"
731                 + showLocaleParts(s));
732             seen.add(s);
733         }
734     }
735 
showLocaleParts(Set<String> s)736     private String showLocaleParts(Set<String> s) {
737         LanguageTagParser ltp = new LanguageTagParser();
738         Set<String> b = new LinkedHashSet<String>();
739         for (String ss : s) {
740             ltp.set(ss);
741             addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b);
742             addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b);
743             addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b);
744         }
745         return CollectionUtilities.join(b, "; ");
746     }
747 
addName(int languageName, String code, Set<String> b)748     private void addName(int languageName, String code, Set<String> b) {
749         if (code.isEmpty()) {
750             return;
751         }
752         String name = testInfo.getEnglish().getName(languageName, code);
753         if (!code.equals(name)) {
754             b.add(code + "=" + name);
755         }
756     }
757 
TestDefaultScriptCompleteness()758     public void TestDefaultScriptCompleteness() {
759         Relation<String, String> scriptToBase = Relation.of(
760             new LinkedHashMap<String, Set<String>>(), TreeSet.class);
761         main: for (String locale : testInfo.getCldrFactory()
762             .getAvailableLanguages()) {
763             if (!locale.contains("_") && !"root".equals(locale)) {
764                 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale);
765                 if (defaultScript != null) {
766                     continue;
767                 }
768                 CLDRFile cldrFile = testInfo.getCLDRFile(locale,
769                     false);
770                 UnicodeSet set = cldrFile.getExemplarSet("",
771                     WinningChoice.NORMAL);
772                 for (String s : set) {
773                     int script = UScript.getScript(s.codePointAt(0));
774                     if (script != UScript.UNKNOWN && script != UScript.COMMON
775                         && script != UScript.INHERITED) {
776                         scriptToBase.put(UScript.getShortName(script), locale);
777                         continue main;
778                     }
779                 }
780                 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale);
781             }
782         }
783         if (scriptToBase.size() != 0) {
784             for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) {
785                 errln("Default Scripts missing:\t" + entry.getKey() + "\t"
786                     + entry.getValue());
787             }
788         }
789     }
790 
TestTimeData()791     public void TestTimeData() {
792         Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL
793             .getTimeData();
794         Set<String> regionsSoFar = new HashSet<String>();
795         Set<String> current24only = new HashSet<String>();
796         Set<String> current12preferred = new HashSet<String>();
797 
798         boolean haveWorld = false;
799 
800         ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k));
801 
802         for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) {
803             String region = e.getKey();
804             if (region.equals("001")) {
805                 haveWorld = true;
806             }
807             regionsSoFar.add(region);
808             PreferredAndAllowedHour preferredAndAllowedHour = e.getValue();
809             assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred);
810 
811             // find first h or H
812             HourStyle found = null;
813 
814             for (HourStyle item : preferredAndAllowedHour.allowed) {
815                 if (oldSchool.contains(item)) {
816                     found = item;
817                     if (item != preferredAndAllowedHour.preferred) {
818                         String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred
819                             + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed;
820                         if (!logKnownIssue("cldrbug:11448", message)) {
821                             errln(message);
822                         }
823                     }
824                     break;
825                 }
826             }
827             if (found == null) {
828                 errln(region + ": preferred " + preferredAndAllowedHour.preferred
829                     + " not in " + preferredAndAllowedHour.allowed);
830             }
831 //            final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next();
832 //            if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h
833 //                || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb
834 //                || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) {
835 //                errln(region + ": allowed " + preferredAndAllowedHour.allowed
836 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
837 //            } else if (isVerbose()) {
838 //                logln(region + ": allowed " + preferredAndAllowedHour.allowed
839 //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
840 //            }
841             // for (HourStyle c : preferredAndAllowedHour.allowed) {
842             // if (!PreferredAndAllowedHour.HOURS.contains(c)) {
843             // errln(region + ": illegal character in " +
844             // preferredAndAllowedHour.allowed + ". It contains " + c
845             // + " which is not in " + PreferredAndAllowedHour.HOURS);
846             // }
847             // }
848             if (!preferredAndAllowedHour.allowed.contains(HourStyle.h)
849                 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) {
850                 current24only.add(region);
851             }
852             if (preferredAndAllowedHour.preferred == HourStyle.h) {
853                 current12preferred.add(region);
854             }
855         }
856         Set<String> missing = new TreeSet<String>(
857             STANDARD_CODES.getGoodAvailableCodes(CodeType.territory));
858         missing.removeAll(regionsSoFar);
859         for (Iterator<String> it = missing.iterator(); it.hasNext();) {
860             if (!StandardCodes.isCountry(it.next())) {
861                 it.remove();
862             }
863         }
864 
865         // if we don't have 001, then we can't miss any regions
866         if (!missing.isEmpty()) {
867             if (haveWorld) {
868                 logln("Implicit regions: " + missing);
869             } else {
870                 errln("Missing regions: " + missing);
871             }
872         }
873 
874         // The feedback gathered from our translators is that the following use
875         // 24 hour time ONLY:
876         Set<String> only24lang = new TreeSet<String>(
877             Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, "
878                 + "fr, gl, he, is, id, it, nb, pt, ro, ru, sr, sk, sl, sv, tr, hy")
879                     .split(",\\s*")));
880         // With the new preferences, this is changed
881         Set<String> only24region = new TreeSet<String>();
882         Set<String> either24or12region = new TreeSet<String>();
883 
884         // get all countries where official or de-facto official
885         // add them two one of two lists, based on the above list of languages
886         for (String language : SUPPLEMENTAL
887             .getLanguagesForTerritoriesPopulationData()) {
888             boolean a24lang = only24lang.contains(language);
889             for (String region : SUPPLEMENTAL
890                 .getTerritoriesForPopulationData(language)) {
891                 PopulationData pop = SUPPLEMENTAL
892                     .getLanguageAndTerritoryPopulationData(language, region);
893                 if (pop.getOfficialStatus().compareTo(
894                     OfficialStatus.de_facto_official) < 0) {
895                     continue;
896                 }
897                 if (a24lang) {
898                     only24region.add(region);
899                 } else {
900                     either24or12region.add(region);
901                 }
902             }
903         }
904         // if we have a case like CA, where en uses 12/24 but fr uses 24, remove
905         // it for safety
906         only24region.removeAll(either24or12region);
907         // There are always exceptions... Remove VA (Vatican), since it allows 12/24
908         // but the de facto langauge is Italian.
909         only24region.remove("VA");
910         // also remove all the regions where 'h' is preferred
911         only24region.removeAll(current12preferred);
912         // now verify
913         if (!current24only.containsAll(only24region)) {
914             Set<String> missing24only = new TreeSet<String>(only24region);
915             missing24only.removeAll(current24only);
916 
917             errln("24-hour-only doesn't include needed items:\n"
918                 + " add "
919                 + CldrUtility.join(missing24only, " ")
920                 + "\n\t\t"
921                 + CldrUtility.join(missing24only, "\n\t\t",
922                     new NameCodeTransform(testInfo.getEnglish(),
923                         CLDRFile.TERRITORY_NAME)));
924         }
925     }
926 
927     public static class NameCodeTransform implements StringTransform {
928         private final CLDRFile file;
929         private final int codeType;
930 
NameCodeTransform(CLDRFile file, int code)931         public NameCodeTransform(CLDRFile file, int code) {
932             this.file = file;
933             this.codeType = code;
934         }
935 
936         @Override
transform(String code)937         public String transform(String code) {
938             return file.getName(codeType, code) + " [" + code + "]";
939         }
940     }
941 
TestAliases()942     public void TestAliases() {
943         testInfo.getStandardCodes();
944         Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes
945             .getLStreg();
946         Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL
947             .getLocaleAliasInfo();
948 
949         for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases
950             .entrySet()) {
951             String type = typeMap.getKey();
952             Map<String, R2<List<String>, String>> codeReplacement = typeMap
953                 .getValue();
954 
955             Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data
956                 .get(type.equals("territory") ? "region" : type);
957             if (bcp47DataTypeData == null) {
958                 logln("skipping BCP47 test for " + type);
959             } else {
960                 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData
961                     .entrySet()) {
962                     String code = codeData.getKey();
963                     if (codeReplacement.containsKey(code)
964                         || codeReplacement.containsKey(code
965                             .toUpperCase(Locale.ENGLISH))) {
966                         continue;
967                         // TODO, check the value
968                     }
969                     Map<String, String> data = codeData.getValue();
970                     if (data.containsKey("Deprecated")
971                         && SUPPLEMENTAL.getCLDRLanguageCodes().contains(
972                             code)) {
973                         errln("supplementalMetadata.xml: alias is missing <languageAlias type=\""
974                             + code + "\" ... /> " + "\t" + data);
975                     }
976                 }
977             }
978 
979             Set<R3<String, List<String>, List<String>>> failures = new TreeSet<R3<String, List<String>, List<String>>>();
980             Set<String> nullReplacements = new TreeSet<String>();
981             for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement
982                 .entrySet()) {
983                 String code = codeRep.getKey();
984                 List<String> replacements = codeRep.getValue().get0();
985                 if (replacements == null) {
986                     nullReplacements.add(code);
987                     continue;
988                 }
989                 Set<String> fixedReplacements = new LinkedHashSet<String>();
990                 for (String replacement : replacements) {
991                     R2<List<String>, String> newReplacement = codeReplacement
992                         .get(replacement);
993                     if (newReplacement != null) {
994                         List<String> list = newReplacement.get0();
995                         if (list != null) {
996                             fixedReplacements.addAll(list);
997                         }
998                     } else {
999                         fixedReplacements.add(replacement);
1000                     }
1001                 }
1002                 List<String> fixedList = new ArrayList<String>(
1003                     fixedReplacements);
1004                 if (!replacements.equals(fixedList)) {
1005                     R3<String, List<String>, List<String>> row = Row.of(code,
1006                         replacements, fixedList);
1007                     System.out.println(row.toString());
1008                     failures.add(row);
1009                 }
1010             }
1011 
1012             if (failures.size() != 0) {
1013                 for (R3<String, List<String>, List<String>> item : failures) {
1014                     String code = item.get0();
1015                     List<String> oldReplacement = item.get1();
1016                     List<String> newReplacement = item.get2();
1017 
1018                     errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t"
1019                         + "<" + type + "Alias type=\"" + code
1020                         + "\" replacement=\""
1021                         + CollectionUtilities.join(newReplacement, " ")
1022                         + "\" reason=\"XXX\"/> <!-- YYY -->\n");
1023                 }
1024             }
1025             if (nullReplacements.size() != 0) {
1026                 logln("No Replacements\t" + type + "\t" + nullReplacements);
1027             }
1028         }
1029     }
1030 
1031     static final List<String> oldRegions = Arrays
1032         .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU"
1033             .split(", "));
1034 
TestTerritoryContainment()1035     public void TestTerritoryContainment() {
1036         Relation<String, String> map = SUPPLEMENTAL
1037             .getTerritoryToContained(ContainmentStyle.all);
1038         Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore();
1039         Set<String> mapItems = new LinkedHashSet<String>();
1040         // get all the items
1041         for (String item : map.keySet()) {
1042             mapItems.add(item);
1043             mapItems.addAll(map.getAll(item));
1044         }
1045         Map<String, Map<String, String>> bcp47RegionData = StandardCodes
1046             .getLStreg().get("region");
1047 
1048         // verify that all regions are covered
1049         Set<String> bcp47Regions = new LinkedHashSet<String>(
1050             bcp47RegionData.keySet());
1051         bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the
1052         // unknown region...
1053         for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) {
1054             String region = it.next();
1055             Map<String, String> data = bcp47RegionData.get(region);
1056             if (data.containsKey("Deprecated")) {
1057                 logln("Removing deprecated " + region);
1058                 it.remove();
1059             }
1060             if ("Private use".equals(data.get("Description"))) {
1061                 it.remove();
1062             }
1063         }
1064 
1065         if (!mapItems.equals(bcp47Regions)) {
1066             mapItems.removeAll(oldRegions);
1067             errlnDiff("containment items not in bcp47 regions: ", mapItems,
1068                 bcp47Regions);
1069             errlnDiff("bcp47 regions not in containment items: ", bcp47Regions,
1070                 mapItems);
1071         }
1072 
1073         // verify that everything in the containment core can be reached
1074         // downwards from 001.
1075 
1076         Map<String, Integer> from001 = getRecursiveContainment("001", map,
1077             new LinkedHashMap<String, Integer>(), 1);
1078         from001.put("001", 0);
1079         Set<String> keySet = from001.keySet();
1080         for (String region : keySet) {
1081             logln(Utility.repeat("\t", from001.get(region)) + "\t" + region
1082                 + "\t" + getRegionName(region));
1083         }
1084 
1085         // Populate mapItems with the core containment
1086         mapItems.clear();
1087         for (String item : mapCore.keySet()) {
1088             mapItems.add(item);
1089             mapItems.addAll(mapCore.getAll(item));
1090         }
1091 
1092         if (!mapItems.equals(keySet)) {
1093             errlnDiff(
1094                 "containment core items that can't be reached from 001: ",
1095                 mapItems, keySet);
1096         }
1097     }
1098 
errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1099     private void errlnDiff(String title, Set<String> mapItems,
1100         Set<String> keySet) {
1101         Set<String> diff = new LinkedHashSet<String>(mapItems);
1102         diff.removeAll(keySet);
1103         if (diff.size() != 0) {
1104             errln(title + diff);
1105         }
1106     }
1107 
getRegionName(String region)1108     private String getRegionName(String region) {
1109         return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region);
1110     }
1111 
getRecursiveContainment(String region, Relation<String, String> map, Map<String, Integer> result, int depth)1112     private Map<String, Integer> getRecursiveContainment(String region,
1113         Relation<String, String> map, Map<String, Integer> result, int depth) {
1114         Set<String> contained = map.getAll(region);
1115         if (contained == null) {
1116             return result;
1117         }
1118         for (String item : contained) {
1119             if (result.containsKey(item)) {
1120                 logln("Duplicate containment " + item + "\t"
1121                     + getRegionName(item));
1122                 continue;
1123             }
1124             result.put(item, depth);
1125             getRecursiveContainment(item, map, result, depth + 1);
1126         }
1127         return result;
1128     }
1129 
TestMacrolanguages()1130     public void TestMacrolanguages() {
1131         Set<String> languageCodes = STANDARD_CODES
1132             .getAvailableCodes("language");
1133         Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL
1134             .getLocaleAliasInfo();
1135         Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement
1136             .get("language");
1137 
1138         Relation<String, String> replacementToReplaced = Relation.of(
1139             new TreeMap<String, Set<String>>(), TreeSet.class);
1140         for (String language : tagToReplacement.keySet()) {
1141             List<String> replacements = tagToReplacement.get(language).get0();
1142             if (replacements != null) {
1143                 replacementToReplaced.putAll(replacements, language);
1144             }
1145         }
1146         replacementToReplaced.freeze();
1147 
1148         Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes
1149             .getLStreg();
1150         Map<String, Map<String, String>> lstregLanguageInfo = lstreg
1151             .get("language");
1152 
1153         Relation<Scope, String> scopeToCodes = Relation.of(
1154             new TreeMap<Scope, Set<String>>(), TreeSet.class);
1155         // the invariant is that every macrolanguage has exactly 1 encompassed
1156         // language that maps to it
1157 
1158         main: for (String language : Builder.with(new TreeSet<String>())
1159             .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) {
1160             if (language.equals("no") || language.equals("sh"))
1161                 continue; // special cases
1162             Scope languageScope = getScope(language, lstregLanguageInfo);
1163             if (languageScope == Scope.Macrolanguage) {
1164                 if (Iso639Data.getHeirarchy(language) != null) {
1165                     continue main; // is real family
1166                 }
1167                 Set<String> replacements = replacementToReplaced
1168                     .getAll(language);
1169                 if (replacements == null || replacements.size() == 0) {
1170                     scopeToCodes.put(languageScope, language);
1171                 } else {
1172                     // it still might be bad, if we don't have a mapping to a
1173                     // regular language
1174                     for (String replacement : replacements) {
1175                         Scope replacementScope = getScope(replacement,
1176                             lstregLanguageInfo);
1177                         if (replacementScope == Scope.Individual) {
1178                             continue main;
1179                         }
1180                     }
1181                     scopeToCodes.put(languageScope, language);
1182                 }
1183             }
1184         }
1185         // now show the items we found
1186         for (Scope scope : scopeToCodes.keySet()) {
1187             for (String language : scopeToCodes.getAll(scope)) {
1188                 String name = testInfo.getEnglish().getName(language);
1189                 if (name == null || name.equals(language)) {
1190                     Set<String> set = Iso639Data.getNames(language);
1191                     if (set != null) {
1192                         name = set.iterator().next();
1193                     } else {
1194                         Map<String, String> languageInfo = lstregLanguageInfo
1195                             .get(language);
1196                         if (languageInfo != null) {
1197                             name = languageInfo.get("Description");
1198                         }
1199                     }
1200                 }
1201                 errln(scope + "\t" + language + "\t" + name + "\t"
1202                     + Iso639Data.getType(language));
1203             }
1204         }
1205     }
1206 
getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1207     private Scope getScope(String language,
1208         Map<String, Map<String, String>> lstregLanguageInfo) {
1209         Scope languageScope = Iso639Data.getScope(language);
1210         Map<String, String> languageInfo = lstregLanguageInfo.get(language);
1211         if (languageInfo == null) {
1212             // System.out.println("Couldn't get lstreg info for " + language);
1213         } else {
1214             String lstregScope = languageInfo.get("Scope");
1215             if (lstregScope != null) {
1216                 Scope scope2 = Scope.fromString(lstregScope);
1217                 if (languageScope != scope2) {
1218                     // System.out.println("Mismatch in scope between LSTR and ISO 639:\t"
1219                     // + scope2 + "\t" +
1220                     // languageScope);
1221                     languageScope = scope2;
1222                 }
1223             }
1224         }
1225         return languageScope;
1226     }
1227 
    /**
     * Switch read by TestPopulation: when true, every language in the
     * territory population data is compared with its canonical form and the
     * base-language/script invariant checks at the end of that test are
     * skipped; when false, the reverse.
     */
    static final boolean LOCALES_FIXED = true;
1229 
TestPopulation()1230     public void TestPopulation() {
1231         Set<String> languages = SUPPLEMENTAL
1232             .getLanguagesForTerritoriesPopulationData();
1233         Relation<String, String> baseToLanguages = Relation.of(
1234             new TreeMap<String, Set<String>>(), TreeSet.class);
1235         LanguageTagParser ltp = new LanguageTagParser();
1236         LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false);
1237 
1238         for (String language : languages) {
1239             if (LOCALES_FIXED) {
1240                 String canonicalForm = ltc.transform(language);
1241                 if (!assertEquals("Canonical form", canonicalForm, language)) {
1242                     int debug = 0;
1243                 }
1244             }
1245 
1246             String base = ltp.set(language).getLanguage();
1247             String script = ltp.getScript();
1248             baseToLanguages.put(base, language);
1249 
1250             // add basic data, basically just for wo!
1251             // if there are primary scripts, they must include script (if not
1252             // empty)
1253             Set<String> primaryScripts = Collections.emptySet();
1254             Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL
1255                 .getBasicLanguageDataMap(base);
1256             if (basicData != null) {
1257                 BasicLanguageData s = basicData
1258                     .get(BasicLanguageData.Type.primary);
1259                 if (s != null) {
1260                     primaryScripts = s.getScripts();
1261                 }
1262             }
1263 
1264             // do some consistency tests; if there is a script, it must be in
1265             // primaryScripts
1266             if (!script.isEmpty() && !primaryScripts.contains(script)) {
1267                 errln(base + ": Script found in territory data (" + script
1268                     + ") is not in primary scripts :\t" + primaryScripts);
1269             }
1270 
1271             // if there are multiple primary scripts, they will be in
1272             // baseToLanguages
1273             if (primaryScripts.size() > 1) {
1274                 for (String script2 : primaryScripts) {
1275                     baseToLanguages.put(base, base + "_" + script2);
1276                 }
1277             }
1278         }
1279 
1280         if (!LOCALES_FIXED) {
1281             // the invariants are that if we have a base, we must not have a script.
1282             // and if we don't have a base, we must have two items
1283             for (String base : baseToLanguages.keySet()) {
1284                 Set<String> languagesForBase = baseToLanguages.getAll(base);
1285                 if (languagesForBase.contains(base)) {
1286                     if (languagesForBase.size() > 1) {
1287                         errln("Cannot have base alone with other scripts:\t"
1288                             + languagesForBase);
1289                     }
1290                 } else {
1291                     if (languagesForBase.size() == 1) {
1292                         errln("Cannot have only one script for language:\t"
1293                             + languagesForBase);
1294                     }
1295                 }
1296             }
1297         }
1298     }
1299 
TestCompleteness()1300     public void TestCompleteness() {
1301         if (SUPPLEMENTAL.getSkippedElements().size() > 0) {
1302             logln("SupplementalDataInfo API doesn't support: "
1303                 + SUPPLEMENTAL.getSkippedElements().toString());
1304         }
1305     }
1306 
1307     // these are settings for exceptional cases we want to allow
1308     private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<String>(
1309         Arrays.asList("ILS", "NZD", "PGK", "TWD"));
1310 
1311     // ok since there is no problem with confusion
1312     private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<String>(
1313         Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM",
1314             "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG",
1315             "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN",
1316             "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD",
1317             "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI",
1318             "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD",
1319             "YUN", "ZRZ", "GWE"));
1320 
1321     private static final Date LIMIT_FOR_NEW_CURRENCY = new Date(
1322         new Date().getYear() - 5, 1, 1);
1323     private static final Date NOW = new Date();
1324     private Matcher oldMatcher = Pattern.compile(
1325         "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE)
1326         .matcher("");
1327     private Matcher newMatcher = Pattern.compile("\\bnew\\b",
1328         Pattern.CASE_INSENSITIVE).matcher("");
1329 
    /**
     * Test that access to currency info in supplemental data is ok,
     * cross-checking the CLDR currency data against the ISO currency list.
     */
TestCurrency()1336     public void TestCurrency() {
1337         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1338         Set<String> currencyCodes = STANDARD_CODES
1339             .getGoodAvailableCodes("currency");
1340         Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation
1341             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1342                 TreeSet.class);
1343         Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation
1344             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1345                 TreeSet.class);
1346         Set<String> territoriesWithoutModernCurrencies = new TreeSet<String>(
1347             STANDARD_CODES.getGoodAvailableCodes("territory"));
1348         Map<String, Date> currencyFirstValid = new TreeMap<String, Date>();
1349         Map<String, Date> currencyLastValid = new TreeMap<String, Date>();
1350         territoriesWithoutModernCurrencies.remove("ZZ");
1351 
1352         for (String territory : STANDARD_CODES
1353             .getGoodAvailableCodes("territory")) {
1354             /* "EU" behaves like a country for purposes of this test */
1355             if ((SUPPLEMENTAL.getContained(territory) != null)
1356                 && !territory.equals("EU")) {
1357                 territoriesWithoutModernCurrencies.remove(territory);
1358                 continue;
1359             }
1360             Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1361                 .getCurrencyDateInfo(territory);
1362             if (currencyInfo == null) {
1363                 continue; // error, but will pick up below.
1364             }
1365             for (CurrencyDateInfo dateInfo : currencyInfo) {
1366                 final String currency = dateInfo.getCurrency();
1367                 final Date start = dateInfo.getStart();
1368                 final Date end = dateInfo.getEnd();
1369                 if (dateInfo.getErrors().length() != 0) {
1370                     logln("parsing " + territory + "\t" + dateInfo.toString()
1371                         + "\t" + dateInfo.getErrors());
1372                 }
1373                 Date firstValue = currencyFirstValid.get(currency);
1374                 if (firstValue == null || firstValue.compareTo(start) < 0) {
1375                     currencyFirstValid.put(currency, start);
1376                 }
1377                 Date lastValue = currencyLastValid.get(currency);
1378                 if (lastValue == null || lastValue.compareTo(end) > 0) {
1379                     currencyLastValid.put(currency, end);
1380                 }
1381                 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender
1382                     // is
1383                     // OK...
1384                     modernCurrencyCodes.put(currency,
1385                         new Pair<String, CurrencyDateInfo>(territory,
1386                             dateInfo));
1387                     territoriesWithoutModernCurrencies.remove(territory);
1388                 } else {
1389                     nonModernCurrencyCodes.put(currency,
1390                         new Pair<String, CurrencyDateInfo>(territory,
1391                             dateInfo));
1392                 }
1393                 logln(territory
1394                     + "\t"
1395                     + dateInfo.toString()
1396                     + "\t"
1397                     + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME,
1398                         currency));
1399             }
1400         }
1401         // fix up
1402         nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet());
1403         Relation<String, String> isoCurrenciesToCountries = Relation.of(
1404             new TreeMap<String, Set<String>>(), TreeSet.class)
1405             .addAllInverted(isoCodes.getCountryToCodes());
1406         // now print error messages
1407         logln("Modern Codes: " + modernCurrencyCodes.size() + "\t"
1408             + modernCurrencyCodes);
1409         Set<String> missing = new TreeSet<String>(
1410             isoCurrenciesToCountries.keySet());
1411         missing.removeAll(modernCurrencyCodes.keySet());
1412         if (missing.size() != 0) {
1413             errln("Missing codes compared to ISO: " + missing.toString());
1414         }
1415 
1416         for (String currency : modernCurrencyCodes.keySet()) {
1417             Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes
1418                 .getAll(currency);
1419             final String name = testInfo.getEnglish().getName(
1420                 CLDRFile.CURRENCY_NAME, currency);
1421 
1422             Set<String> isoCountries = isoCurrenciesToCountries
1423                 .getAll(currency);
1424             if (isoCountries == null) {
1425                 isoCountries = new TreeSet<String>();
1426             }
1427 
1428             TreeSet<String> cldrCountries = new TreeSet<String>();
1429             for (Pair<String, CurrencyDateInfo> x : data) {
1430                 cldrCountries.add(x.getFirst());
1431             }
1432             if (!isoCountries.equals(cldrCountries)) {
1433                 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) {
1434 
1435                     errln("Mismatch between ISO and Cldr modern currencies for "
1436                         + currency + "\tISO:" + isoCountries + "\tCLDR:"
1437                         + cldrCountries);
1438                     showCountries("iso-cldr", isoCountries, cldrCountries, missing);
1439                     showCountries("cldr-iso", cldrCountries, isoCountries, missing);
1440                 }
1441             }
1442 
1443             if (oldMatcher.reset(name).find()) {
1444                 errln("Has 'old' in name but still used " + "\t" + currency
1445                     + "\t" + name + "\t" + data);
1446             }
1447             if (newMatcher.reset(name).find()
1448                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1449                 // find the first use. If older than 5 years, flag as error
1450                 if (currencyFirstValid.get(currency).compareTo(
1451                     LIMIT_FOR_NEW_CURRENCY) < 0) {
1452                     errln("Has 'new' in name but used since "
1453                         + CurrencyDateInfo.formatDate(currencyFirstValid
1454                             .get(currency))
1455                         + "\t" + currency + "\t"
1456                         + name + "\t" + data);
1457                 } else {
1458                     logln("Has 'new' in name but used since "
1459                         + CurrencyDateInfo.formatDate(currencyFirstValid
1460                             .get(currency))
1461                         + "\t" + currency + "\t"
1462                         + name + "\t" + data);
1463                 }
1464             }
1465         }
1466         logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size()
1467             + "\t" + nonModernCurrencyCodes);
1468         for (String currency : nonModernCurrencyCodes.keySet()) {
1469             final String name = testInfo.getEnglish().getName(
1470                 CLDRFile.CURRENCY_NAME, currency);
1471             if (newMatcher.reset(name).find()
1472                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1473                 logln("Has 'new' in name but NOT used since "
1474                     + CurrencyDateInfo.formatDate(currencyLastValid
1475                         .get(currency))
1476                     + "\t" + currency + "\t" + name
1477                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1478             } else if (!oldMatcher.reset(name).find()
1479                 && !OK_TO_NOT_HAVE_OLD.contains(currency)) {
1480                 logln("Doesn't have 'old' or date range in name but NOT used since "
1481                     + CurrencyDateInfo.formatDate(currencyLastValid
1482                         .get(currency))
1483                     + "\t"
1484                     + currency
1485                     + "\t"
1486                     + name
1487                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1488                 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes
1489                     .getAll(currency)) {
1490                     final String territory = pair.getFirst();
1491                     Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1492                         .getCurrencyDateInfo(territory);
1493                     for (CurrencyDateInfo dateInfo : currencyInfo) {
1494                         if (dateInfo.getEnd().compareTo(NOW) < 0) {
1495                             continue;
1496                         }
1497                         logln("\tCurrencies used instead: "
1498                             + territory
1499                             + "\t"
1500                             + dateInfo
1501                             + "\t"
1502                             + testInfo.getEnglish().getName(
1503                                 CLDRFile.CURRENCY_NAME,
1504                                 dateInfo.getCurrency()));
1505 
1506                     }
1507                 }
1508 
1509             }
1510         }
1511         Set<String> remainder = new TreeSet<String>();
1512         remainder.addAll(currencyCodes);
1513         remainder.removeAll(nonModernCurrencyCodes.keySet());
1514         // TODO make this an error, except for allowed exceptions.
1515         logln("Currencies without Territories: " + remainder);
1516         if (territoriesWithoutModernCurrencies.size() != 0) {
1517             errln("Modern territory missing currency: "
1518                 + territoriesWithoutModernCurrencies);
1519         }
1520     }
1521 
showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1522     private void showCountries(final String title, Set<String> isoCountries,
1523         Set<String> cldrCountries, Set<String> missing) {
1524         missing.clear();
1525         missing.addAll(isoCountries);
1526         missing.removeAll(cldrCountries);
1527         for (String country : missing) {
1528             logln("\t\tExtra in " + title + "\t" + country + " - "
1529                 + getRegionName(country));
1530         }
1531     }
1532 
TestCurrencyDecimalPlaces()1533     public void TestCurrencyDecimalPlaces() {
1534         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1535         Relation<String, IsoCurrencyParser.Data> codeList = isoCodes
1536             .getCodeList();
1537         Set<String> currencyCodes = STANDARD_CODES
1538             .getGoodAvailableCodes("currency");
1539         for (String cc : currencyCodes) {
1540             Set<IsoCurrencyParser.Data> d = codeList.get(cc);
1541             if (d != null) {
1542                 for (IsoCurrencyParser.Data x : d) {
1543                     CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc);
1544                     if (cni.digits != x.getMinorUnit()) {
1545                         logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc +
1546                             ". ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits);
1547                     }
1548                 }
1549             }
1550         }
1551     }
1552 
1553     /**
1554      * Verify that we have a default script for every CLDR base language
1555      */
TestDefaultScripts()1556     public void TestDefaultScripts() {
1557         SupplementalDataInfo supp = SUPPLEMENTAL;
1558         Map<String, String> likelyData = supp.getLikelySubtags();
1559         Map<String, String> baseToDefaultContentScript = new HashMap<String, String>();
1560         for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) {
1561             String script = locale.getScript();
1562             if (!script.isEmpty() && locale.getCountry().isEmpty()) {
1563                 baseToDefaultContentScript.put(locale.getLanguage(), script);
1564             }
1565         }
1566         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1567             if ("root".equals(locale)) {
1568                 continue;
1569             }
1570             CLDRLocale loc = CLDRLocale.getInstance(locale);
1571             String baseLanguage = loc.getLanguage();
1572             String defaultScript = supp.getDefaultScript(baseLanguage);
1573 
1574             String defaultContentScript = baseToDefaultContentScript
1575                 .get(baseLanguage);
1576             if (defaultContentScript != null) {
1577                 assertEquals(loc + " defaultContentScript = default",
1578                     defaultScript, defaultContentScript);
1579             }
1580             String likely = likelyData.get(baseLanguage);
1581             String likelyScript = likely == null ? null : CLDRLocale
1582                 .getInstance(likely).getScript();
1583             Map<Type, BasicLanguageData> scriptInfo = supp
1584                 .getBasicLanguageDataMap(baseLanguage);
1585             if (scriptInfo == null) {
1586                 errln(loc + ": has no BasicLanguageData");
1587             } else {
1588                 BasicLanguageData data = scriptInfo.get(Type.primary);
1589                 if (data == null) {
1590                     data = scriptInfo.get(Type.secondary);
1591                 }
1592                 if (data == null) {
1593                     errln(loc + ": has no scripts in BasicLanguageData");
1594                 } else if (!data.getScripts().contains(defaultScript)) {
1595                     errln(loc + ": " + defaultScript
1596                         + " not in BasicLanguageData " + data.getScripts());
1597                 }
1598             }
1599 
1600             assertEquals(loc + " likely = default", defaultScript, likelyScript);
1601 
1602             assertNotNull(loc + ": needs default script", defaultScript);
1603 
1604             if (!loc.getScript().isEmpty()) {
1605                 if (!loc.getScript().equals(defaultScript)) {
1606                     assertNotEquals(locale
1607                         + ": only include script if not default",
1608                         loc.getScript(), defaultScript);
1609                 }
1610             }
1611 
1612         }
1613     }
1614 
1615     enum CoverageIssue {
1616         log, warn, error
1617     }
1618 
TestPluralCompleteness()1619     public void TestPluralCompleteness() {
1620         // Set<String> cardinalLocales = new
1621         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
1622         // Set<String> ordinalLocales = new
1623         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal));
1624         // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals =
1625         // PluralRulesFactory.getLocaleToSamplePatterns();
1626         // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales();
1627         // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale,
1628         // type).keySet());
1629         // Map<ULocale, PluralRules> overrideCardinals =
1630         // PluralRulesFactory.getPluralOverrides();
1631         // Set<ULocale> overrideCardinalLocales = new
1632         // HashSet<ULocale>(overrideCardinals.keySet());
1633 
1634         Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales(
1635             Organization.google, EnumSet.of(Level.MODERN));
1636         Set<String> allLocales = testInfo.getCldrFactory().getAvailable();
1637         LanguageTagParser ltp = new LanguageTagParser();
1638         for (String locale : allLocales) {
1639             // the only known case where plural rules depend on region or script
1640             // is pt_PT
1641             if (locale.equals("root")) {
1642                 continue;
1643             }
1644             ltp.set(locale);
1645             if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) {
1646                 continue;
1647             }
1648             CoverageIssue needsCoverage = testLocales.contains(locale)
1649                 ? CoverageIssue.error
1650                 : CoverageIssue.log;
1651             CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage;
1652 
1653             //            if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) {
1654             //                if (locale.equals("be") || locale.equals("ga")) {
1655             //                    needsCoverage = CoverageIssue.warn;
1656             //                }
1657             //            }
1658             PluralRulesFactory prf = PluralRulesFactory
1659                 .getInstance(CLDRConfig.getInstance()
1660                     .getSupplementalDataInfo());
1661 
1662             for (PluralType type : PluralType.values()) {
1663                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale,
1664                     false);
1665                 if (pluralInfo == null) {
1666                     errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales");
1667                     continue;
1668                 }
1669                 Set<Count> counts = pluralInfo.getCounts();
1670                 // if (counts.size() == 1) {
1671                 // continue; // skip checking samples
1672                 // }
1673                 HashSet<String> samples = new HashSet<String>();
1674                 EnumSet<Count> countsWithNoSamples = EnumSet
1675                     .noneOf(Count.class);
1676                 Relation<String, Count> samplesToCounts = Relation.of(
1677                     new HashMap(), LinkedHashSet.class);
1678                 Set<Count> countsFound = prf.getSampleCounts(locale,
1679                     type.standardType);
1680                 StringBuilder failureCases = new StringBuilder();
1681                 for (Count count : counts) {
1682                     String pattern = prf.getSamplePattern(locale, type.standardType, count);
1683                     final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
1684                     failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
1685                     if (countsFound == null || !countsFound.contains(count)) {
1686                         countsWithNoSamples.add(count);
1687                     } else {
1688                         samplesToCounts.put(pattern, count);
1689                         logln(locale + "\t" + type + "\t" + count + "\t"
1690                             + pattern);
1691                     }
1692                 }
1693                 if (!countsWithNoSamples.isEmpty()) {
1694                     errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
1695                         "cldrbug:7075", "Missing ordinal minimal pairs");
1696                     errOrLog(needsCoverage2, failureCases.toString());
1697                 }
1698                 for (Entry<String, Set<Count>> entry : samplesToCounts
1699                     .keyValuesSet()) {
1700                     if (entry.getValue().size() != 1) {
1701                         errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
1702                             + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs");
1703                         errOrLog(needsCoverage2, failureCases.toString());
1704                     }
1705                 }
1706             }
1707         }
1708     }
1709 
errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment)1710     public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
1711         switch (causeError) {
1712         case error:
1713             if (logTicket == null) {
1714                 errln(message);
1715                 break;
1716             }
1717             logKnownIssue(logTicket, logComment);
1718             // fall through
1719         case warn:
1720             warnln(message);
1721             break;
1722         case log:
1723             logln(message);
1724             break;
1725         }
1726     }
1727 
errOrLog(CoverageIssue causeError, String message)1728     public void errOrLog(CoverageIssue causeError, String message) {
1729         errOrLog(causeError, message, null, null);
1730     }
1731 
TestNumberingSystemDigits()1732     public void TestNumberingSystemDigits() {
1733 
1734         // Don't worry about digits from supplemental planes yet ( ICU can't
1735         // handle them anyways )
1736         // hanidec is the only known non codepoint order numbering system
1737         // TODO: Fix so that it works properly on non-BMP digit strings.
1738         String[] knownExceptions = { "brah", "cakm", "hanidec", "osma", "shrd",
1739             "sora", "takr" };
1740         List<String> knownExceptionList = Arrays.asList(knownExceptions);
1741         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1742             if (knownExceptionList.contains(ns)) {
1743                 continue;
1744             }
1745             String digits = SUPPLEMENTAL.getDigits(ns);
1746             int previousChar = 0;
1747             int ch;
1748 
1749             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1750                 ch = digits.codePointAt(i);
1751                 if (i > 0 && ch != previousChar + 1) {
1752                     errln("Digits for numbering system "
1753                         + ns
1754                         + " are not in code point order. Previous char = U+"
1755                         + Utility.hex(previousChar, 4)
1756                         + " Current char = U+" + Utility.hex(ch, 4));
1757                     break;
1758                 }
1759                 previousChar = ch;
1760             }
1761         }
1762     }
1763 
TestNumberingSystemDigitCompleteness()1764     public void TestNumberingSystemDigitCompleteness() {
1765         List<Integer> unicodeDigits = new ArrayList<Integer>();
1766         for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
1767             if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
1768                 unicodeDigits.add(Integer.valueOf(cp));
1769             }
1770         }
1771 
1772         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1773             String digits = SUPPLEMENTAL.getDigits(ns);
1774             int ch;
1775 
1776             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1777                 ch = digits.codePointAt(i);
1778                 unicodeDigits.remove(Integer.valueOf(ch));
1779             }
1780         }
1781 
1782         if (unicodeDigits.size() > 0) {
1783             for (Integer i : unicodeDigits) {
1784                 errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = "
1785                     + UScript.getShortName(UScript.getScript(i)));
1786             }
1787         }
1788     }
1789 
TestMetazones()1790     public void TestMetazones() {
1791         Date goalMin = new Date(70, 0, 1);
1792         Date goalMax = new Date(300, 0, 2);
1793         ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
1794         for (String timezoneRaw : TimeZone.getAvailableIDs()) {
1795             String timezone = TimeZone.getCanonicalID(timezoneRaw);
1796             String region = TimeZone.getRegion(timezone);
1797             if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
1798                 continue;
1799             }
1800             if (knownTZWithoutMetazone.contains(timezone)) {
1801                 continue;
1802             }
1803             final Set<MetaZoneRange> ranges = SUPPLEMENTAL
1804                 .getMetaZoneRanges(timezone);
1805 
1806             if (assertNotNull("metazones for " + timezone, ranges)) {
1807                 long min = Long.MAX_VALUE;
1808                 long max = Long.MIN_VALUE;
1809                 for (MetaZoneRange range : ranges) {
1810                     if (range.dateRange.from != DateRange.START_OF_TIME) {
1811                         min = Math.min(min, range.dateRange.from);
1812                     }
1813                     if (range.dateRange.to != DateRange.END_OF_TIME) {
1814                         max = Math.max(max, range.dateRange.to);
1815                     }
1816                 }
1817                 assertRelation(timezone + " has metazone before 1970?", true,
1818                     goalMin, LEQ, new Date(min));
1819                 assertRelation(timezone
1820                     + " has metazone until way in the future?", true,
1821                     goalMax, GEQ, new Date(max));
1822             }
1823         }
1824         com.google.common.collect.Interners i;
1825     }
1826 
Test9924()1827     public void Test9924() {
1828         PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(LOCALES_FIXED ? "zh" : "zh_Hans", "CN");
1829         PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
1830         assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
1831     }
1832 
Test10765()1833     public void Test10765() { //
1834         Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
1835         Set<String> mainLanguages = new TreeSet<>();
1836         LanguageTagParser ltp = new LanguageTagParser();
1837         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1838             mainLanguages.add(ltp.set(locale).getLanguage());
1839         }
1840         // add special codes we want to see anyway
1841         mainLanguages.add("und");
1842         mainLanguages.add("mul");
1843         mainLanguages.add("zxx");
1844 
1845         if (!mainLanguages.containsAll(surveyToolLanguages)) {
1846             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
1847             Set<String> temp = new TreeSet<>(surveyToolLanguages);
1848             temp.removeAll(mainLanguages);
1849             Set<String> modern = new TreeSet<>();
1850             Set<String> comprehensive = new TreeSet<>();
1851             for (String lang : temp) {
1852                 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
1853                 if (level.compareTo(Level.MODERN) <= 0) {
1854                     modern.add(lang);
1855                 } else {
1856                     comprehensive.add(lang);
1857                 }
1858             }
1859             warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
1860             logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
1861         }
1862         if (!surveyToolLanguages.containsAll(mainLanguages)) {
1863             mainLanguages.removeAll(surveyToolLanguages);
1864             assertEquals("No main/* languages are missing from Survey Tool:language names (eg <variable id='$language' type='choice'>) ",
1865                 Collections.EMPTY_SET, mainLanguages);
1866         }
1867     }
1868 
getNames(Set<String> temp)1869     private Set<String> getNames(Set<String> temp) {
1870         Set<String> tempNames = new TreeSet<>();
1871         for (String langCode : temp) {
1872             tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
1873         }
1874         return tempNames;
1875     }
1876 }
1877