1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Collections;
10 import java.util.EnumMap;
11 import java.util.EnumSet;
12 import java.util.HashMap;
13 import java.util.HashSet;
14 import java.util.Iterator;
15 import java.util.LinkedHashMap;
16 import java.util.LinkedHashSet;
17 import java.util.List;
18 import java.util.Locale;
19 import java.util.Map;
20 import java.util.Map.Entry;
21 import java.util.Set;
22 import java.util.TreeMap;
23 import java.util.TreeSet;
24 import java.util.regex.Matcher;
25 
26 import org.unicode.cldr.draft.FileUtilities;
27 import org.unicode.cldr.test.CheckCLDR.InputMethod;
28 import org.unicode.cldr.test.CheckCLDR.Phase;
29 import org.unicode.cldr.test.CheckCLDR.StatusAction;
30 import org.unicode.cldr.test.CoverageLevel2;
31 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
32 import org.unicode.cldr.tool.Option.Options;
33 import org.unicode.cldr.util.Annotations;
34 import org.unicode.cldr.util.CLDRConfig;
35 import org.unicode.cldr.util.CLDRFile;
36 import org.unicode.cldr.util.CLDRFile.DraftStatus;
37 import org.unicode.cldr.util.CLDRFile.Status;
38 import org.unicode.cldr.util.CLDRInfo.CandidateInfo;
39 import org.unicode.cldr.util.CLDRInfo.PathValueInfo;
40 import org.unicode.cldr.util.CLDRInfo.UserInfo;
41 import org.unicode.cldr.util.CLDRLocale;
42 import org.unicode.cldr.util.CLDRPaths;
43 import org.unicode.cldr.util.CLDRURLS;
44 import org.unicode.cldr.util.CldrUtility;
45 import org.unicode.cldr.util.CoreCoverageInfo;
46 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems;
47 import org.unicode.cldr.util.Counter;
48 import org.unicode.cldr.util.Counter2;
49 import org.unicode.cldr.util.CoverageInfo;
50 import org.unicode.cldr.util.DtdType;
51 import org.unicode.cldr.util.LanguageTagCanonicalizer;
52 import org.unicode.cldr.util.LanguageTagParser;
53 import org.unicode.cldr.util.Level;
54 import org.unicode.cldr.util.Organization;
55 import org.unicode.cldr.util.PathHeader;
56 import org.unicode.cldr.util.PathHeader.Factory;
57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
58 import org.unicode.cldr.util.PatternCache;
59 import org.unicode.cldr.util.RegexLookup;
60 import org.unicode.cldr.util.RegexLookup.LookupType;
61 import org.unicode.cldr.util.SimpleFactory;
62 import org.unicode.cldr.util.StandardCodes;
63 import org.unicode.cldr.util.SupplementalDataInfo;
64 import org.unicode.cldr.util.VettingViewer;
65 import org.unicode.cldr.util.VettingViewer.MissingStatus;
66 import org.unicode.cldr.util.VoteResolver.VoterInfo;
67 
68 import com.google.common.collect.ImmutableSet;
69 import com.google.common.collect.LinkedHashMultimap;
70 import com.google.common.collect.Multimap;
71 import com.google.common.collect.Ordering;
72 import com.ibm.icu.dev.util.CollectionUtilities;
73 import com.ibm.icu.dev.util.UnicodeMap;
74 import com.ibm.icu.impl.Relation;
75 import com.ibm.icu.lang.UCharacter;
76 import com.ibm.icu.text.NumberFormat;
77 import com.ibm.icu.text.UnicodeSet;
78 import com.ibm.icu.util.ICUUncheckedIOException;
79 
80 public class ShowLocaleCoverage {
81     private static final String SPREADSHEET_MISSING = "#LCode\tEnglish Name\tScript\tEnglish Value\tNative Value\tCldr Target\tPath Level\tStatus\tAction\tSTStatus\tST Link\tSection\tPage\tHeader\tCode\tPath";
82     private static final boolean DEBUG = false;
83     private static final char DEBUG_FILTER = 0; // use letter to only load locales starting with that letter
84 
85     private static final String LATEST = ToolConstants.CHART_VERSION;
86     private static final double CORE_SIZE = CoreItems.values().length - CoreItems.ONLY_RECOMMENDED.size();
87     public static CLDRConfig testInfo = ToolConfig.getToolInstance();
88     private static final StandardCodes SC = testInfo.getStandardCodes();
89     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo();
90     private static final StandardCodes STANDARD_CODES = SC;
91 
92     static org.unicode.cldr.util.Factory factory = testInfo.getCommonAndSeedAndMainAndAnnotationsFactory();
93     private static final CLDRFile ENGLISH = factory.make("en", true);
94 
95     private static UnicodeSet ENG_ANN = Annotations.getData("en").keySet();
96 
97     // added info using pattern in VettingViewer.
98 
99     static final RegexLookup<Boolean> HACK = RegexLookup.<Boolean> of(LookupType.STANDARD, RegexLookup.RegexFinderTransformPath)
100         .add("//ldml/localeDisplayNames/keys/key[@type=\"(d0|em|fw|i0|k0|lw|m0|rg|s0|ss|t0|x0)\"]", true)
101         .add("//ldml/localeDisplayNames/types/type[@key=\"(em|fw|kr|lw|ss)\"].*", true)
102         .add("//ldml/localeDisplayNames/languages/language[@type=\".*_.*\"]", true)
103         .add("//ldml/localeDisplayNames/languages/language[@type=\".*\"][@alt=\".*\"]", true)
104         .add("//ldml/localeDisplayNames/territories/territory[@type=\".*\"][@alt=\".*\"]", true)
105         .add("//ldml/localeDisplayNames/territories/territory[@type=\"EZ\"]", true);
106 
107     //private static final String OUT_DIRECTORY = CLDRPaths.GEN_DIRECTORY + "/coverage/"; // CldrUtility.MAIN_DIRECTORY;
108 
109     final static Options myOptions = new Options();
110 
111     enum MyOptions {
112         filter(".+", ".*", "Filter the information based on id, using a regex argument."),
113         //        draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft status."),
114         chart(null, null, "chart only"),
115         growth("true", "true", "Compute growth data"),
116         organization(".+", null, "Only locales for organization"),
117         version(".+",
118             LATEST, "To get different versions"),
119         rawData(null, null, "Output the raw data from all coverage levels"),
120         targetDir(".*",
121             CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."),
122         directories("(.*:)?[a-z]+(,[a-z]+)*", "common",
123             "Space-delimited list of main source directories: common,seed,exemplar.\n" +
124             "Optional, <baseDir>:common,seed"),;
125 
126         // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target directory."),
127         // layouts(null, null, "Only create html files for keyboard layouts"),
128         // repertoire(null, null, "Only create html files for repertoire"), ;
129         // boilerplate
130         final Option option;
131 
MyOptions(String argumentPattern, String defaultArgument, String helpText)132         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
133             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
134         }
135     }
136 
137     static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = new RegexLookup<Boolean>()
138         .add("\\[@alt=\"accounting\"]", true)
139         .add("\\[@alt=\"variant\"]", true)
140         .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true)
141         .add("^//ldml/localeDisplayNames/languages/language.*_", true)
142         .add("^//ldml/numbers/currencies/currency.*/symbol", true)
143         .add("^//ldml/characters/exemplarCharacters", true);
144 
145     static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed;
146     static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH);
147 
148     static boolean RAW_DATA = true;
149     private static Set<String> COMMON_LOCALES;
150 
main(String[] args)151     public static void main(String[] args) throws IOException {
152         myOptions.parse(MyOptions.filter, args, true);
153 
154         if (MyOptions.chart.option.doesOccur()) {
155             showCoverage(null);
156             return;
157         }
158 
159         Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher("");
160 
161         if (MyOptions.growth.option.doesOccur()) {
162             try (PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-growth.tsv")) {
163                 doGrowth(matcher, out);
164                 return;
165             }
166         }
167 
168         Set<String> locales = null;
169         String organization = MyOptions.organization.option.getValue();
170         boolean useOrgLevel = MyOptions.organization.option.doesOccur();
171         if (useOrgLevel) {
172             locales = STANDARD_CODES.getLocaleCoverageLocales(organization);
173         }
174 
175         if (MyOptions.version.option.doesOccur()) {
176             String number = MyOptions.version.option.getValue().trim();
177             if (!number.contains(".")) {
178                 number += ".0";
179             }
180             factory = org.unicode.cldr.util.Factory.make(
181                 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*");
182         } else {
183             if (MyOptions.directories.option.doesOccur()) {
184                 String directories = MyOptions.directories.option.getValue().trim();
185                 CLDRConfig cldrConfig = CLDRConfig.getInstance();
186                 String base = null;
187                 int colonPos = directories.indexOf(':');
188                 if (colonPos >= 0) {
189                     base = directories.substring(0, colonPos).trim();
190                     directories = directories.substring(colonPos + 1).trim();
191                 } else {
192                     base = cldrConfig.getCldrBaseDirectory().toString();
193                 }
194                 String[] items = directories.split(",\\s*");
195                 File[] fullDirectories = new File[items.length];
196                 int i = 0;
197                 for (String item : items) {
198                     fullDirectories[i++] = new File(base + "/" + item + "/main");
199                 }
200                 factory = SimpleFactory.make(fullDirectories, ".*");
201                 COMMON_LOCALES = SimpleFactory.make(base + "/" + "common" + "/main", ".*").getAvailableLanguages();
202             }
203         }
204         fixCommonLocales();
205 
206         RAW_DATA = MyOptions.rawData.option.doesOccur();
207 
208         //showEnglish();
209 
210         showCoverage(null, matcher, locales, useOrgLevel);
211     }
212 
fixCommonLocales()213     public static void fixCommonLocales() {
214         if (COMMON_LOCALES == null) {
215             COMMON_LOCALES = factory.getAvailableLanguages();
216         }
217     }
218 
doGrowth(Matcher matcher, PrintWriter out)219     private static void doGrowth(Matcher matcher, PrintWriter out) {
220         TreeMap<String, List<Double>> growthData = new TreeMap<>(Ordering.natural().reverse()); // sort by version, descending
221 //        if (DEBUG) {
222 //            for (String dir : new File(CLDRPaths.ARCHIVE_DIRECTORY).list()) {
223 //                if (!dir.startsWith("cldr")) {
224 //                    continue;
225 //                }
226 //                String version = getNormalizedVersion(dir);
227 //                if (version == null) {
228 //                    continue;
229 //                }
230 //                org.unicode.cldr.util.Factory newFactory = org.unicode.cldr.util.Factory.make(
231 //                    CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*");
232 //                System.out.println("Reading: " + version);
233 //                Map<String, FoundAndTotal> currentData = addGrowth(newFactory, matcher);
234 //                System.out.println("Read: " + version + "\t" + currentData);
235 //                break;
236 //            }
237 //        }
238         Map<String, FoundAndTotal> latestData = addGrowth(factory, null, matcher, DEBUG);
239         addCompletionList(getYearFromVersion(LATEST, false), getCompletion(latestData, latestData), growthData);
240         if (DEBUG) System.out.println(latestData);
241         //System.out.println(growthData);
242         List<String> dirs = new ArrayList<>(Arrays.asList(new File(CLDRPaths.ARCHIVE_DIRECTORY).list()));
243         Collections.reverse(dirs);
244         for (String dir : dirs) {
245             if (!dir.startsWith("cldr")) {
246                 continue;
247             }
248             String version = getNormalizedVersion(dir);
249             if (version == null) {
250                 continue;
251             }
252 //            if (version.compareTo("12") < 0) {
253 //                continue;
254 //            }
255             System.out.println("Reading: " + version);
256             if (version.equals("2008")) {
257                 int debug = 0;
258             }
259             Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false);
260             System.out.println("Read: " + version + "\t" + currentData);
261             Counter2<String> completionData = getCompletion(latestData, currentData);
262             //System.out.println(version + "\t" + completionData);
263             addCompletionList(version, completionData, growthData);
264             if (DEBUG) System.out.println(currentData);
265         }
266         boolean first = true;
267         for (Entry<String, List<Double>> entry : growthData.entrySet()) {
268             if (first) {
269                 for (int i = 0; i < entry.getValue().size(); ++i) {
270                     out.print("\t" + i);
271                 }
272                 out.println();
273                 first = false;
274             }
275             out.println(entry.getKey() + "\t" + CollectionUtilities.join(entry.getValue(), "\t"));
276         }
277     }
278 
279     static final Map<String, String> versionToYear = new HashMap<>();
280     static {
281         int[][] mapping = {
282             { 34, 2018 },
283             { 32, 2017 },
284             { 30, 2016 },
285             { 28, 2015 },
286             { 26, 2014 },
287             { 24, 2013 },
288             { 22, 2012 },
289             { 20, 2011 },
290             { 19, 2010 },
291             { 17, 2009 },
292             { 16, 2008 },
293             { 15, 2007 },
294             { 14, 2006 },
295             { 13, 2005 },
296             { 12, 2004 },
297             { 10, 2003 },
298         };
299         for (int[] row : mapping) {
String.valueOf(row[0])300             versionToYear.put(String.valueOf(row[0]), String.valueOf(row[1]));
301         }
302     }
303 
getNormalizedVersion(String dir)304     public static String getNormalizedVersion(String dir) {
305         String rawVersion = dir.substring(dir.indexOf('-') + 1);
306         int firstDot = rawVersion.indexOf('.');
307         int secondDot = rawVersion.indexOf('.', firstDot + 1);
308         if (secondDot > 0) {
309             rawVersion = rawVersion.substring(0, firstDot) + rawVersion.substring(firstDot + 1, secondDot);
310         } else {
311             rawVersion = rawVersion.substring(0, firstDot);
312         }
313         String result = getYearFromVersion(rawVersion, true);
314         return result == null ? null : result.toString();
315     }
316 
getYearFromVersion(String version, boolean allowNull)317     private static String getYearFromVersion(String version, boolean allowNull) {
318         String result = versionToYear.get(version);
319         if (!allowNull && result == null) {
320             throw new IllegalArgumentException("No year for version: " + version);
321         }
322         return result;
323     }
324 
addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData)325     public static void addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData) {
326         List<Double> x = new ArrayList<>();
327         for (String key : completionData.getKeysetSortedByCount(false)) {
328             x.add(completionData.getCount(key));
329         }
330         growthData.put(version, x);
331         System.out.println(version + "\t" + x.size());
332     }
333 
getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData)334     public static Counter2<String> getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData) {
335         Counter2<String> completionData = new Counter2<>();
336         for (Entry<String, FoundAndTotal> entry : latestData.entrySet()) {
337             final String locale = entry.getKey();
338             final FoundAndTotal currentRecord = currentData.get(locale);
339             if (currentRecord == null) {
340                 continue;
341             }
342             double total = entry.getValue().total;
343             if (total == 0) {
344                 continue;
345             }
346             double completion = currentRecord.found / total;
347             completionData.add(locale, completion);
348         }
349         return completionData;
350     }
351 
352     static class FoundAndTotal {
353         final int found;
354         final int total;
355 
FoundAndTotal(Counter<Level>.... counters)356         public FoundAndTotal(Counter<Level>... counters) {
357             final int[] count = { 0, 0, 0 };
358             for (Level level : Level.values()) {
359                 if (level == Level.COMPREHENSIVE || level == Level.OPTIONAL) {
360                     continue;
361                 }
362                 int i = 0;
363                 for (Counter<Level> counter : counters) {
364                     count[i++] += counter.get(level);
365                 }
366             }
367             found = count[0];
368             total = found + count[1] + count[2];
369         }
370 
371         @Override
toString()372         public String toString() {
373             return found + "/" + total;
374         }
375     }
376 
addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing)377     private static Map<String, FoundAndTotal> addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing) {
378         org.unicode.cldr.util.Factory newFactory = dir == null ? factory
379             : org.unicode.cldr.util.Factory.make(
380                 CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*");
381         Map<String, FoundAndTotal> data = new HashMap<>();
382         char c = 0;
383         Set<String> latestAvailable = newFactory.getAvailableLanguages();
384         for (String locale : newFactory.getAvailableLanguages()) {
385             if (!matcher.reset(locale).matches()) {
386                 continue;
387             }
388             if (!latestAvailable.contains(locale)) {
389                 continue;
390             }
391             if (SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales().contains(locale)
392                 || locale.equals("root")
393                 || locale.equals("supplementalData")) {
394                 continue;
395             }
396             char nc = locale.charAt(0);
397             if (nc != c) {
398                 System.out.println("\t" + locale);
399                 c = nc;
400             }
401             if (DEBUG_FILTER != 0 && DEBUG_FILTER != nc) {
402                 continue;
403             }
404             CLDRFile latestFile = null;
405             try {
406                 latestFile = latestFactory.make(locale, true);
407             } catch (Exception e2) {
408                 continue;
409             }
410             final CLDRFile file = newFactory.make(locale, true);
411             // HACK check bogus
412 //            Collection<String> extra = file.getExtraPaths();
413 //
414 //            final Iterable<String> fullIterable = file.fullIterable();
415 //            for (String path : fullIterable) {
416 //                if (path.contains("\"one[@")) {
417 //                    boolean inside = extra.contains(path);
418 //                    Status status = new Status();
419 //                    String loc = file.getSourceLocaleID(path, status );
420 //                    int debug = 0;
421 //                }
422 //            }
423             // END HACK
424             Counter<Level> foundCounter = new Counter<Level>();
425             Counter<Level> unconfirmedCounter = new Counter<Level>();
426             Counter<Level> missingCounter = new Counter<Level>();
427             Set<String> unconfirmedPaths = null;
428             Relation<MissingStatus, String> missingPaths = null;
429             unconfirmedPaths = new LinkedHashSet<>();
430             missingPaths = Relation.of(new LinkedHashMap(), LinkedHashSet.class);
431             VettingViewer.getStatus(latestFile.fullIterable(), file,
432                 pathHeaderFactory, foundCounter, unconfirmedCounter,
433                 missingCounter, missingPaths, unconfirmedPaths);
434 
435             // HACK
436             Set<Entry<MissingStatus, String>> missingRemovals = new HashSet<>();
437             for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) {
438                 if (e.getKey() == MissingStatus.ABSENT) {
439                     final String path = e.getValue();
440                     if (HACK.get(path) != null) {
441                         missingRemovals.add(e);
442                         missingCounter.add(Level.MODERN, -1);
443                         foundCounter.add(Level.MODERN, 1);
444                     } else {
445                         Status status = new Status();
446                         String loc = file.getSourceLocaleID(path, status);
447                         int debug = 0;
448                     }
449                 }
450             }
451             for (Entry<MissingStatus, String> e : missingRemovals) {
452                 missingPaths.remove(e.getKey(), e.getValue());
453             }
454             // END HACK
455 
456             if (showMissing) {
457                 int count = 0;
458                 for (String s : unconfirmedPaths) {
459                     System.out.println(++count + "\t" + locale + "\tunconfirmed\t" + s);
460                 }
461                 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) {
462                     String path = e.getValue();
463                     Status status = new Status();
464                     String loc = file.getSourceLocaleID(path, status);
465                     int debug = 0;
466 
467                     System.out.println(++count + "\t" + locale + "\t" + CldrUtility.toString(e));
468                 }
469                 int debug = 0;
470             }
471 
472             // add annotations
473             System.out.println(locale + " annotations");
474             try {
475                 UnicodeMap<Annotations> annotations = dir == null ? Annotations.getData(locale)
476                     : Annotations.getData(CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/annotations/", locale);
477                 for (String cp : ENG_ANN) {
478                     Annotations annotation = annotations.get(cp);
479                     if (annotation == null) {
480                         missingCounter.add(Level.MODERN, 1);
481                     } else if (annotation.getShortName() == null) {
482                         missingCounter.add(Level.MODERN, 1);
483                     } else {
484                         foundCounter.add(Level.MODERN, 1);
485                     }
486                 }
487             } catch (Exception e1) {
488                 missingCounter.add(Level.MODERN, ENG_ANN.size());
489             }
490 
491             data.put(locale, new FoundAndTotal(foundCounter, unconfirmedCounter, missingCounter));
492         }
493         return Collections.unmodifiableMap(data);
494     }
495 
showCoverage(Anchors anchors)496     public static void showCoverage(Anchors anchors) throws IOException {
497         showCoverage(anchors, PatternCache.get(".*").matcher(""), null, false);
498     }
499 
showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)500     public static void showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) throws IOException {
501         final String title = "Locale Coverage";
502         try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors));
503             PrintWriter tsv_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv");
504             PrintWriter tsv_missing = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv");
505             ){
506             printData(pw, tsv_summary, tsv_missing, locales, matcher, useOrgLevel);
507             new ShowPlurals().appendBlanksForScrolling(pw);
508         }
509     }
510 
511 //    public static void showEnglish() {
512 //        Map<PathHeader,String> sorted = new TreeMap<>();
513 //        CoverageInfo coverageInfo=CLDRConfig.getInstance().getCoverageInfo();
514 //        for (String path : ENGLISH) {
515 ////            Level currentLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, "en");
516 //            Level currentLevel=coverageInfo.getCoverageLevel(path, "en");
517 //            if (currentLevel.compareTo(Level.MINIMAL) <= 0) {
518 //                PathHeader ph = pathHeaderFactory.fromPath(path);
519 //                sorted.put(ph, currentLevel + "\t" + ENGLISH.getStringValue(path));
520 //            }
521 //        }
522 //        for (Entry<PathHeader, String> entry : sorted.entrySet()) {
523 //            System.out.println(entry.getKey() + "\t" + entry.getValue());
524 //        }
525 //    }
526 
527     static class IterableFilter implements Iterable<String> {
528         private Iterable<String> source;
529 
IterableFilter(Iterable<String> source)530         IterableFilter(Iterable<String> source) {
531             this.source = source;
532         }
533 
534         /**
535          * When some paths are defined after submission, we need to change them to COMPREHENSIVE in computing the vetting status.
536          */
537 
538         static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of(
539             "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]",
540             "//ldml/localeDisplayNames/territories/territory[@type=\"XA\"]",
541             "//ldml/localeDisplayNames/territories/territory[@type=\"XB\"]",
542             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]",
543             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]",
544             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]",
545             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]",
546             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]",
547             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]",
548             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]",
549             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]",
550             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]",
551             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]",
552             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]",
553             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]",
554             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]",
555             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]",
556             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]",
557             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]",
558             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]",
559             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]",
560             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]",
561             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]",
562             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]",
563             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]",
564             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]",
565             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]",
566             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]",
567             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]",
568             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]",
569             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]",
570             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]",
571             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]",
572             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]",
573             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]",
574             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]",
575             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]",
576             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]",
577             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]",
578             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]",
579             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]",
580             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]",
581             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]",
582             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]",
583             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]",
584             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]",
585             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]",
586             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]",
587             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]",
588             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]",
589             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]"
590             );
591         @Override
iterator()592         public Iterator<String> iterator() {
593             return new IteratorFilter(source.iterator());
594         }
595 
596         static class IteratorFilter implements Iterator<String> {
597             Iterator<String> source;
598             String peek;
599 
IteratorFilter(Iterator<String> source)600             public IteratorFilter(Iterator<String> source) {
601                 this.source = source;
602                 fillPeek();
603             }
604             @Override
hasNext()605             public boolean hasNext() {
606                 return peek != null;
607             }
608             @Override
next()609             public String next() {
610                 String result = peek;
611                 fillPeek();
612                 return result;
613             }
614 
fillPeek()615             private void fillPeek() {
616                 peek = null;
617                 while (source.hasNext()) {
618                     peek = source.next();
619                     // if it is ok to assess, then break
620                     if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek)
621                         && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) {
622                         break;
623                     }
624                     peek = null;
625                 }
626             }
627         }
628 
629     }
printData(PrintWriter pw, PrintWriter tsv_summary, PrintWriter tsv_missing, Set<String> locales, Matcher matcher, boolean useOrgLevel)630     static void printData(PrintWriter pw, PrintWriter tsv_summary, PrintWriter tsv_missing, Set<String> locales, Matcher matcher, boolean useOrgLevel) {
631 //        Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN));
632         Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN));
633         Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages());
634         availableLanguages.addAll(checkModernLocales);
635         Relation<String, String> languageToRegion = Relation.of(new TreeMap(), TreeSet.class);
636         LanguageTagParser ltp = new LanguageTagParser();
637         LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true);
638         for (String locale : factory.getAvailable()) {
639             String country = ltp.set(locale).getRegion();
640             if (!country.isEmpty()) {
641                 languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country);
642             }
643         }
644 
645         fixCommonLocales();
646 
647         System.out.println(CollectionUtilities.join(languageToRegion.keyValuesSet(), "\n"));
648 
649         System.out.println("# Checking: " + availableLanguages);
650 
651         pw.println("<p style='text-align: left'>This chart shows the coverage levels for this release. </p>" +
652             "<ol>"
653             + "<li>Fields = fields found at a modern level</li>"
654             + "<li>UC = unconfirmed values: typically treated as missing by implementations</li>"
655             + "<li>Miss = missing values</li>"
656             + "<li>Modern%, etc = fields/(fields + missing + unconfirmed) — at that level</li></ol>"
657             + "<li>Core Missing = missing core fields — optionals marked with *</li></ol>"
658             + "<p>A high-level summary of the meaning of the coverage values are at " +
659             "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. " +
660             "The Core values are described on " +
661             "<a target='_blank' href='http://cldr.unicode.org/index/cldr-spec/minimaldata'>Core Data</a>." +
662             "</p>");
663 
664         Relation<MissingStatus, String> missingPaths = Relation.of(new EnumMap<MissingStatus, Set<String>>(
665             MissingStatus.class), TreeSet.class, CLDRFile.getComparator(DtdType.ldml));
666         Set<String> unconfirmed = new TreeSet<String>(CLDRFile.getComparator(DtdType.ldml));
667 
668         //Map<String, String> likely = testInfo.getSupplementalDataInfo().getLikelySubtags();
669         Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
670 
671         // Map<String,Counter<Level>> counts = new HashMap();
672         //        System.out.print("Script\tEnglish\tNative\tCode\tCode*");
673         //        for (Level level : Level.values()) {
674         //            if (skipPrintingLevels.contains(level)) {
675         //                continue;
676         //            }
677         //            System.out.print("\t≤" + level + " (f)\t(u)\t(m)");
678         //        }
679         //        System.out.println();
680         // Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getCldrFactory().make("en", true));
681 
682         tsv_missing.println(SPREADSHEET_MISSING);
683 
684         Counter<Level> foundCounter = new Counter<Level>();
685         Counter<Level> unconfirmedCounter = new Counter<Level>();
686         Counter<Level> missingCounter = new Counter<Level>();
687 
688         List<Level> reversedLevels = new ArrayList<>(EnumSet.allOf(Level.class));
689         reversedLevels.remove(Level.COMPREHENSIVE);
690         reversedLevels.remove(Level.UNDETERMINED);
691         Collections.reverse(reversedLevels);
692 
693         PrintWriter out2;
694         try {
695             out2 = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "showLocaleCoverage.tsv");
696         } catch (IOException e1) {
697             throw new ICUUncheckedIOException(e1);
698         }
699 
700         out2.print("Code\tCom?\tEnglish Name\tNative Name\tScript\tSublocales\tStrings");
701         for (Level level : reversedLevels) {
702             out2.print("\t" + level + " %\t" + level + " UC%");
703         }
704         out2.println();
705         //System.out.println("\tCore*\nCore* Missing");
706         int localeCount = 0;
707 
708         final TablePrinter tablePrinter = new TablePrinter()
709             .addColumn("Direct.", "class='source'", null, "class='source'", true)
710             .setBreakSpans(true).setSpanRows(false)
711             .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true).setBreakSpans(true)
712             .addColumn("English Name", "class='source'", null, "class='source'", true).setBreakSpans(true)
713             .addColumn("Native Name", "class='source'", null, "class='source'", true).setBreakSpans(true)
714             .addColumn("Script", "class='source'", null, "class='source'", true).setBreakSpans(true)
715             .addColumn("CLDR target", "class='source'", null, "class='source'", true).setBreakSpans(true).setSortPriority(0).setSortAscending(false)
716             .addColumn("Sublocales", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
717             .setCellPattern("{0,number}")
718             .addColumn("Fields", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
719             .setCellPattern("{0,number}")
720             .addColumn("UC", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
721             .setCellPattern("{0,number}")
722             .addColumn("Miss", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
723             .setCellPattern("{0,number}")
724             //.addColumn("Target Level", "class='target'", null, "class='target'", true).setBreakSpans(true)
725             ;
726         tsv_summary.println("Dir"
727             + "\tCode"
728             + "\tEnglish Name"
729             + "\tNative Name"
730             + "\tScript"
731             + "\tCLDR target"
732             + "\tSublocales"
733             + "\tFields\tUC\tMissing"
734             + "\tModern\tMiss +UC"
735             + "\tModerate\tMiss +UC"
736             + "\tBasic\tMiss +UC"
737             + "\tCore\tMiss +UC"
738             + "\tCore-Missing");
739         NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH);
740         tsvPercent.setMaximumFractionDigits(2);
741 
742         for (Level level : reversedLevels) {
743             String titleLevel = level.toString();
744             tablePrinter.addColumn(UCharacter.toTitleCase(titleLevel, null) + "%", "class='target'", null, "class='targetRight'", true)
745             .setCellPattern("{0,number,0.0%}")
746             .setBreakSpans(true);
747             switch(level) {
748             case CORE:
749                 tablePrinter.setSortPriority(4).setSortAscending(false);
750                 break;
751             case BASIC:
752                 tablePrinter.setSortPriority(3).setSortAscending(false);
753                 break;
754             case MODERATE:
755                 tablePrinter.setSortPriority(2).setSortAscending(false);
756                 break;
757             case MODERN:
758                 tablePrinter.setSortPriority(1).setSortAscending(false);
759                 break;
760             }
761 //            tablePrinter
762 //            .addColumn("∪ UC%", "class='target'", null, "class='targetRight'", true)
763 //            .setCellPattern("{0,number,0.0%}")
764 //            .setBreakSpans(true)
765             ;
766         }
767         tablePrinter.addColumn("Core Missing", "class='target'", null, "class='targetRight'", true)
768         .setBreakSpans(true);
769 
770         long start = System.currentTimeMillis();
771         LikelySubtags likelySubtags = new LikelySubtags();
772 
773         EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class);
774         targetLevel.put(Level.CORE, 2 / 100d);
775         targetLevel.put(Level.BASIC, 16 / 100d);
776         targetLevel.put(Level.MODERATE, 33 / 100d);
777         targetLevel.put(Level.MODERN, 100 / 100d);
778 
779 //        NumberFormat percentFormat = NumberFormat.getPercentInstance(ULocale.ENGLISH);
780 //        percentFormat.setMaximumFractionDigits(2);
781 //        percentFormat.setMinimumFractionDigits(2);
782 //        NumberFormat intFormat = NumberFormat.getIntegerInstance(ULocale.ENGLISH);
783 
784         int counter = 0;
785         for (String locale : availableLanguages) {
786             try {
787                 if (locale.contains("supplemental")) { // for old versions
788                     continue;
789                 }
790                 if (locales != null && !locales.contains(locale)) {
791                     String base = CLDRLocale.getInstance(locale).getLanguage();
792                     if (!locales.contains(base)) {
793                         continue;
794                     }
795                 }
796                 if (!matcher.reset(locale).matches()) {
797                     continue;
798                 }
799                 if (defaultContents.contains(locale) || "root".equals(locale) || "und".equals(locale)) {
800                     continue;
801                 }
802 
803                 boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists();
804 
805                 //boolean capture = locale.equals("en");
806                 String region = ltp.set(locale).getRegion();
807                 if (!region.isEmpty()) continue; // skip regions
808 
809                 final Level cldrLocaleLevelGoal = SC.getLocaleCoverageLevel(Organization.cldr.toString(), locale);
810                 final boolean cldrLevelGoalModerateOrAbove = cldrLocaleLevelGoal.compareTo(Level.MODERATE) >= 0;
811 
812                 String isCommonLocale = Level.MODERN == cldrLocaleLevelGoal ? "C*"
813                     : COMMON_LOCALES.contains(locale) ? "C"
814                         : "";
815 
816                 String max = likelySubtags.maximize(locale);
817                 String script = ltp.set(max).getScript();
818 
819                 String language = likelySubtags.minimize(locale);
820 //                Level otherLevel = STANDARD_CODES.getLocaleCoverageLevel("apple", locale);
821 //                if (otherLevel.compareTo(currentLevel) > 0
822 //                    && otherLevel.compareTo(Level.MODERN) <= 0) {
823 //                    currentLevel = otherLevel;
824 //                }
825 
826                 missingPaths.clear();
827                 unconfirmed.clear();
828 
829                 final CLDRFile file = factory.make(locale, true, minimumDraftStatus);
830 
831                 if (locale.equals("af")) {
832                     int debug = 0;
833                 }
834 
835                 Iterable<String> pathSource = new IterableFilter(file.fullIterable());
836 
837                 VettingViewer.getStatus(pathSource, file,
838                     pathHeaderFactory, foundCounter, unconfirmedCounter,
839                     missingCounter, missingPaths, unconfirmed);
840 
841                 Set<String> sublocales = languageToRegion.get(language);
842                 if (sublocales == null) {
843                     //System.err.println("No Sublocales: " + language);
844                     sublocales = Collections.EMPTY_SET;
845                 }
846 
847 //                List s = Lists.newArrayList(file.fullIterable());
848 
849                 String seedString = isSeed ? "seed" : "common";
850                 tablePrinter.addRow()
851                 .addCell(seedString)
852                 .addCell(language)
853                 .addCell(ENGLISH.getName(language))
854                 .addCell(file.getName(language))
855                 .addCell(script)
856                 .addCell(cldrLocaleLevelGoal)
857                 .addCell(sublocales.size());
858 
859                 tsv_summary
860                 .append(seedString)
861                 .append('\t').append(language)
862                 .append('\t').append(ENGLISH.getName(language))
863                 .append('\t').append(file.getName(language))
864                 .append('\t').append(script)
865                 .append('\t').append(cldrLocaleLevelGoal.toString())
866                 .append('\t').append(sublocales.size()+"");
867                 ;
868 
869 //                String header = language
870 //                    + "\t" + isCommonLocale
871 //                    + "\t" + ENGLISH.getName(language)
872 //                    + "\t" + file.getName(language)
873 //                    + "\t" + script
874 //                    + "\t" + sublocales.size()
875 //                    //+ "\t" + currentLevel
876 //                    ;
877 
878                 int sumFound = 0;
879                 int sumMissing = 0;
880                 int sumUnconfirmed = 0;
881 
882                 // get the totals
883 
884                 EnumMap<Level, Integer> totals = new EnumMap<>(Level.class);
885                 EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class);
886 //                EnumMap<Level, Integer> unconfirmedByLevel = new EnumMap<>(Level.class);
887                 Set<String> coreMissing = new LinkedHashSet<>();
888 
889                 if (locale.equals("af")) {
890                     int debug = 0;
891                 }
892 
893                 { // CORE
894                     long missingExemplarCount = missingCounter.get(Level.CORE);
895                     if (missingExemplarCount > 0) {
896                         for (Entry<MissingStatus, String> statusAndPath : missingPaths.entrySet()) {
897                             String path = statusAndPath.getValue();
898                             if (path.startsWith("//ldml/characters/exemplarCharacters")) {
899                                 PathHeader ph = pathHeaderFactory.fromPath(path);
900                                 String problem = ph.getCode().replaceAll("Others: ","").replaceAll("Main Letters", "main-letters");
901                                 coreMissing.add(problem);
902                                 // String line = spreadsheetLine(locale, script, language, cldrLevelGoal, foundLevel, missingStatus.toString(), path, file.getStringValue(path));
903                                 String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, Level.CORE, "ABSENT", path, "«No " + problem + "»");
904                                 tsv_missing.println(line);
905                             }
906                         }
907                     }
908                     Multimap<CoreItems, String> detailedErrors = LinkedHashMultimap.create();
909                     Set<CoreItems> coverage = new TreeSet<>(
910                         CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors));
911                     Set<CoreItems> missing = EnumSet.allOf(CoreItems.class);
912                     missing.removeAll(coverage);
913                     for (Entry<CoreItems, String> entry : detailedErrors.entries()) {
914                         CoreItems coreItem = entry.getKey();
915                         String value = entry.getValue();
916                         coreMissing.add(coreItem.toString());
917                         //String line = spreadsheetLine(language, script, "n/a", detailedErrors.get(entry).toString(), level, "ABSENT", "n/a", "n/a", "n/a");
918                         if (cldrLevelGoalModerateOrAbove) {
919                             String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, coreItem.desiredLevel, "ABSENT", value, "«No " + coreItem + "»");
920                             tsv_missing.println(line);
921                         }
922                     }
923                     missing.removeAll(CoreItems.ONLY_RECOMMENDED);
924                     foundCounter.add(Level.CORE, coverage.size());
925                     missingCounter.add(Level.CORE, missing.size());
926 
927 //                    sumFound += coverage.size();
928 //                    sumMissing += missing.size();
929 
930 //                    confirmed.put(Level.CORE, (int) coverage.size());
931 ////                    unconfirmedByLevel.put(level, (int)(foundCount + unconfirmedCount));
932 //                    totals.put(Level.CORE, (int)(coverage.size() + missing.size()));
933 
934                 }
935 
936                 if (cldrLevelGoalModerateOrAbove) {
937                     for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
938                         String path = entry.getValue();
939 //                        if (SKIP_PATHS.get(path) == null) {
940                         MissingStatus missingStatus = entry.getKey();
941                         CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO);
942                         Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
943                         if (cldrLocaleLevelGoal.compareTo(foundLevel) >= 0) {
944                             String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, foundLevel, missingStatus.toString(), path, file.getStringValue(path));
945                             tsv_missing.println(line);
946                         }
947                     }
948                 }
949 
950                 for (Level level : reversedLevels) {
951                     long foundCount = foundCounter.get(level);
952                     long unconfirmedCount = unconfirmedCounter.get(level);
953                     long missingCount = missingCounter.get(level);
954 
955                     sumFound += foundCount;
956                     sumUnconfirmed += unconfirmedCount;
957                     sumMissing += missingCount;
958 
959                     confirmed.put(level, (int) foundCount);
960 //                    unconfirmedByLevel.put(level, (int)(foundCount + unconfirmedCount));
961                     totals.put(level, (int)(foundCount + unconfirmedCount + missingCount));
962                 }
963 
964                 tsv_missing.flush();
965 
966                 double modernTotal = totals.get(Level.MODERN);
967 
968                 tablePrinter
969                 .addCell(sumFound)
970                 .addCell(sumUnconfirmed)
971                 .addCell(sumMissing)
972                 ;
973 
974                 tsv_summary
975                 .append('\t').append(sumFound+"")
976                 .append('\t').append(sumUnconfirmed+"")
977                 .append('\t').append(sumMissing+"")
978                 ;
979 
980 
981 //                header += "\t" + sumFound;
982 //                header += "\t" + (sumFound + sumUnconfirmed);
983 
984                 // print the totals
985 
986                 for (Level level : reversedLevels) {
987                     if (useOrgLevel && cldrLocaleLevelGoal != level) {
988                         continue;
989                     }
990                     int confirmedCoverage = confirmed.get(level);
991 //                    int unconfirmedCoverage = unconfirmedByLevel.get(level);
992                     double total = totals.get(level);
993 
994                     tablePrinter
995                     .addCell(confirmedCoverage / total)
996 //                    .addCell(unconfirmedCoverage / total)
997                     ;
998 
999                     tsv_summary
1000                     .append('\t').append(String.valueOf(confirmedCoverage))
1001                     .append('\t').append(String.valueOf((int)total - confirmedCoverage))
1002                     ;
1003 
1004 //                    if (RAW_DATA) {
1005 //                        header += "\t" + confirmedCoverage / total
1006 //                            + "\t" + unconfirmedCoverage / total;
1007 //                    } else {
1008 //                        Double factor = targetLevel.get(level) / (total / modernTotal);
1009 //                        header += "\t" + factor * confirmedCoverage / modernTotal
1010 ////                            + "\t" + factor * unconfirmedCoverage / modernTotal
1011 //                            ;
1012 //                    }
1013                 }
1014                 String coreMissingString =
1015                     CollectionUtilities.join(coreMissing, ", ");
1016 
1017                 tablePrinter
1018                 .addCell(coreMissingString)
1019                 .finishRow();
1020 
1021                 tsv_summary
1022                 .append('\t')
1023                 .append(coreMissingString)
1024                 .append('\n');
1025 
1026                 //out2.println(header + "\t" + coreValue + "\t" + CollectionUtilities.join(missing, ", "));
1027 
1028                 // Write missing paths (for >99% and specials
1029 
1030                 if (false) { // checkModernLocales.contains(locale)
1031                     CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance(locale);
1032 
1033                     for (String path : unconfirmed) {
1034                         Level level = coverageLevel2.getLevel(path);
1035                         if (level.compareTo(cldrLocaleLevelGoal) > 0) {
1036                             continue;
1037                         }
1038                         String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, level, "UNCONFIRMED", path, file.getStringValue(path));
1039                         if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) {
1040                             //System.out.println("\nSKIP: " + line);
1041                         } else {
1042                             tsv_missing.println(line);
1043                         }
1044                     }
1045                     for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
1046                         String path = entry.getValue();
1047                         Level level = coverageLevel2.getLevel(path);
1048                         if (level.compareTo(cldrLocaleLevelGoal) > 0) {
1049                             continue;
1050                         }
1051                         MissingStatus missingStatus = entry.getKey();
1052                         String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, level, missingStatus.toString(), path, "???");
1053                         if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) {
1054                             //System.out.println("\nSKIP: " + line);
1055                         } else {
1056                             tsv_missing.println(line);
1057                         }
1058                     }
1059                 }
1060 
1061                 localeCount++;
1062             } catch (Exception e) {
1063                 throw new IllegalArgumentException(e);
1064             }
1065         }
1066         pw.println(tablePrinter.toTable());
1067         out2.close();
1068 
1069         long end = System.currentTimeMillis();
1070         System.out.println((end - start) + " millis = "
1071             + ((end - start) / localeCount) + " millis/locale");
1072 
1073         //        CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en");
1074         //
1075         //        for (Entry<MissingStatus, Set<String>> entity : missingPaths.keyValuesSet()) {
1076         //            for (PathHeader s : CldrUtility.transform(entity.getValue(), pathHeaderFactory, new TreeSet<PathHeader>())) {
1077         //                System.out.println(entity.getKey() + "\t" + coverageLevel2.getLevel(s.getOriginalPath()) + "\t" + s
1078         //                        + "\t\t" + s.getOriginalPath());
1079         //            }
1080         //        }
1081     }
1082 // userInfo.getVoterInfo().getLevel().compareTo(VoteResolver.Level.tc)
1083     static final VoterInfo dummyVoterInfo = new VoterInfo(Organization.cldr, org.unicode.cldr.util.VoteResolver.Level.vetter, "somename");
1084 
1085     static final UserInfo dummyUserInfo = new UserInfo() {
1086         public VoterInfo getVoterInfo() {
1087             return dummyVoterInfo;
1088         }
1089     };
1090     static final PathValueInfo dummyPathValueInfo = new PathValueInfo() {
1091         // pathValueInfo.getCoverageLevel().compareTo(Level.COMPREHENSIVE)
1092         public Collection<? extends CandidateInfo> getValues() {
1093             throw new UnsupportedOperationException();
1094         }
1095         public CandidateInfo getCurrentItem() {
1096             throw new UnsupportedOperationException();
1097         }
1098         public String getLastReleaseValue() {
1099             throw new UnsupportedOperationException();
1100         }
1101         public Level getCoverageLevel() {
1102             return Level.MODERN;
1103         }
1104         public boolean hadVotesSometimeThisRelease() {
1105             throw new UnsupportedOperationException();
1106         }
1107     };
1108 
spreadsheetLine(String locale, String script, String language, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, String nativeValue)1109     public static String spreadsheetLine(String locale, String script, String language, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, String nativeValue) {
1110         String phString = "n/a\tn/a\tn/a\tn/a";
1111         String stLink = "n/a";
1112         String englishValue = "n/a";
1113         StatusAction action = null;
1114         SurveyToolStatus surveyToolStatus = null;
1115         try {
1116             PathHeader ph = pathHeaderFactory.fromPath(path);
1117             phString = ph.toString();
1118             surveyToolStatus = ph.getSurveyToolStatus();
1119             action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, InputMethod.DIRECT, surveyToolStatus, dummyUserInfo);
1120             stLink = URLS.forXpath(locale, ph.getOriginalPath());
1121             englishValue = ENGLISH.getStringValue(path);
1122         } catch (Exception e) {
1123         }
1124         String line = language
1125             + "\t" + ENGLISH.getName(language)
1126             + "\t" + ENGLISH.getName("script", script)
1127             + "\t" + englishValue
1128             + "\t" + nativeValue
1129             + "\t" + cldrLocaleLevelGoal
1130             + "\t" + itemLevel
1131             + "\t" + status
1132             + "\t" + (action == null ? "?" : action.toString())
1133             + "\t" + (surveyToolStatus == null ? "?" : surveyToolStatus.toString())
1134             + "\t" + stLink
1135             + "\t" + phString
1136             + "\t" + path
1137             ;
1138         return line;
1139     }
1140 
1141     private static CLDRURLS URLS = CLDRConfig.getInstance().urls();
1142 
1143 }
1144