1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.util.ArrayList; 7 import java.util.Arrays; 8 import java.util.Collection; 9 import java.util.Collections; 10 import java.util.EnumMap; 11 import java.util.EnumSet; 12 import java.util.HashMap; 13 import java.util.HashSet; 14 import java.util.Iterator; 15 import java.util.LinkedHashMap; 16 import java.util.LinkedHashSet; 17 import java.util.List; 18 import java.util.Locale; 19 import java.util.Map; 20 import java.util.Map.Entry; 21 import java.util.Set; 22 import java.util.TreeMap; 23 import java.util.TreeSet; 24 import java.util.regex.Matcher; 25 26 import org.unicode.cldr.draft.FileUtilities; 27 import org.unicode.cldr.test.CheckCLDR.InputMethod; 28 import org.unicode.cldr.test.CheckCLDR.Phase; 29 import org.unicode.cldr.test.CheckCLDR.StatusAction; 30 import org.unicode.cldr.test.CoverageLevel2; 31 import org.unicode.cldr.tool.FormattedFileWriter.Anchors; 32 import org.unicode.cldr.tool.Option.Options; 33 import org.unicode.cldr.util.Annotations; 34 import org.unicode.cldr.util.CLDRConfig; 35 import org.unicode.cldr.util.CLDRFile; 36 import org.unicode.cldr.util.CLDRFile.DraftStatus; 37 import org.unicode.cldr.util.CLDRFile.Status; 38 import org.unicode.cldr.util.CLDRInfo.CandidateInfo; 39 import org.unicode.cldr.util.CLDRInfo.PathValueInfo; 40 import org.unicode.cldr.util.CLDRInfo.UserInfo; 41 import org.unicode.cldr.util.CLDRLocale; 42 import org.unicode.cldr.util.CLDRPaths; 43 import org.unicode.cldr.util.CLDRURLS; 44 import org.unicode.cldr.util.CldrUtility; 45 import org.unicode.cldr.util.CoreCoverageInfo; 46 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems; 47 import org.unicode.cldr.util.Counter; 48 import org.unicode.cldr.util.Counter2; 49 import org.unicode.cldr.util.CoverageInfo; 50 import org.unicode.cldr.util.DtdType; 51 import org.unicode.cldr.util.LanguageTagCanonicalizer; 52 import org.unicode.cldr.util.LanguageTagParser; 53 import org.unicode.cldr.util.Level; 54 import org.unicode.cldr.util.Organization; 55 import org.unicode.cldr.util.PathHeader; 56 import org.unicode.cldr.util.PathHeader.Factory; 57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 58 import org.unicode.cldr.util.PatternCache; 59 import org.unicode.cldr.util.RegexLookup; 60 import org.unicode.cldr.util.RegexLookup.LookupType; 61 import org.unicode.cldr.util.SimpleFactory; 62 import org.unicode.cldr.util.StandardCodes; 63 import org.unicode.cldr.util.SupplementalDataInfo; 64 import org.unicode.cldr.util.VettingViewer; 65 import org.unicode.cldr.util.VettingViewer.MissingStatus; 66 import org.unicode.cldr.util.VoteResolver.VoterInfo; 67 68 import com.google.common.collect.ImmutableSet; 69 import com.google.common.collect.LinkedHashMultimap; 70 import com.google.common.collect.Multimap; 71 import com.google.common.collect.Ordering; 72 import com.ibm.icu.dev.util.CollectionUtilities; 73 import com.ibm.icu.dev.util.UnicodeMap; 74 import com.ibm.icu.impl.Relation; 75 import com.ibm.icu.lang.UCharacter; 76 import com.ibm.icu.text.NumberFormat; 77 import com.ibm.icu.text.UnicodeSet; 78 import com.ibm.icu.util.ICUUncheckedIOException; 79 80 public class ShowLocaleCoverage { 81 private static final String SPREADSHEET_MISSING = "#LCode\tEnglish Name\tScript\tEnglish Value\tNative Value\tCldr Target\tPath Level\tStatus\tAction\tSTStatus\tST Link\tSection\tPage\tHeader\tCode\tPath"; 82 private static final boolean DEBUG = false; 83 private static final char DEBUG_FILTER = 0; // use letter to only load locales starting with that letter 84 85 private static final String LATEST = ToolConstants.CHART_VERSION; 86 private static final double CORE_SIZE = CoreItems.values().length - CoreItems.ONLY_RECOMMENDED.size(); 87 public static CLDRConfig testInfo = ToolConfig.getToolInstance(); 88 private static final StandardCodes SC = testInfo.getStandardCodes(); 89 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo(); 90 private static final StandardCodes STANDARD_CODES = SC; 91 92 static org.unicode.cldr.util.Factory factory = testInfo.getCommonAndSeedAndMainAndAnnotationsFactory(); 93 private static final CLDRFile ENGLISH = factory.make("en", true); 94 95 private static UnicodeSet ENG_ANN = Annotations.getData("en").keySet(); 96 97 // added info using pattern in VettingViewer. 98 99 static final RegexLookup<Boolean> HACK = RegexLookup.<Boolean> of(LookupType.STANDARD, RegexLookup.RegexFinderTransformPath) 100 .add("//ldml/localeDisplayNames/keys/key[@type=\"(d0|em|fw|i0|k0|lw|m0|rg|s0|ss|t0|x0)\"]", true) 101 .add("//ldml/localeDisplayNames/types/type[@key=\"(em|fw|kr|lw|ss)\"].*", true) 102 .add("//ldml/localeDisplayNames/languages/language[@type=\".*_.*\"]", true) 103 .add("//ldml/localeDisplayNames/languages/language[@type=\".*\"][@alt=\".*\"]", true) 104 .add("//ldml/localeDisplayNames/territories/territory[@type=\".*\"][@alt=\".*\"]", true) 105 .add("//ldml/localeDisplayNames/territories/territory[@type=\"EZ\"]", true); 106 107 //private static final String OUT_DIRECTORY = CLDRPaths.GEN_DIRECTORY + "/coverage/"; // CldrUtility.MAIN_DIRECTORY; 108 109 final static Options myOptions = new Options(); 110 111 enum MyOptions { 112 filter(".+", ".*", "Filter the information based on id, using a regex argument."), 113 // draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft status."), 114 chart(null, null, "chart only"), 115 growth("true", "true", "Compute growth data"), 116 organization(".+", null, "Only locales for organization"), 117 version(".+", 118 LATEST, "To get different versions"), 119 rawData(null, null, "Output the raw data from all coverage levels"), 120 targetDir(".*", 121 CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."), 122 directories("(.*:)?[a-z]+(,[a-z]+)*", "common", 123 "Space-delimited list of main source directories: common,seed,exemplar.\n" + 124 "Optional, <baseDir>:common,seed"),; 125 126 // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target directory."), 127 // layouts(null, null, "Only create html files for keyboard layouts"), 128 // repertoire(null, null, "Only create html files for repertoire"), ; 129 // boilerplate 130 final Option option; 131 MyOptions(String argumentPattern, String defaultArgument, String helpText)132 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 133 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 134 } 135 } 136 137 static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = new RegexLookup<Boolean>() 138 .add("\\[@alt=\"accounting\"]", true) 139 .add("\\[@alt=\"variant\"]", true) 140 .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true) 141 .add("^//ldml/localeDisplayNames/languages/language.*_", true) 142 .add("^//ldml/numbers/currencies/currency.*/symbol", true) 143 .add("^//ldml/characters/exemplarCharacters", true); 144 145 static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed; 146 static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH); 147 148 static boolean RAW_DATA = true; 149 private static Set<String> COMMON_LOCALES; 150 main(String[] args)151 public static void main(String[] args) throws IOException { 152 myOptions.parse(MyOptions.filter, args, true); 153 154 if (MyOptions.chart.option.doesOccur()) { 155 showCoverage(null); 156 return; 157 } 158 159 Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher(""); 160 161 if (MyOptions.growth.option.doesOccur()) { 162 try (PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-growth.tsv")) { 163 doGrowth(matcher, out); 164 return; 165 } 166 } 167 168 Set<String> locales = null; 169 String organization = MyOptions.organization.option.getValue(); 170 boolean useOrgLevel = MyOptions.organization.option.doesOccur(); 171 if (useOrgLevel) { 172 locales = STANDARD_CODES.getLocaleCoverageLocales(organization); 173 } 174 175 if (MyOptions.version.option.doesOccur()) { 176 String number = MyOptions.version.option.getValue().trim(); 177 if (!number.contains(".")) { 178 number += ".0"; 179 } 180 factory = org.unicode.cldr.util.Factory.make( 181 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*"); 182 } else { 183 if (MyOptions.directories.option.doesOccur()) { 184 String directories = MyOptions.directories.option.getValue().trim(); 185 CLDRConfig cldrConfig = CLDRConfig.getInstance(); 186 String base = null; 187 int colonPos = directories.indexOf(':'); 188 if (colonPos >= 0) { 189 base = directories.substring(0, colonPos).trim(); 190 directories = directories.substring(colonPos + 1).trim(); 191 } else { 192 base = cldrConfig.getCldrBaseDirectory().toString(); 193 } 194 String[] items = directories.split(",\\s*"); 195 File[] fullDirectories = new File[items.length]; 196 int i = 0; 197 for (String item : items) { 198 fullDirectories[i++] = new File(base + "/" + item + "/main"); 199 } 200 factory = SimpleFactory.make(fullDirectories, ".*"); 201 COMMON_LOCALES = SimpleFactory.make(base + "/" + "common" + "/main", ".*").getAvailableLanguages(); 202 } 203 } 204 fixCommonLocales(); 205 206 RAW_DATA = MyOptions.rawData.option.doesOccur(); 207 208 //showEnglish(); 209 210 showCoverage(null, matcher, locales, useOrgLevel); 211 } 212 fixCommonLocales()213 public static void fixCommonLocales() { 214 if (COMMON_LOCALES == null) { 215 COMMON_LOCALES = factory.getAvailableLanguages(); 216 } 217 } 218 doGrowth(Matcher matcher, PrintWriter out)219 private static void doGrowth(Matcher matcher, PrintWriter out) { 220 TreeMap<String, List<Double>> growthData = new TreeMap<>(Ordering.natural().reverse()); // sort by version, descending 221 // if (DEBUG) { 222 // for (String dir : new File(CLDRPaths.ARCHIVE_DIRECTORY).list()) { 223 // if (!dir.startsWith("cldr")) { 224 // continue; 225 // } 226 // String version = getNormalizedVersion(dir); 227 // if (version == null) { 228 // continue; 229 // } 230 // org.unicode.cldr.util.Factory newFactory = org.unicode.cldr.util.Factory.make( 231 // CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*"); 232 // System.out.println("Reading: " + version); 233 // Map<String, FoundAndTotal> currentData = addGrowth(newFactory, matcher); 234 // System.out.println("Read: " + version + "\t" + currentData); 235 // break; 236 // } 237 // } 238 Map<String, FoundAndTotal> latestData = addGrowth(factory, null, matcher, DEBUG); 239 addCompletionList(getYearFromVersion(LATEST, false), getCompletion(latestData, latestData), growthData); 240 if (DEBUG) System.out.println(latestData); 241 //System.out.println(growthData); 242 List<String> dirs = new ArrayList<>(Arrays.asList(new File(CLDRPaths.ARCHIVE_DIRECTORY).list())); 243 Collections.reverse(dirs); 244 for (String dir : dirs) { 245 if (!dir.startsWith("cldr")) { 246 continue; 247 } 248 String version = getNormalizedVersion(dir); 249 if (version == null) { 250 continue; 251 } 252 // if (version.compareTo("12") < 0) { 253 // continue; 254 // } 255 System.out.println("Reading: " + version); 256 if (version.equals("2008")) { 257 int debug = 0; 258 } 259 Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false); 260 System.out.println("Read: " + version + "\t" + currentData); 261 Counter2<String> completionData = getCompletion(latestData, currentData); 262 //System.out.println(version + "\t" + completionData); 263 addCompletionList(version, completionData, growthData); 264 if (DEBUG) System.out.println(currentData); 265 } 266 boolean first = true; 267 for (Entry<String, List<Double>> entry : growthData.entrySet()) { 268 if (first) { 269 for (int i = 0; i < entry.getValue().size(); ++i) { 270 out.print("\t" + i); 271 } 272 out.println(); 273 first = false; 274 } 275 out.println(entry.getKey() + "\t" + CollectionUtilities.join(entry.getValue(), "\t")); 276 } 277 } 278 279 static final Map<String, String> versionToYear = new HashMap<>(); 280 static { 281 int[][] mapping = { 282 { 34, 2018 }, 283 { 32, 2017 }, 284 { 30, 2016 }, 285 { 28, 2015 }, 286 { 26, 2014 }, 287 { 24, 2013 }, 288 { 22, 2012 }, 289 { 20, 2011 }, 290 { 19, 2010 }, 291 { 17, 2009 }, 292 { 16, 2008 }, 293 { 15, 2007 }, 294 { 14, 2006 }, 295 { 13, 2005 }, 296 { 12, 2004 }, 297 { 10, 2003 }, 298 }; 299 for (int[] row : mapping) { String.valueOf(row[0])300 versionToYear.put(String.valueOf(row[0]), String.valueOf(row[1])); 301 } 302 } 303 getNormalizedVersion(String dir)304 public static String getNormalizedVersion(String dir) { 305 String rawVersion = dir.substring(dir.indexOf('-') + 1); 306 int firstDot = rawVersion.indexOf('.'); 307 int secondDot = rawVersion.indexOf('.', firstDot + 1); 308 if (secondDot > 0) { 309 rawVersion = rawVersion.substring(0, firstDot) + rawVersion.substring(firstDot + 1, secondDot); 310 } else { 311 rawVersion = rawVersion.substring(0, firstDot); 312 } 313 String result = getYearFromVersion(rawVersion, true); 314 return result == null ? null : result.toString(); 315 } 316 getYearFromVersion(String version, boolean allowNull)317 private static String getYearFromVersion(String version, boolean allowNull) { 318 String result = versionToYear.get(version); 319 if (!allowNull && result == null) { 320 throw new IllegalArgumentException("No year for version: " + version); 321 } 322 return result; 323 } 324 addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData)325 public static void addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData) { 326 List<Double> x = new ArrayList<>(); 327 for (String key : completionData.getKeysetSortedByCount(false)) { 328 x.add(completionData.getCount(key)); 329 } 330 growthData.put(version, x); 331 System.out.println(version + "\t" + x.size()); 332 } 333 getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData)334 public static Counter2<String> getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData) { 335 Counter2<String> completionData = new Counter2<>(); 336 for (Entry<String, FoundAndTotal> entry : latestData.entrySet()) { 337 final String locale = entry.getKey(); 338 final FoundAndTotal currentRecord = currentData.get(locale); 339 if (currentRecord == null) { 340 continue; 341 } 342 double total = entry.getValue().total; 343 if (total == 0) { 344 continue; 345 } 346 double completion = currentRecord.found / total; 347 completionData.add(locale, completion); 348 } 349 return completionData; 350 } 351 352 static class FoundAndTotal { 353 final int found; 354 final int total; 355 FoundAndTotal(Counter<Level>.... counters)356 public FoundAndTotal(Counter<Level>... counters) { 357 final int[] count = { 0, 0, 0 }; 358 for (Level level : Level.values()) { 359 if (level == Level.COMPREHENSIVE || level == Level.OPTIONAL) { 360 continue; 361 } 362 int i = 0; 363 for (Counter<Level> counter : counters) { 364 count[i++] += counter.get(level); 365 } 366 } 367 found = count[0]; 368 total = found + count[1] + count[2]; 369 } 370 371 @Override toString()372 public String toString() { 373 return found + "/" + total; 374 } 375 } 376 addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing)377 private static Map<String, FoundAndTotal> addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing) { 378 org.unicode.cldr.util.Factory newFactory = dir == null ? factory 379 : org.unicode.cldr.util.Factory.make( 380 CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*"); 381 Map<String, FoundAndTotal> data = new HashMap<>(); 382 char c = 0; 383 Set<String> latestAvailable = newFactory.getAvailableLanguages(); 384 for (String locale : newFactory.getAvailableLanguages()) { 385 if (!matcher.reset(locale).matches()) { 386 continue; 387 } 388 if (!latestAvailable.contains(locale)) { 389 continue; 390 } 391 if (SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales().contains(locale) 392 || locale.equals("root") 393 || locale.equals("supplementalData")) { 394 continue; 395 } 396 char nc = locale.charAt(0); 397 if (nc != c) { 398 System.out.println("\t" + locale); 399 c = nc; 400 } 401 if (DEBUG_FILTER != 0 && DEBUG_FILTER != nc) { 402 continue; 403 } 404 CLDRFile latestFile = null; 405 try { 406 latestFile = latestFactory.make(locale, true); 407 } catch (Exception e2) { 408 continue; 409 } 410 final CLDRFile file = newFactory.make(locale, true); 411 // HACK check bogus 412 // Collection<String> extra = file.getExtraPaths(); 413 // 414 // final Iterable<String> fullIterable = file.fullIterable(); 415 // for (String path : fullIterable) { 416 // if (path.contains("\"one[@")) { 417 // boolean inside = extra.contains(path); 418 // Status status = new Status(); 419 // String loc = file.getSourceLocaleID(path, status ); 420 // int debug = 0; 421 // } 422 // } 423 // END HACK 424 Counter<Level> foundCounter = new Counter<Level>(); 425 Counter<Level> unconfirmedCounter = new Counter<Level>(); 426 Counter<Level> missingCounter = new Counter<Level>(); 427 Set<String> unconfirmedPaths = null; 428 Relation<MissingStatus, String> missingPaths = null; 429 unconfirmedPaths = new LinkedHashSet<>(); 430 missingPaths = Relation.of(new LinkedHashMap(), LinkedHashSet.class); 431 VettingViewer.getStatus(latestFile.fullIterable(), file, 432 pathHeaderFactory, foundCounter, unconfirmedCounter, 433 missingCounter, missingPaths, unconfirmedPaths); 434 435 // HACK 436 Set<Entry<MissingStatus, String>> missingRemovals = new HashSet<>(); 437 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) { 438 if (e.getKey() == MissingStatus.ABSENT) { 439 final String path = e.getValue(); 440 if (HACK.get(path) != null) { 441 missingRemovals.add(e); 442 missingCounter.add(Level.MODERN, -1); 443 foundCounter.add(Level.MODERN, 1); 444 } else { 445 Status status = new Status(); 446 String loc = file.getSourceLocaleID(path, status); 447 int debug = 0; 448 } 449 } 450 } 451 for (Entry<MissingStatus, String> e : missingRemovals) { 452 missingPaths.remove(e.getKey(), e.getValue()); 453 } 454 // END HACK 455 456 if (showMissing) { 457 int count = 0; 458 for (String s : unconfirmedPaths) { 459 System.out.println(++count + "\t" + locale + "\tunconfirmed\t" + s); 460 } 461 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) { 462 String path = e.getValue(); 463 Status status = new Status(); 464 String loc = file.getSourceLocaleID(path, status); 465 int debug = 0; 466 467 System.out.println(++count + "\t" + locale + "\t" + CldrUtility.toString(e)); 468 } 469 int debug = 0; 470 } 471 472 // add annotations 473 System.out.println(locale + " annotations"); 474 try { 475 UnicodeMap<Annotations> annotations = dir == null ? Annotations.getData(locale) 476 : Annotations.getData(CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/annotations/", locale); 477 for (String cp : ENG_ANN) { 478 Annotations annotation = annotations.get(cp); 479 if (annotation == null) { 480 missingCounter.add(Level.MODERN, 1); 481 } else if (annotation.getShortName() == null) { 482 missingCounter.add(Level.MODERN, 1); 483 } else { 484 foundCounter.add(Level.MODERN, 1); 485 } 486 } 487 } catch (Exception e1) { 488 missingCounter.add(Level.MODERN, ENG_ANN.size()); 489 } 490 491 data.put(locale, new FoundAndTotal(foundCounter, unconfirmedCounter, missingCounter)); 492 } 493 return Collections.unmodifiableMap(data); 494 } 495 showCoverage(Anchors anchors)496 public static void showCoverage(Anchors anchors) throws IOException { 497 showCoverage(anchors, PatternCache.get(".*").matcher(""), null, false); 498 } 499 showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)500 public static void showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) throws IOException { 501 final String title = "Locale Coverage"; 502 try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors)); 503 PrintWriter tsv_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv"); 504 PrintWriter tsv_missing = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv"); 505 ){ 506 printData(pw, tsv_summary, tsv_missing, locales, matcher, useOrgLevel); 507 new ShowPlurals().appendBlanksForScrolling(pw); 508 } 509 } 510 511 // public static void showEnglish() { 512 // Map<PathHeader,String> sorted = new TreeMap<>(); 513 // CoverageInfo coverageInfo=CLDRConfig.getInstance().getCoverageInfo(); 514 // for (String path : ENGLISH) { 515 //// Level currentLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, "en"); 516 // Level currentLevel=coverageInfo.getCoverageLevel(path, "en"); 517 // if (currentLevel.compareTo(Level.MINIMAL) <= 0) { 518 // PathHeader ph = pathHeaderFactory.fromPath(path); 519 // sorted.put(ph, currentLevel + "\t" + ENGLISH.getStringValue(path)); 520 // } 521 // } 522 // for (Entry<PathHeader, String> entry : sorted.entrySet()) { 523 // System.out.println(entry.getKey() + "\t" + entry.getValue()); 524 // } 525 // } 526 527 static class IterableFilter implements Iterable<String> { 528 private Iterable<String> source; 529 IterableFilter(Iterable<String> source)530 IterableFilter(Iterable<String> source) { 531 this.source = source; 532 } 533 534 /** 535 * When some paths are defined after submission, we need to change them to COMPREHENSIVE in computing the vetting status. 536 */ 537 538 static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of( 539 "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]", 540 "//ldml/localeDisplayNames/territories/territory[@type=\"XA\"]", 541 "//ldml/localeDisplayNames/territories/territory[@type=\"XB\"]", 542 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]", 543 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]", 544 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]", 545 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]", 546 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]", 547 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]", 548 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]", 549 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]", 550 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]", 551 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]", 552 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]", 553 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]", 554 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]", 555 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]", 556 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]", 557 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]", 558 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]", 559 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]", 560 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]", 561 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]", 562 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]", 563 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]", 564 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]", 565 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]", 566 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]", 567 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]", 568 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]", 569 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]", 570 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]", 571 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]", 572 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]", 573 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]", 574 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]", 575 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]", 576 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]", 577 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]", 578 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]", 579 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]", 580 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]", 581 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]", 582 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]", 583 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]", 584 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]", 585 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]", 586 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]", 587 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]", 588 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]", 589 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]" 590 ); 591 @Override iterator()592 public Iterator<String> iterator() { 593 return new IteratorFilter(source.iterator()); 594 } 595 596 static class IteratorFilter implements Iterator<String> { 597 Iterator<String> source; 598 String peek; 599 IteratorFilter(Iterator<String> source)600 public IteratorFilter(Iterator<String> source) { 601 this.source = source; 602 fillPeek(); 603 } 604 @Override hasNext()605 public boolean hasNext() { 606 return peek != null; 607 } 608 @Override next()609 public String next() { 610 String result = peek; 611 fillPeek(); 612 return result; 613 } 614 fillPeek()615 private void fillPeek() { 616 peek = null; 617 while (source.hasNext()) { 618 peek = source.next(); 619 // if it is ok to assess, then break 620 if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek) 621 && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) { 622 break; 623 } 624 peek = null; 625 } 626 } 627 } 628 629 } printData(PrintWriter pw, PrintWriter tsv_summary, PrintWriter tsv_missing, Set<String> locales, Matcher matcher, boolean useOrgLevel)630 static void printData(PrintWriter pw, PrintWriter tsv_summary, PrintWriter tsv_missing, Set<String> locales, Matcher matcher, boolean useOrgLevel) { 631 // Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales("google", EnumSet.of(Level.MODERN)); 632 Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN)); 633 Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages()); 634 availableLanguages.addAll(checkModernLocales); 635 Relation<String, String> languageToRegion = Relation.of(new TreeMap(), TreeSet.class); 636 LanguageTagParser ltp = new LanguageTagParser(); 637 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true); 638 for (String locale : factory.getAvailable()) { 639 String country = ltp.set(locale).getRegion(); 640 if (!country.isEmpty()) { 641 languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country); 642 } 643 } 644 645 fixCommonLocales(); 646 647 System.out.println(CollectionUtilities.join(languageToRegion.keyValuesSet(), "\n")); 648 649 System.out.println("# Checking: " + availableLanguages); 650 651 pw.println("<p style='text-align: left'>This chart shows the coverage levels for this release. </p>" + 652 "<ol>" 653 + "<li>Fields = fields found at a modern level</li>" 654 + "<li>UC = unconfirmed values: typically treated as missing by implementations</li>" 655 + "<li>Miss = missing values</li>" 656 + "<li>Modern%, etc = fields/(fields + missing + unconfirmed) — at that level</li></ol>" 657 + "<li>Core Missing = missing core fields — optionals marked with *</li></ol>" 658 + "<p>A high-level summary of the meaning of the coverage values are at " + 659 "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. " + 660 "The Core values are described on " + 661 "<a target='_blank' href='http://cldr.unicode.org/index/cldr-spec/minimaldata'>Core Data</a>." + 662 "</p>"); 663 664 Relation<MissingStatus, String> missingPaths = Relation.of(new EnumMap<MissingStatus, Set<String>>( 665 MissingStatus.class), TreeSet.class, CLDRFile.getComparator(DtdType.ldml)); 666 Set<String> unconfirmed = new TreeSet<String>(CLDRFile.getComparator(DtdType.ldml)); 667 668 //Map<String, String> likely = testInfo.getSupplementalDataInfo().getLikelySubtags(); 669 Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales(); 670 671 // Map<String,Counter<Level>> counts = new HashMap(); 672 // System.out.print("Script\tEnglish\tNative\tCode\tCode*"); 673 // for (Level level : Level.values()) { 674 // if (skipPrintingLevels.contains(level)) { 675 // continue; 676 // } 677 // System.out.print("\t≤" + level + " (f)\t(u)\t(m)"); 678 // } 679 // System.out.println(); 680 // Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getCldrFactory().make("en", true)); 681 682 tsv_missing.println(SPREADSHEET_MISSING); 683 684 Counter<Level> foundCounter = new Counter<Level>(); 685 Counter<Level> unconfirmedCounter = new Counter<Level>(); 686 Counter<Level> missingCounter = new Counter<Level>(); 687 688 List<Level> reversedLevels = new ArrayList<>(EnumSet.allOf(Level.class)); 689 reversedLevels.remove(Level.COMPREHENSIVE); 690 reversedLevels.remove(Level.UNDETERMINED); 691 Collections.reverse(reversedLevels); 692 693 PrintWriter out2; 694 try { 695 out2 = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "showLocaleCoverage.tsv"); 696 } catch (IOException e1) { 697 throw new ICUUncheckedIOException(e1); 698 } 699 700 out2.print("Code\tCom?\tEnglish Name\tNative Name\tScript\tSublocales\tStrings"); 701 for (Level level : reversedLevels) { 702 out2.print("\t" + level + " %\t" + level + " UC%"); 703 } 704 out2.println(); 705 //System.out.println("\tCore*\nCore* Missing"); 706 int localeCount = 0; 707 708 final TablePrinter tablePrinter = new TablePrinter() 709 .addColumn("Direct.", "class='source'", null, "class='source'", true) 710 .setBreakSpans(true).setSpanRows(false) 711 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true).setBreakSpans(true) 712 .addColumn("English Name", "class='source'", null, "class='source'", true).setBreakSpans(true) 713 .addColumn("Native Name", "class='source'", null, "class='source'", true).setBreakSpans(true) 714 .addColumn("Script", "class='source'", null, "class='source'", true).setBreakSpans(true) 715 .addColumn("CLDR target", "class='source'", null, "class='source'", true).setBreakSpans(true).setSortPriority(0).setSortAscending(false) 716 .addColumn("Sublocales", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 717 .setCellPattern("{0,number}") 718 .addColumn("Fields", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 719 .setCellPattern("{0,number}") 720 .addColumn("UC", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 721 .setCellPattern("{0,number}") 722 .addColumn("Miss", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 723 .setCellPattern("{0,number}") 724 //.addColumn("Target Level", "class='target'", null, "class='target'", true).setBreakSpans(true) 725 ; 726 tsv_summary.println("Dir" 727 + "\tCode" 728 + "\tEnglish Name" 729 + "\tNative Name" 730 + "\tScript" 731 + "\tCLDR target" 732 + "\tSublocales" 733 + "\tFields\tUC\tMissing" 734 + "\tModern\tMiss +UC" 735 + "\tModerate\tMiss +UC" 736 + "\tBasic\tMiss +UC" 737 + "\tCore\tMiss +UC" 738 + "\tCore-Missing"); 739 NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH); 740 tsvPercent.setMaximumFractionDigits(2); 741 742 for (Level level : reversedLevels) { 743 String titleLevel = level.toString(); 744 tablePrinter.addColumn(UCharacter.toTitleCase(titleLevel, null) + "%", "class='target'", null, "class='targetRight'", true) 745 .setCellPattern("{0,number,0.0%}") 746 .setBreakSpans(true); 747 switch(level) { 748 case CORE: 749 tablePrinter.setSortPriority(4).setSortAscending(false); 750 break; 751 case BASIC: 752 tablePrinter.setSortPriority(3).setSortAscending(false); 753 break; 754 case MODERATE: 755 tablePrinter.setSortPriority(2).setSortAscending(false); 756 break; 757 case MODERN: 758 tablePrinter.setSortPriority(1).setSortAscending(false); 759 break; 760 } 761 // tablePrinter 762 // .addColumn("∪ UC%", "class='target'", null, "class='targetRight'", true) 763 // .setCellPattern("{0,number,0.0%}") 764 // .setBreakSpans(true) 765 ; 766 } 767 tablePrinter.addColumn("Core Missing", "class='target'", null, "class='targetRight'", true) 768 .setBreakSpans(true); 769 770 long start = System.currentTimeMillis(); 771 LikelySubtags likelySubtags = new LikelySubtags(); 772 773 EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class); 774 targetLevel.put(Level.CORE, 2 / 100d); 775 targetLevel.put(Level.BASIC, 16 / 100d); 776 targetLevel.put(Level.MODERATE, 33 / 100d); 777 targetLevel.put(Level.MODERN, 100 / 100d); 778 779 // NumberFormat percentFormat = NumberFormat.getPercentInstance(ULocale.ENGLISH); 780 // percentFormat.setMaximumFractionDigits(2); 781 // percentFormat.setMinimumFractionDigits(2); 782 // NumberFormat intFormat = NumberFormat.getIntegerInstance(ULocale.ENGLISH); 783 784 int counter = 0; 785 for (String locale : availableLanguages) { 786 try { 787 if (locale.contains("supplemental")) { // for old versions 788 continue; 789 } 790 if (locales != null && !locales.contains(locale)) { 791 String base = CLDRLocale.getInstance(locale).getLanguage(); 792 if (!locales.contains(base)) { 793 continue; 794 } 795 } 796 if (!matcher.reset(locale).matches()) { 797 continue; 798 } 799 if (defaultContents.contains(locale) || "root".equals(locale) || "und".equals(locale)) { 800 continue; 801 } 802 803 boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists(); 804 805 //boolean capture = locale.equals("en"); 806 String region = ltp.set(locale).getRegion(); 807 if (!region.isEmpty()) continue; // skip regions 808 809 final Level cldrLocaleLevelGoal = SC.getLocaleCoverageLevel(Organization.cldr.toString(), locale); 810 final boolean cldrLevelGoalModerateOrAbove = cldrLocaleLevelGoal.compareTo(Level.MODERATE) >= 0; 811 812 String isCommonLocale = Level.MODERN == cldrLocaleLevelGoal ? "C*" 813 : COMMON_LOCALES.contains(locale) ? "C" 814 : ""; 815 816 String max = likelySubtags.maximize(locale); 817 String script = ltp.set(max).getScript(); 818 819 String language = likelySubtags.minimize(locale); 820 // Level otherLevel = STANDARD_CODES.getLocaleCoverageLevel("apple", locale); 821 // if (otherLevel.compareTo(currentLevel) > 0 822 // && otherLevel.compareTo(Level.MODERN) <= 0) { 823 // currentLevel = otherLevel; 824 // } 825 826 missingPaths.clear(); 827 unconfirmed.clear(); 828 829 final CLDRFile file = factory.make(locale, true, minimumDraftStatus); 830 831 if (locale.equals("af")) { 832 int debug = 0; 833 } 834 835 Iterable<String> pathSource = new IterableFilter(file.fullIterable()); 836 837 VettingViewer.getStatus(pathSource, file, 838 pathHeaderFactory, foundCounter, unconfirmedCounter, 839 missingCounter, missingPaths, unconfirmed); 840 841 Set<String> sublocales = languageToRegion.get(language); 842 if (sublocales == null) { 843 //System.err.println("No Sublocales: " + language); 844 sublocales = Collections.EMPTY_SET; 845 } 846 847 // List s = Lists.newArrayList(file.fullIterable()); 848 849 String seedString = isSeed ? "seed" : "common"; 850 tablePrinter.addRow() 851 .addCell(seedString) 852 .addCell(language) 853 .addCell(ENGLISH.getName(language)) 854 .addCell(file.getName(language)) 855 .addCell(script) 856 .addCell(cldrLocaleLevelGoal) 857 .addCell(sublocales.size()); 858 859 tsv_summary 860 .append(seedString) 861 .append('\t').append(language) 862 .append('\t').append(ENGLISH.getName(language)) 863 .append('\t').append(file.getName(language)) 864 .append('\t').append(script) 865 .append('\t').append(cldrLocaleLevelGoal.toString()) 866 .append('\t').append(sublocales.size()+""); 867 ; 868 869 // String header = language 870 // + "\t" + isCommonLocale 871 // + "\t" + ENGLISH.getName(language) 872 // + "\t" + file.getName(language) 873 // + "\t" + script 874 // + "\t" + sublocales.size() 875 // //+ "\t" + currentLevel 876 // ; 877 878 int sumFound = 0; 879 int sumMissing = 0; 880 int sumUnconfirmed = 0; 881 882 // get the totals 883 884 EnumMap<Level, Integer> totals = new EnumMap<>(Level.class); 885 EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class); 886 // EnumMap<Level, Integer> unconfirmedByLevel = new EnumMap<>(Level.class); 887 Set<String> coreMissing = new LinkedHashSet<>(); 888 889 if (locale.equals("af")) { 890 int debug = 0; 891 } 892 893 { // CORE 894 long missingExemplarCount = missingCounter.get(Level.CORE); 895 if (missingExemplarCount > 0) { 896 for (Entry<MissingStatus, String> statusAndPath : missingPaths.entrySet()) { 897 String path = statusAndPath.getValue(); 898 if (path.startsWith("//ldml/characters/exemplarCharacters")) { 899 PathHeader ph = pathHeaderFactory.fromPath(path); 900 String problem = ph.getCode().replaceAll("Others: ","").replaceAll("Main Letters", "main-letters"); 901 coreMissing.add(problem); 902 // String line = spreadsheetLine(locale, script, language, cldrLevelGoal, foundLevel, missingStatus.toString(), path, file.getStringValue(path)); 903 String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, Level.CORE, "ABSENT", path, "«No " + problem + "»"); 904 tsv_missing.println(line); 905 } 906 } 907 } 908 Multimap<CoreItems, String> detailedErrors = LinkedHashMultimap.create(); 909 Set<CoreItems> coverage = new TreeSet<>( 910 CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors)); 911 Set<CoreItems> missing = EnumSet.allOf(CoreItems.class); 912 missing.removeAll(coverage); 913 for (Entry<CoreItems, String> entry : detailedErrors.entries()) { 914 CoreItems coreItem = entry.getKey(); 915 String value = entry.getValue(); 916 coreMissing.add(coreItem.toString()); 917 //String line = spreadsheetLine(language, script, "n/a", detailedErrors.get(entry).toString(), level, "ABSENT", "n/a", "n/a", "n/a"); 918 if (cldrLevelGoalModerateOrAbove) { 919 String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, coreItem.desiredLevel, "ABSENT", value, "«No " + coreItem + "»"); 920 tsv_missing.println(line); 921 } 922 } 923 missing.removeAll(CoreItems.ONLY_RECOMMENDED); 924 foundCounter.add(Level.CORE, coverage.size()); 925 missingCounter.add(Level.CORE, missing.size()); 926 927 // sumFound += coverage.size(); 928 // sumMissing += missing.size(); 929 930 // confirmed.put(Level.CORE, (int) coverage.size()); 931 //// unconfirmedByLevel.put(level, (int)(foundCount + unconfirmedCount)); 932 // totals.put(Level.CORE, (int)(coverage.size() + missing.size())); 933 934 } 935 936 if (cldrLevelGoalModerateOrAbove) { 937 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 938 String path = entry.getValue(); 939 // if (SKIP_PATHS.get(path) == null) { 940 MissingStatus missingStatus = entry.getKey(); 941 CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO); 942 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 943 if (cldrLocaleLevelGoal.compareTo(foundLevel) >= 0) { 944 String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, foundLevel, missingStatus.toString(), path, file.getStringValue(path)); 945 tsv_missing.println(line); 946 } 947 } 948 } 949 950 for (Level level : reversedLevels) { 951 long foundCount = foundCounter.get(level); 952 long unconfirmedCount = unconfirmedCounter.get(level); 953 long missingCount = missingCounter.get(level); 954 955 sumFound += foundCount; 956 sumUnconfirmed += unconfirmedCount; 957 sumMissing += missingCount; 958 959 confirmed.put(level, (int) foundCount); 960 // unconfirmedByLevel.put(level, (int)(foundCount + unconfirmedCount)); 961 totals.put(level, (int)(foundCount + unconfirmedCount + missingCount)); 962 } 963 964 tsv_missing.flush(); 965 966 double modernTotal = totals.get(Level.MODERN); 967 968 tablePrinter 969 .addCell(sumFound) 970 .addCell(sumUnconfirmed) 971 .addCell(sumMissing) 972 ; 973 974 tsv_summary 975 .append('\t').append(sumFound+"") 976 .append('\t').append(sumUnconfirmed+"") 977 .append('\t').append(sumMissing+"") 978 ; 979 980 981 // header += "\t" + sumFound; 982 // header += "\t" + (sumFound + sumUnconfirmed); 983 984 // print the totals 985 986 for (Level level : reversedLevels) { 987 if (useOrgLevel && cldrLocaleLevelGoal != level) { 988 continue; 989 } 990 int confirmedCoverage = confirmed.get(level); 991 // int unconfirmedCoverage = unconfirmedByLevel.get(level); 992 double total = totals.get(level); 993 994 tablePrinter 995 .addCell(confirmedCoverage / total) 996 // .addCell(unconfirmedCoverage / total) 997 ; 998 999 tsv_summary 1000 .append('\t').append(String.valueOf(confirmedCoverage)) 1001 .append('\t').append(String.valueOf((int)total - confirmedCoverage)) 1002 ; 1003 1004 // if (RAW_DATA) { 1005 // header += "\t" + confirmedCoverage / total 1006 // + "\t" + unconfirmedCoverage / total; 1007 // } else { 1008 // Double factor = targetLevel.get(level) / (total / modernTotal); 1009 // header += "\t" + factor * confirmedCoverage / modernTotal 1010 //// + "\t" + factor * unconfirmedCoverage / modernTotal 1011 // ; 1012 // } 1013 } 1014 String coreMissingString = 1015 CollectionUtilities.join(coreMissing, ", "); 1016 1017 tablePrinter 1018 .addCell(coreMissingString) 1019 .finishRow(); 1020 1021 tsv_summary 1022 .append('\t') 1023 .append(coreMissingString) 1024 .append('\n'); 1025 1026 //out2.println(header + "\t" + coreValue + "\t" + CollectionUtilities.join(missing, ", ")); 1027 1028 // Write missing paths (for >99% and specials 1029 1030 if (false) { // checkModernLocales.contains(locale) 1031 CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance(locale); 1032 1033 for (String path : unconfirmed) { 1034 Level level = coverageLevel2.getLevel(path); 1035 if (level.compareTo(cldrLocaleLevelGoal) > 0) { 1036 continue; 1037 } 1038 String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, level, "UNCONFIRMED", path, file.getStringValue(path)); 1039 if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) { 1040 //System.out.println("\nSKIP: " + line); 1041 } else { 1042 tsv_missing.println(line); 1043 } 1044 } 1045 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 1046 String path = entry.getValue(); 1047 Level level = coverageLevel2.getLevel(path); 1048 if (level.compareTo(cldrLocaleLevelGoal) > 0) { 1049 continue; 1050 } 1051 MissingStatus missingStatus = entry.getKey(); 1052 String line = spreadsheetLine(locale, script, language, cldrLocaleLevelGoal, level, missingStatus.toString(), path, "???"); 1053 if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) { 1054 //System.out.println("\nSKIP: " + line); 1055 } else { 1056 tsv_missing.println(line); 1057 } 1058 } 1059 } 1060 1061 localeCount++; 1062 } catch (Exception e) { 1063 throw new IllegalArgumentException(e); 1064 } 1065 } 1066 pw.println(tablePrinter.toTable()); 1067 out2.close(); 1068 1069 long end = System.currentTimeMillis(); 1070 System.out.println((end - start) + " millis = " 1071 + ((end - start) / localeCount) + " millis/locale"); 1072 1073 // CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en"); 1074 // 1075 // for (Entry<MissingStatus, Set<String>> entity : missingPaths.keyValuesSet()) { 1076 // for (PathHeader s : CldrUtility.transform(entity.getValue(), pathHeaderFactory, new TreeSet<PathHeader>())) { 1077 // System.out.println(entity.getKey() + "\t" + coverageLevel2.getLevel(s.getOriginalPath()) + "\t" + s 1078 // + "\t\t" + s.getOriginalPath()); 1079 // } 1080 // } 1081 } 1082 // userInfo.getVoterInfo().getLevel().compareTo(VoteResolver.Level.tc) 1083 static final VoterInfo dummyVoterInfo = new VoterInfo(Organization.cldr, org.unicode.cldr.util.VoteResolver.Level.vetter, "somename"); 1084 1085 static final UserInfo dummyUserInfo = new UserInfo() { 1086 public VoterInfo getVoterInfo() { 1087 return dummyVoterInfo; 1088 } 1089 }; 1090 static final PathValueInfo dummyPathValueInfo = new PathValueInfo() { 1091 // pathValueInfo.getCoverageLevel().compareTo(Level.COMPREHENSIVE) 1092 public Collection<? extends CandidateInfo> getValues() { 1093 throw new UnsupportedOperationException(); 1094 } 1095 public CandidateInfo getCurrentItem() { 1096 throw new UnsupportedOperationException(); 1097 } 1098 public String getLastReleaseValue() { 1099 throw new UnsupportedOperationException(); 1100 } 1101 public Level getCoverageLevel() { 1102 return Level.MODERN; 1103 } 1104 public boolean hadVotesSometimeThisRelease() { 1105 throw new UnsupportedOperationException(); 1106 } 1107 }; 1108 spreadsheetLine(String locale, String script, String language, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, String nativeValue)1109 public static String spreadsheetLine(String locale, String script, String language, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, String nativeValue) { 1110 String phString = "n/a\tn/a\tn/a\tn/a"; 1111 String stLink = "n/a"; 1112 String englishValue = "n/a"; 1113 StatusAction action = null; 1114 SurveyToolStatus surveyToolStatus = null; 1115 try { 1116 PathHeader ph = pathHeaderFactory.fromPath(path); 1117 phString = ph.toString(); 1118 surveyToolStatus = ph.getSurveyToolStatus(); 1119 action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, InputMethod.DIRECT, surveyToolStatus, dummyUserInfo); 1120 stLink = URLS.forXpath(locale, ph.getOriginalPath()); 1121 englishValue = ENGLISH.getStringValue(path); 1122 } catch (Exception e) { 1123 } 1124 String line = language 1125 + "\t" + ENGLISH.getName(language) 1126 + "\t" + ENGLISH.getName("script", script) 1127 + "\t" + englishValue 1128 + "\t" + nativeValue 1129 + "\t" + cldrLocaleLevelGoal 1130 + "\t" + itemLevel 1131 + "\t" + status 1132 + "\t" + (action == null ? "?" : action.toString()) 1133 + "\t" + (surveyToolStatus == null ? "?" : surveyToolStatus.toString()) 1134 + "\t" + stLink 1135 + "\t" + phString 1136 + "\t" + path 1137 ; 1138 return line; 1139 } 1140 1141 private static CLDRURLS URLS = CLDRConfig.getInstance().urls(); 1142 1143 } 1144