package org.unicode.cldr.tool;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.test.CheckCLDR.InputMethod;
import org.unicode.cldr.test.CheckCLDR.Phase;
import org.unicode.cldr.test.CheckCLDR.StatusAction;
import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRInfo.CandidateInfo;
import org.unicode.cldr.util.CLDRInfo.PathValueInfo;
import org.unicode.cldr.util.CLDRInfo.UserInfo;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRURLS;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.CoreCoverageInfo;
import org.unicode.cldr.util.CoreCoverageInfo.CoreItems;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.Counter2;
import org.unicode.cldr.util.CoverageInfo;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.LanguageTagCanonicalizer;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.Factory;
import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
import org.unicode.cldr.util.PathStarrer;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.RegexLookup.LookupType;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StringId;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.VettingViewer;
import org.unicode.cldr.util.VettingViewer.MissingStatus;
import org.unicode.cldr.util.VoteResolver.VoterInfo;
import org.unicode.cldr.util.XPathParts;

import com.google.common.base.Joiner;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;

/**
 * Command-line tool that reports per-locale coverage. Depending on the options parsed in
 * {@code main} it either writes the coverage chart and its TSV companions
 * ({@code showCoverage}) or a per-release growth table ({@code doGrowth}).
 */
public class ShowLocaleCoverage {
    // NOTE(review): not referenced in the visible part of this file; looks like the location of
    // the vetted XML snapshot for the generated version — confirm before removing.
    private static final String VXML_CONSTANT = CLDRPaths.AUX_DIRECTORY + "voting/" + CLDRFile.GEN_VERSION + "/vxml/common/";
    // Used by main to resolve the CLDR base directory when the directories option has no "<baseDir>:" prefix.
    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    // First line written to locale-missing-summary.tsv by showCoverage.
    private static final String TSV_MISSING_SUMMARY_HEADER =
        "#Path Level"
            + "\t#Locales"
            + "\tLocales"
            + "\tSection"
            + "\tPage"
            + "\tHeader"
            + "\tCode"
            ;
    // First line written to locale-coverage.tsv by showCoverage; the rows appended there must
    // stay in sync with these columns.
    private static final String TSV_LOCALE_COVERAGE_HEADER =
        "#Dir"
            + "\tCode"
            + "\tEnglish Name"
            + "\tNative Name"
            + "\tScript"
            + "\tCLDR Target"
            + "\tSublocales"
            + "\tFields\tUC\tMissing"
            + "\tModern\tMiss +UC"
            + "\tModerate\tMiss +UC"
            + "\tBasic\tMiss +UC"
            + "\tCore\tMiss +UC"
            + "\tCore-Missing";

    // First line written to locale-missing.tsv by showCoverage. The commented-out entries are
    // columns that are currently not emitted.
    private static final String TSV_MISSING_HEADER =
        "#LCode"
            + "\tEnglish Name"
            + "\tScript"
            // + "\tEnglish Value"
            // + "\tNative Value"
            + "\tLocale Level"
            + "\tPath Level"
            // + "\tStatus"
            // + "\tAction"
            + "\tSTStatus"
            + "\tBailey"
            + "\tVxml"
            + "\tVStatus"
            // + "\tST Link"
            + "\tSection"
            + "\tPage"
            + "\tHeader"
            + "\tCode"
            + "\tST Link"
            + "\tConfig Action"
            ;
    // First line written to locale-missing-basic.tsv by showCoverage; currently an empty line.
    private static final String TSV_MISSING_BASIC_HEADER = "";

    // When true, doGrowth additionally prints the per-release data to System.out.
    private static final boolean DEBUG = true;
    private static final char DEBUG_FILTER = 0; // use letter to only load locales starting with that letter

    // Default value of the "version" command-line option.
    private static final String LATEST = ToolConstants.CHART_VERSION;
    // Number of CoreItems values minus the size of CoreItems.ONLY_RECOMMENDED.
    private static final double CORE_SIZE = CoreItems.values().length - CoreItems.ONLY_RECOMMENDED.size();
    // The initializers below depend on declaration order: testInfo feeds SC, SUPPLEMENTAL_DATA_INFO
    // and factory; factory feeds ENGLISH.
    public static CLDRConfig testInfo = ToolConfig.getToolInstance();
    private static final StandardCodes SC = testInfo.getStandardCodes();
    private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo();
    // Alias of SC (same instance).
    private static final StandardCodes STANDARD_CODES = SC;

    // Source of locale files; main replaces it when the version or directories option is given.
    static org.unicode.cldr.util.Factory factory = testInfo.getCommonAndSeedAndMainAndAnnotationsFactory();
    // English file made from the initial factory; used for English names and for pathHeaderFactory.
    // NOTE(review): the boolean argument is presumably "resolved" — confirm against Factory.make.
    private static final CLDRFile ENGLISH = factory.make("en", true);

    // Key set of the English annotation data.
    private static UnicodeSet ENG_ANN = Annotations.getData("en").keySet();

    // added info using pattern in VettingViewer.
152 153 static final RegexLookup<Boolean> HACK = RegexLookup.<Boolean> of(LookupType.STANDARD, RegexLookup.RegexFinderTransformPath) 154 .add("//ldml/localeDisplayNames/keys/key[@type=\"(d0|em|fw|i0|k0|lw|m0|rg|s0|ss|t0|x0)\"]", true) 155 .add("//ldml/localeDisplayNames/types/type[@key=\"(em|fw|kr|lw|ss)\"].*", true) 156 .add("//ldml/localeDisplayNames/languages/language[@type=\".*_.*\"]", true) 157 .add("//ldml/localeDisplayNames/languages/language[@type=\".*\"][@alt=\".*\"]", true) 158 .add("//ldml/localeDisplayNames/territories/territory[@type=\".*\"][@alt=\".*\"]", true) 159 .add("//ldml/localeDisplayNames/territories/territory[@type=\"EZ\"]", true); 160 161 //private static final String OUT_DIRECTORY = CLDRPaths.GEN_DIRECTORY + "/coverage/"; // CldrUtility.MAIN_DIRECTORY; 162 163 final static Options myOptions = new Options(); 164 165 enum MyOptions { 166 filter(".+", ".*", "Filter the information based on id, using a regex argument."), 167 // draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft status."), 168 chart(null, null, "chart only"), 169 growth("true", "true", "Compute growth data"), 170 organization(".+", null, "Only locales for organization"), 171 version(".+", 172 LATEST, "To get different versions"), 173 rawData(null, null, "Output the raw data from all coverage levels"), 174 targetDir(".*", 175 CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."), 176 directories("(.*:)?[a-z]+(,[a-z]+)*", "common", 177 "Space-delimited list of main source directories: common,seed,exemplar.\n" + 178 "Optional, <baseDir>:common,seed"),; 179 180 // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target directory."), 181 // layouts(null, null, "Only create html files for keyboard layouts"), 182 // repertoire(null, null, "Only create html files for repertoire"), ; 183 // boilerplate 184 final Option option; 185 MyOptions(String argumentPattern, String defaultArgument, String helpText)186 MyOptions(String 
argumentPattern, String defaultArgument, String helpText) { 187 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 188 } 189 } 190 191 static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = new RegexLookup<Boolean>() 192 .add("\\[@alt=\"accounting\"]", true) 193 .add("\\[@alt=\"variant\"]", true) 194 .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true) 195 .add("^//ldml/localeDisplayNames/languages/language.*_", true) 196 .add("^//ldml/numbers/currencies/currency.*/symbol", true) 197 .add("^//ldml/characters/exemplarCharacters", true); 198 199 static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed; 200 static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH); 201 202 static boolean RAW_DATA = true; 203 private static Set<String> COMMON_LOCALES; 204 main(String[] args)205 public static void main(String[] args) throws IOException { 206 myOptions.parse(MyOptions.filter, args, true); 207 208 Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher(""); 209 210 if (MyOptions.chart.option.doesOccur()) { 211 showCoverage(null, matcher); 212 return; 213 } 214 215 216 if (MyOptions.growth.option.doesOccur()) { 217 try (PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-growth.tsv")) { 218 doGrowth(matcher, out); 219 return; 220 } 221 } 222 223 Set<String> locales = null; 224 String organization = MyOptions.organization.option.getValue(); 225 boolean useOrgLevel = MyOptions.organization.option.doesOccur(); 226 if (useOrgLevel) { 227 locales = STANDARD_CODES.getLocaleCoverageLocales(organization); 228 } 229 230 if (MyOptions.version.option.doesOccur()) { 231 String number = MyOptions.version.option.getValue().trim(); 232 if (!number.contains(".")) { 233 number += ".0"; 234 } 235 factory = org.unicode.cldr.util.Factory.make( 236 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*"); 237 } else { 238 if 
(MyOptions.directories.option.doesOccur()) { 239 String directories = MyOptions.directories.option.getValue().trim(); 240 CLDRConfig cldrConfig = CONFIG; 241 String base = null; 242 int colonPos = directories.indexOf(':'); 243 if (colonPos >= 0) { 244 base = directories.substring(0, colonPos).trim(); 245 directories = directories.substring(colonPos + 1).trim(); 246 } else { 247 base = cldrConfig.getCldrBaseDirectory().toString(); 248 } 249 String[] items = directories.split(",\\s*"); 250 File[] fullDirectories = new File[items.length]; 251 int i = 0; 252 for (String item : items) { 253 fullDirectories[i++] = new File(base + "/" + item + "/main"); 254 } 255 factory = SimpleFactory.make(fullDirectories, ".*"); 256 COMMON_LOCALES = SimpleFactory.make(base + "/" + "common" + "/main", ".*").getAvailableLanguages(); 257 } 258 } 259 fixCommonLocales(); 260 261 RAW_DATA = MyOptions.rawData.option.doesOccur(); 262 263 //showEnglish(); 264 265 showCoverage(null, matcher, locales, useOrgLevel); 266 } 267 fixCommonLocales()268 public static void fixCommonLocales() { 269 if (COMMON_LOCALES == null) { 270 COMMON_LOCALES = factory.getAvailableLanguages(); 271 } 272 } 273 doGrowth(Matcher matcher, PrintWriter out)274 private static void doGrowth(Matcher matcher, PrintWriter out) { 275 TreeMap<String, List<Double>> growthData = new TreeMap<>(Ordering.natural().reverse()); // sort by version, descending 276 // if (DEBUG) { 277 // for (String dir : new File(CLDRPaths.ARCHIVE_DIRECTORY).list()) { 278 // if (!dir.startsWith("cldr")) { 279 // continue; 280 // } 281 // String version = getNormalizedVersion(dir); 282 // if (version == null) { 283 // continue; 284 // } 285 // org.unicode.cldr.util.Factory newFactory = org.unicode.cldr.util.Factory.make( 286 // CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*"); 287 // System.out.println("Reading: " + version); 288 // Map<String, FoundAndTotal> currentData = addGrowth(newFactory, matcher); 289 // System.out.println("Read: " + 
version + "\t" + currentData); 290 // break; 291 // } 292 // } 293 Map<String, FoundAndTotal> latestData = null; 294 for (ReleaseInfo versionNormalizedVersionAndYear : versionToYear) { 295 VersionInfo version = versionNormalizedVersionAndYear.version; 296 int year = versionNormalizedVersionAndYear.year; 297 String dir = ToolConstants.getBaseDirectory(version.getVersionString(2, 3)); 298 Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false); 299 System.out.println("year: " + year + "; version: " + version + "; size: " + currentData); 300 if (latestData == null) { 301 latestData = currentData; 302 } 303 Counter2<String> completionData = getCompletion(latestData, currentData); 304 addCompletionList(year+"", completionData, growthData); 305 if (DEBUG) System.out.println(currentData); 306 } 307 // Map<String, FoundAndTotal> latestData = addGrowth(factory, null, matcher, false); 308 // addCompletionList(getYearFromVersion(LATEST, false), getCompletion(latestData, latestData), growthData); 309 // if (DEBUG) System.out.println(latestData); 310 // //System.out.println(growthData); 311 // List<String> dirs = new ArrayList<>(Arrays.asList(new File(CLDRPaths.ARCHIVE_DIRECTORY).list())); 312 // Collections.reverse(dirs); 313 // for (String dir : dirs) { 314 // if (!dir.startsWith("cldr")) { 315 // continue; 316 // } 317 // String version = getNormalizedVersion(dir); 318 // if (version == null) { 319 // continue; 320 // } 321 //// if (version.compareTo("12") < 0) { 322 //// continue; 323 //// } 324 // System.out.println("Reading: " + version); 325 // if (version.equals("2008")) { 326 // int debug = 0; 327 // } 328 // Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false); 329 // System.out.println("Read: " + version + "\t" + currentData); 330 // Counter2<String> completionData = getCompletion(latestData, currentData); 331 // //System.out.println(version + "\t" + completionData); 332 // addCompletionList(version, 
completionData, growthData); 333 // if (DEBUG) System.out.println(currentData); 334 // } 335 boolean first = true; 336 for (Entry<String, List<Double>> entry : growthData.entrySet()) { 337 if (first) { 338 for (int i = 0; i < entry.getValue().size(); ++i) { 339 out.print("\t" + i); 340 } 341 out.println(); 342 first = false; 343 } 344 out.println(entry.getKey() + "\t" + Joiner.on("\t").join(entry.getValue())); 345 } 346 } 347 348 static final class ReleaseInfo { ReleaseInfo(VersionInfo versionInfo, int year)349 public ReleaseInfo(VersionInfo versionInfo, int year) { 350 this.version = versionInfo; 351 this.year = year; 352 } 353 VersionInfo version; 354 int year; 355 } 356 357 // TODO merge this into ToolConstants, and have the version expressed as VersionInfo. 358 static final List<ReleaseInfo> versionToYear; 359 static { 360 Object[][] mapping = { 361 { VersionInfo.getInstance(37), 2020 }, 362 { VersionInfo.getInstance(36), 2019 }, 363 { VersionInfo.getInstance(34), 2018 }, 364 { VersionInfo.getInstance(32), 2017 }, 365 { VersionInfo.getInstance(30), 2016 }, 366 { VersionInfo.getInstance(28), 2015 }, 367 { VersionInfo.getInstance(26), 2014 }, 368 { VersionInfo.getInstance(24), 2013 }, 369 { VersionInfo.getInstance(22,1), 2012 }, 370 { VersionInfo.getInstance(2,0,1), 2011 }, 371 { VersionInfo.getInstance(1,9,1), 2010 }, 372 { VersionInfo.getInstance(1,7,2), 2009 }, 373 { VersionInfo.getInstance(1,6,1), 2008 }, 374 { VersionInfo.getInstance(1,5,1), 2007 }, 375 { VersionInfo.getInstance(1,4,1), 2006 }, 376 { VersionInfo.getInstance(1,3), 2005 }, 377 { VersionInfo.getInstance(1,2), 2004 }, 378 { VersionInfo.getInstance(1,1,1), 2003 }, 379 }; 380 List<ReleaseInfo> _versionToYear = new ArrayList<>(); 381 for (Object[] row : mapping) { _versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1]))382 _versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1])); 383 } 384 versionToYear = ImmutableList.copyOf(_versionToYear); 385 } 386 387 // public static 
String getNormalizedVersion(String dir) { 388 // String rawVersion = dir.substring(dir.indexOf('-') + 1); 389 // int firstDot = rawVersion.indexOf('.'); 390 // int secondDot = rawVersion.indexOf('.', firstDot + 1); 391 // if (secondDot > 0) { 392 // rawVersion = rawVersion.substring(0, firstDot) + rawVersion.substring(firstDot + 1, secondDot); 393 // } else { 394 // rawVersion = rawVersion.substring(0, firstDot); 395 // } 396 // String result = getYearFromVersion(rawVersion, true); 397 // return result == null ? null : result.toString(); 398 // } 399 400 // private static String getYearFromVersion(String version, boolean allowNull) { 401 // String result = versionToYear.get(version); 402 // if (!allowNull && result == null) { 403 // throw new IllegalArgumentException("No year for version: " + version); 404 // } 405 // return result; 406 // } 407 // 408 // private static String getVersionFromYear(String year, boolean allowNull) { 409 // String result = versionToYear.inverse().get(year); 410 // if (!allowNull && result == null) { 411 // throw new IllegalArgumentException("No version for year: " + year); 412 // } 413 // return result; 414 // } 415 addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData)416 public static void addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData) { 417 List<Double> x = new ArrayList<>(); 418 for (String key : completionData.getKeysetSortedByCount(false)) { 419 x.add(completionData.getCount(key)); 420 } 421 growthData.put(version, x); 422 System.out.println(version + "\t" + x.size()); 423 } 424 getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData)425 public static Counter2<String> getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData) { 426 Counter2<String> completionData = new Counter2<>(); 427 for (Entry<String, FoundAndTotal> entry : 
latestData.entrySet()) { 428 final String locale = entry.getKey(); 429 final FoundAndTotal currentRecord = currentData.get(locale); 430 if (currentRecord == null) { 431 continue; 432 } 433 double total = entry.getValue().total; 434 if (total == 0) { 435 continue; 436 } 437 double completion = currentRecord.found / total; 438 completionData.add(locale, completion); 439 } 440 return completionData; 441 } 442 443 static class FoundAndTotal { 444 final int found; 445 final int total; 446 FoundAndTotal(Counter<Level>.... counters)447 public FoundAndTotal(Counter<Level>... counters) { 448 final int[] count = { 0, 0, 0 }; 449 for (Level level : Level.values()) { 450 if (level == Level.COMPREHENSIVE || level == Level.OPTIONAL) { 451 continue; 452 } 453 int i = 0; 454 for (Counter<Level> counter : counters) { 455 count[i++] += counter.get(level); 456 } 457 } 458 found = count[0]; 459 total = found + count[1] + count[2]; 460 } 461 462 @Override toString()463 public String toString() { 464 return found + "/" + total; 465 } 466 } 467 addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing)468 private static Map<String, FoundAndTotal> addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing) { 469 final File mainDir = new File(dir + "/common/main/"); 470 final File annotationDir = new File(dir + "/common/annotations/"); 471 File[] paths = annotationDir.exists() ? 
new File[] {mainDir, annotationDir} : new File[] {mainDir}; 472 org.unicode.cldr.util.Factory newFactory = SimpleFactory.make(paths, ".*"); 473 Map<String, FoundAndTotal> data = new HashMap<>(); 474 char c = 0; 475 Set<String> latestAvailable = newFactory.getAvailableLanguages(); 476 for (String locale : newFactory.getAvailableLanguages()) { 477 if (!matcher.reset(locale).matches()) { 478 continue; 479 } 480 if (!latestAvailable.contains(locale)) { 481 continue; 482 } 483 if (SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales().contains(locale) 484 || locale.equals("root") 485 || locale.equals("supplementalData")) { 486 continue; 487 } 488 char nc = locale.charAt(0); 489 if (nc != c) { 490 System.out.println("\t" + locale); 491 c = nc; 492 } 493 if (DEBUG_FILTER != 0 && DEBUG_FILTER != nc) { 494 continue; 495 } 496 CLDRFile latestFile = null; 497 try { 498 latestFile = latestFactory.make(locale, true); 499 } catch (Exception e2) { 500 continue; 501 } 502 final CLDRFile file = newFactory.make(locale, true); 503 // HACK check bogus 504 // Collection<String> extra = file.getExtraPaths(); 505 // 506 // final Iterable<String> fullIterable = file.fullIterable(); 507 // for (String path : fullIterable) { 508 // if (path.contains("\"one[@")) { 509 // boolean inside = extra.contains(path); 510 // Status status = new Status(); 511 // String loc = file.getSourceLocaleID(path, status ); 512 // int debug = 0; 513 // } 514 // } 515 // END HACK 516 Counter<Level> foundCounter = new Counter<>(); 517 Counter<Level> unconfirmedCounter = new Counter<>(); 518 Counter<Level> missingCounter = new Counter<>(); 519 Set<String> unconfirmedPaths = null; 520 Relation<MissingStatus, String> missingPaths = null; 521 unconfirmedPaths = new LinkedHashSet<>(); 522 missingPaths = Relation.of(new LinkedHashMap(), LinkedHashSet.class); 523 VettingViewer.getStatus(latestFile.fullIterable(), file, 524 pathHeaderFactory, foundCounter, unconfirmedCounter, 525 missingCounter, missingPaths, 
unconfirmedPaths); 526 527 // HACK 528 Set<Entry<MissingStatus, String>> missingRemovals = new HashSet<>(); 529 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) { 530 if (e.getKey() == MissingStatus.ABSENT) { 531 final String path = e.getValue(); 532 if (HACK.get(path) != null) { 533 missingRemovals.add(e); 534 missingCounter.add(Level.MODERN, -1); 535 foundCounter.add(Level.MODERN, 1); 536 } else { 537 Status status = new Status(); 538 String loc = file.getSourceLocaleID(path, status); 539 int debug = 0; 540 } 541 } 542 } 543 for (Entry<MissingStatus, String> e : missingRemovals) { 544 missingPaths.remove(e.getKey(), e.getValue()); 545 } 546 // END HACK 547 548 if (showMissing) { 549 int count = 0; 550 for (String s : unconfirmedPaths) { 551 System.out.println(++count + "\t" + locale + "\tunconfirmed\t" + s); 552 } 553 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) { 554 String path = e.getValue(); 555 Status status = new Status(); 556 String loc = file.getSourceLocaleID(path, status); 557 int debug = 0; 558 559 System.out.println(++count + "\t" + locale + "\t" + CldrUtility.toString(e)); 560 } 561 int debug = 0; 562 } 563 564 data.put(locale, new FoundAndTotal(foundCounter, unconfirmedCounter, missingCounter)); 565 } 566 return Collections.unmodifiableMap(data); 567 } 568 showCoverage(Anchors anchors, Matcher matcher)569 public static void showCoverage(Anchors anchors, Matcher matcher) throws IOException { 570 showCoverage(anchors, matcher, null, false); 571 } 572 showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)573 public static void showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) throws IOException { 574 final String title = "Locale Coverage"; 575 try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors)); 576 PrintWriter tsv_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", 
"locale-coverage.tsv"); 577 PrintWriter tsv_missing = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv"); 578 PrintWriter tsv_missing_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv"); 579 PrintWriter tsv_missing_basic = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv"); 580 ){ 581 tsv_summary.println(TSV_LOCALE_COVERAGE_HEADER); 582 tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER); 583 tsv_missing.println(TSV_MISSING_HEADER); 584 tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER); 585 586 Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN)); 587 Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages()); 588 availableLanguages.addAll(checkModernLocales); 589 Relation<String, String> languageToRegion = Relation.of(new TreeMap(), TreeSet.class); 590 LanguageTagParser ltp = new LanguageTagParser(); 591 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true); 592 for (String locale : factory.getAvailable()) { 593 String country = ltp.set(locale).getRegion(); 594 if (!country.isEmpty()) { 595 languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country); 596 } 597 } 598 599 fixCommonLocales(); 600 601 System.out.println(Joiner.on("\n").join(languageToRegion.keyValuesSet())); 602 603 System.out.println("# Checking: " + availableLanguages); 604 605 pw.println("<p style='text-align: left'>This chart shows the coverage levels for this release. 
</p>" + 606 "<ol>" 607 + "<li>Fields = fields found at a modern level</li>" 608 + "<li>UC = unconfirmed values: typically treated as missing by implementations</li>" 609 + "<li>Miss = missing values</li>" 610 + "<li>Modern%, etc = fields/(fields + missing + unconfirmed) — at that level</li>" 611 + "<li>Core Missing = missing core fields — optionals marked with *</li></ol>" 612 + "<p>A high-level summary of the meaning of the coverage values are at " + 613 "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. " + 614 "The Core values are described on " + 615 "<a target='_blank' href='http://cldr.unicode.org/index/cldr-spec/minimaldata'>Core Data</a>." + 616 "</p>"); 617 618 Relation<MissingStatus, String> missingPaths = Relation.of(new EnumMap<MissingStatus, Set<String>>( 619 MissingStatus.class), TreeSet.class, CLDRFile.getComparator(DtdType.ldml)); 620 Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml)); 621 622 //Map<String, String> likely = testInfo.getSupplementalDataInfo().getLikelySubtags(); 623 Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales(); 624 625 // Map<String,Counter<Level>> counts = new HashMap(); 626 // System.out.print("Script\tEnglish\tNative\tCode\tCode*"); 627 // for (Level level : Level.values()) { 628 // if (skipPrintingLevels.contains(level)) { 629 // continue; 630 // } 631 // System.out.print("\t≤" + level + " (f)\t(u)\t(m)"); 632 // } 633 // System.out.println(); 634 // Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getCldrFactory().make("en", true)); 635 636 Counter<Level> foundCounter = new Counter<>(); 637 Counter<Level> unconfirmedCounter = new Counter<>(); 638 Counter<Level> missingCounter = new Counter<>(); 639 640 List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class)); 641 levelsToShow.remove(Level.COMPREHENSIVE); 642 levelsToShow.remove(Level.UNDETERMINED); 643 levelsToShow = 
ImmutableList.copyOf(levelsToShow); 644 List<Level> reversedLevels = new ArrayList<>(levelsToShow); 645 Collections.reverse(reversedLevels); 646 reversedLevels = ImmutableList.copyOf(reversedLevels); 647 648 649 // PrintWriter out2; 650 // try { 651 // out2 = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "showLocaleCoverage.tsv"); 652 // } catch (IOException e1) { 653 // throw new ICUUncheckedIOException(e1); 654 // } 655 // 656 // out2.print("Code\tCom?\tEnglish Name\tNative Name\tScript\tSublocales\tStrings"); 657 // for (Level level : reversedLevels) { 658 // out2.print("\t" + level + " %\t" + level + " UC%"); 659 // } 660 // out2.println(); 661 //System.out.println("\tCore*\nCore* Missing"); 662 int localeCount = 0; 663 664 final TablePrinter tablePrinter = new TablePrinter() 665 .addColumn("Direct.", "class='source'", null, "class='source'", true) 666 .setBreakSpans(true).setSpanRows(false) 667 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true).setBreakSpans(true) 668 .addColumn("English Name", "class='source'", null, "class='source'", true).setBreakSpans(true) 669 .addColumn("Native Name", "class='source'", null, "class='source'", true).setBreakSpans(true) 670 .addColumn("Script", "class='source'", null, "class='source'", true).setBreakSpans(true) 671 .addColumn("CLDR target", "class='source'", null, "class='source'", true).setBreakSpans(true).setSortPriority(0).setSortAscending(false) 672 .addColumn("ICU", "class='source'", null, "class='source'", true).setBreakSpans(true).setSortPriority(1).setSortAscending(false) 673 .addColumn("Sublocales", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 674 .setCellPattern("{0,number}") 675 .addColumn("Fields", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 676 .setCellPattern("{0,number}") 677 .addColumn("UC", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 678 
.setCellPattern("{0,number}") 679 .addColumn("Miss", "class='target'", null, "class='targetRight'", true).setBreakSpans(true) 680 .setCellPattern("{0,number}") 681 //.addColumn("Target Level", "class='target'", null, "class='target'", true).setBreakSpans(true) 682 ; 683 684 NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH); 685 tsvPercent.setMaximumFractionDigits(2); 686 687 for (Level level : reversedLevels) { 688 String titleLevel = level.toString(); 689 tablePrinter.addColumn(UCharacter.toTitleCase(titleLevel, null) + "%", "class='target'", null, "class='targetRight'", true) 690 .setCellPattern("{0,number,0.0%}") 691 .setBreakSpans(true); 692 switch(level) { 693 case CORE: 694 tablePrinter.setSortPriority(5).setSortAscending(false); 695 break; 696 case BASIC: 697 tablePrinter.setSortPriority(4).setSortAscending(false); 698 break; 699 case MODERATE: 700 tablePrinter.setSortPriority(3).setSortAscending(false); 701 break; 702 case MODERN: 703 tablePrinter.setSortPriority(2).setSortAscending(false); 704 break; 705 } 706 // tablePrinter 707 // .addColumn("∪ UC%", "class='target'", null, "class='targetRight'", true) 708 // .setCellPattern("{0,number,0.0%}") 709 // .setBreakSpans(true) 710 711 } 712 tablePrinter.addColumn("Core Missing", "class='target'", null, "class='targetRight'", true) 713 .setBreakSpans(true); 714 715 long start = System.currentTimeMillis(); 716 LikelySubtags likelySubtags = new LikelySubtags(); 717 718 EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class); 719 targetLevel.put(Level.CORE, 2 / 100d); 720 targetLevel.put(Level.BASIC, 16 / 100d); 721 targetLevel.put(Level.MODERATE, 33 / 100d); 722 targetLevel.put(Level.MODERN, 100 / 100d); 723 724 // NumberFormat percentFormat = NumberFormat.getPercentInstance(ULocale.ENGLISH); 725 // percentFormat.setMaximumFractionDigits(2); 726 // percentFormat.setMinimumFractionDigits(2); 727 // NumberFormat intFormat = NumberFormat.getIntegerInstance(ULocale.ENGLISH); 728 729 
Multimap<String, String> pathToLocale = TreeMultimap.create(); 730 731 int counter = 0; 732 for (String locale : availableLanguages) { 733 try { 734 if (locale.contains("supplemental") // for old versionsl 735 || locale.startsWith("sr_Latn")) { 736 continue; 737 } 738 if (locales != null && !locales.contains(locale)) { 739 String base = CLDRLocale.getInstance(locale).getLanguage(); 740 if (!locales.contains(base)) { 741 continue; 742 } 743 } 744 if (matcher != null && !matcher.reset(locale).matches()) { 745 continue; 746 } 747 if (defaultContents.contains(locale) || "root".equals(locale) || "und".equals(locale)) { 748 continue; 749 } 750 751 CLDRFile vxmlCldrFile2 = null; // getVxmlCldrFile(locale); TODO clean this up 752 753 tsv_summary.flush(); 754 tsv_missing_summary.flush(); 755 tsv_missing.flush(); 756 tsv_missing_basic.flush(); 757 758 boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists(); 759 760 //boolean capture = locale.equals("en"); 761 String region = ltp.set(locale).getRegion(); 762 if (!region.isEmpty()) continue; // skip regions 763 764 final Level cldrLocaleLevelGoal = SC.getLocaleCoverageLevel(Organization.cldr.toString(), locale); 765 final boolean cldrLevelGoalBasicToModern = Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal); 766 767 String isCommonLocale = Level.MODERN == cldrLocaleLevelGoal ? "C*" 768 : COMMON_LOCALES.contains(locale) ? 
"C" 769 : ""; 770 771 String max = likelySubtags.maximize(locale); 772 String script = ltp.set(max).getScript(); 773 774 String language = likelySubtags.minimize(locale); 775 // Level otherLevel = STANDARD_CODES.getLocaleCoverageLevel("apple", locale); 776 // if (otherLevel.compareTo(currentLevel) > 0 777 // && otherLevel.compareTo(Level.MODERN) <= 0) { 778 // currentLevel = otherLevel; 779 // } 780 781 missingPaths.clear(); 782 unconfirmed.clear(); 783 784 final CLDRFile file = factory.make(locale, true, minimumDraftStatus); 785 786 if (locale.equals("af")) { 787 int debug = 0; 788 } 789 790 Iterable<String> pathSource = new IterableFilter(file.fullIterable()); 791 792 VettingViewer.getStatus(pathSource, file, 793 pathHeaderFactory, foundCounter, unconfirmedCounter, 794 missingCounter, missingPaths, unconfirmed); 795 796 // HACK Fix up missing items. Remove once vxml is ok. 797 if (vxmlCldrFile != null) { 798 Multimap<MissingStatus,String> toRemove = HashMultimap.create(); 799 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 800 String mPath = entry.getValue(); 801 String vxmlValue = vxmlCldrFile.getStringValue(mPath); 802 if (vxmlValue != null) { 803 if (vxmlValue.equals(CldrUtility.INHERITANCE_MARKER)) { 804 vxmlValue = vxmlCldrFile.getBaileyValue(mPath, null, null); 805 } 806 if (vxmlValue != null) { 807 String bailey = file.getStringValue(mPath); 808 if (vxmlValue.equals(bailey)) { 809 String fullPath = vxmlCldrFile.getFullXPath(mPath); 810 if (!fullPath.contains("provisional") && !fullPath.contains("unconfirmed")) { 811 toRemove.put(entry.getKey(), mPath); 812 Level level = coverageInfo.getCoverageLevel(mPath, locale); 813 missingCounter.add(level, -1); 814 } 815 } 816 } 817 } 818 } 819 for (Entry<MissingStatus, String> entry : toRemove.entries()) { 820 missingPaths.remove(entry.getKey(), entry.getValue()); 821 } 822 } 823 824 Set<String> sublocales = languageToRegion.get(language); 825 if (sublocales == null) { 826 
//System.err.println("No Sublocales: " + language); 827 sublocales = Collections.EMPTY_SET; 828 } 829 830 // List s = Lists.newArrayList(file.fullIterable()); 831 832 String seedString = isSeed ? "seed" : "common"; 833 tablePrinter.addRow() 834 .addCell(seedString) 835 .addCell(language) 836 .addCell(ENGLISH.getName(language)) 837 .addCell(file.getName(language)) 838 .addCell(script) 839 .addCell(cldrLocaleLevelGoal == Level.UNDETERMINED ? "" : cldrLocaleLevelGoal.toString()) 840 .addCell(getIcuValue(language)) 841 .addCell(sublocales.size()); 842 843 String s = TSV_LOCALE_COVERAGE_HEADER; // make sure sync'ed (and below) 844 if (cldrLevelGoalBasicToModern) { 845 tsv_summary 846 .append(seedString) 847 .append('\t').append(language) 848 .append('\t').append(ENGLISH.getName(language)) 849 .append('\t').append(file.getName(language)) 850 .append('\t').append(script) 851 .append('\t').append(cldrLocaleLevelGoal.toString()) 852 .append('\t').append(sublocales.size()+""); 853 854 } 855 856 // String header = language 857 // + "\t" + isCommonLocale 858 // + "\t" + ENGLISH.getName(language) 859 // + "\t" + file.getName(language) 860 // + "\t" + script 861 // + "\t" + sublocales.size() 862 // //+ "\t" + currentLevel 863 // ; 864 865 int sumFound = 0; 866 int sumMissing = 0; 867 int sumUnconfirmed = 0; 868 869 // get the totals 870 871 EnumMap<Level, Integer> totals = new EnumMap<>(Level.class); 872 EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class); 873 // EnumMap<Level, Integer> unconfirmedByLevel = new EnumMap<>(Level.class); 874 Set<String> coreMissing = new LinkedHashSet<>(); 875 876 if (locale.equals("af")) { 877 int debug = 0; 878 } 879 880 Counter<String> starredCounter = new Counter<>(); 881 882 { // CORE 883 long missingExemplarCount = missingCounter.get(Level.CORE); 884 if (missingExemplarCount > 0) { 885 for (Entry<MissingStatus, String> statusAndPath : missingPaths.entrySet()) { 886 String path = statusAndPath.getValue(); 887 if 
(path.startsWith("//ldml/characters/exemplarCharacters")) { 888 PathHeader ph = pathHeaderFactory.fromPath(path); 889 String problem = ph.getCode().replaceAll("Others: ","").replaceAll("Main Letters", "main-letters"); 890 coreMissing.add(problem); 891 // String line = spreadsheetLine(locale, script, language, cldrLevelGoal, foundLevel, missingStatus.toString(), path, file.getStringValue(path)); 892 if (cldrLevelGoalBasicToModern) { 893 String line = spreadsheetLine(locale, language, script, "«No " + problem + "»", cldrLocaleLevelGoal, Level.CORE, "ABSENT", path, file, vxmlCldrFile2, pathToLocale); 894 tsv_missing.println(line); 895 } else { 896 gatherStarred(path, starredCounter); 897 } 898 } 899 } 900 } 901 Multimap<CoreItems, String> detailedErrors = LinkedHashMultimap.create(); 902 Set<CoreItems> coverage = new TreeSet<>( 903 CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors)); 904 Set<CoreItems> missing = EnumSet.allOf(CoreItems.class); 905 missing.removeAll(coverage); 906 for (Entry<CoreItems, String> entry : detailedErrors.entries()) { 907 CoreItems coreItem = entry.getKey(); 908 String path = entry.getValue(); 909 coreMissing.add(coreItem.toString()); 910 //String line = spreadsheetLine(language, script, "n/a", detailedErrors.get(entry).toString(), level, "ABSENT", "n/a", "n/a", "n/a"); 911 if (cldrLevelGoalBasicToModern) { 912 String line = spreadsheetLine(locale, language, script, "«No " + coreItem + "»", cldrLocaleLevelGoal, coreItem.desiredLevel, "ABSENT", path, null, vxmlCldrFile2, pathToLocale); 913 tsv_missing.println(line); 914 } else { 915 gatherStarred(path, starredCounter); 916 } 917 } 918 missing.removeAll(CoreItems.ONLY_RECOMMENDED); 919 foundCounter.add(Level.CORE, coverage.size()); 920 missingCounter.add(Level.CORE, missing.size()); 921 922 // sumFound += coverage.size(); 923 // sumMissing += missing.size(); 924 925 // confirmed.put(Level.CORE, (int) coverage.size()); 926 //// unconfirmedByLevel.put(level, (int)(foundCount + 
unconfirmedCount)); 927 // totals.put(Level.CORE, (int)(coverage.size() + missing.size())); 928 929 } 930 931 if (cldrLevelGoalBasicToModern) { 932 Level goalLevel = cldrLocaleLevelGoal; 933 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 934 String path = entry.getValue(); 935 String status = entry.getKey().toString(); 936 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 937 if (goalLevel.compareTo(foundLevel) >= 0) { 938 String line = spreadsheetLine(locale, language, script, file.getStringValue(path), goalLevel, foundLevel, status, path, file, vxmlCldrFile2, pathToLocale); 939 tsv_missing.println(line); 940 } 941 } 942 for (String path : unconfirmed) { 943 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 944 if (goalLevel.compareTo(foundLevel) >= 0) { 945 String line = spreadsheetLine(locale, language, script, file.getStringValue(path), goalLevel, foundLevel, "n/a", path, file, vxmlCldrFile2, pathToLocale); 946 tsv_missing.println(line); 947 } 948 } 949 } else { 950 Level goalLevel = Level.BASIC; 951 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 952 String path = entry.getValue(); 953 String status = entry.getKey().toString(); 954 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 955 if (goalLevel.compareTo(foundLevel) >= 0) { 956 gatherStarred(path, starredCounter); 957 } 958 } 959 for (String path : unconfirmed) { 960 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 961 if (goalLevel.compareTo(foundLevel) >= 0) { 962 gatherStarred(path, starredCounter); 963 } 964 } 965 } 966 967 tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER); 968 for (R2<Long, String> starred : starredCounter.getEntrySetSortedByCount(false, null)) { 969 // PathHeader ph = pathHeaderFactory.fromPath(starred.get1()); 970 tsv_missing_basic.println(locale + "\t" + starred.get0() + "\t" + starred.get1().replace("\"*\"", "'*'")); 971 } 972 973 for (Level level : levelsToShow) { 974 long 
foundCount = foundCounter.get(level); 975 long unconfirmedCount = unconfirmedCounter.get(level); 976 long missingCount = missingCounter.get(level); 977 978 sumFound += foundCount; 979 sumUnconfirmed += unconfirmedCount; 980 sumMissing += missingCount; 981 982 confirmed.put(level, sumFound); 983 // unconfirmedByLevel.put(level, (int)(foundCount + unconfirmedCount)); 984 totals.put(level, sumFound + sumUnconfirmed + sumMissing); 985 } 986 987 double modernTotal = totals.get(Level.MODERN); 988 989 tablePrinter 990 .addCell(sumFound) 991 .addCell(sumUnconfirmed) 992 .addCell(sumMissing) 993 ; 994 995 if (cldrLevelGoalBasicToModern) { 996 tsv_summary 997 .append('\t').append(sumFound+"") 998 .append('\t').append(sumUnconfirmed+"") 999 .append('\t').append(sumMissing+"") 1000 ; 1001 } 1002 1003 // header += "\t" + sumFound; 1004 // header += "\t" + (sumFound + sumUnconfirmed); 1005 1006 // print the totals 1007 1008 for (Level level : reversedLevels) { 1009 if (useOrgLevel && cldrLocaleLevelGoal != level) { 1010 continue; 1011 } 1012 int confirmedCoverage = confirmed.get(level); 1013 // int unconfirmedCoverage = unconfirmedByLevel.get(level); 1014 double total = totals.get(level); 1015 1016 tablePrinter 1017 .addCell(confirmedCoverage / total) 1018 // .addCell(unconfirmedCoverage / total) 1019 ; 1020 1021 if (cldrLevelGoalBasicToModern) { 1022 tsv_summary 1023 .append('\t').append(String.valueOf(confirmedCoverage)) 1024 .append('\t').append(String.valueOf((int)total - confirmedCoverage)) 1025 ; 1026 } 1027 1028 // if (RAW_DATA) { 1029 // header += "\t" + confirmedCoverage / total 1030 // + "\t" + unconfirmedCoverage / total; 1031 // } else { 1032 // Double factor = targetLevel.get(level) / (total / modernTotal); 1033 // header += "\t" + factor * confirmedCoverage / modernTotal 1034 //// + "\t" + factor * unconfirmedCoverage / modernTotal 1035 // ; 1036 // } 1037 } 1038 String coreMissingString = 1039 Joiner.on(", ").join(coreMissing); 1040 1041 tablePrinter 1042 
.addCell(coreMissingString) 1043 .finishRow(); 1044 1045 if (cldrLevelGoalBasicToModern) { 1046 tsv_summary 1047 .append('\t') 1048 .append(coreMissingString) 1049 .append('\n'); 1050 } 1051 1052 // Write missing paths (for >99% and specials 1053 1054 // if (false) { // checkModernLocales.contains(locale) 1055 // CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance(locale); 1056 // 1057 // for (String path : unconfirmed) { 1058 // Level level = coverageLevel2.getLevel(path); 1059 // if (level.compareTo(cldrLocaleLevelGoal) > 0) { 1060 // continue; 1061 // } 1062 // String line = spreadsheetLine(locale, language, script, file.getStringValue(path), cldrLocaleLevelGoal, level, "UNCONFIRMED", path, pathToLocale); 1063 // if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) { 1064 // //System.out.println("\nSKIP: " + line); 1065 // } else { 1066 // tsv_missing.println(line); 1067 // } 1068 // } 1069 // for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 1070 // String path = entry.getValue(); 1071 // Level level = coverageLevel2.getLevel(path); 1072 // if (level.compareTo(cldrLocaleLevelGoal) > 0) { 1073 // continue; 1074 // } 1075 // MissingStatus missingStatus = entry.getKey(); 1076 // String line = spreadsheetLine(locale, language, script, "???", cldrLocaleLevelGoal, level, missingStatus.toString(), path, pathToLocale); 1077 // if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) { 1078 // //System.out.println("\nSKIP: " + line); 1079 // } else { 1080 // tsv_missing.println(line); 1081 // } 1082 // } 1083 // } 1084 1085 localeCount++; 1086 } catch (Exception e) { 1087 throw new IllegalArgumentException(e); 1088 } 1089 } 1090 pw.println(tablePrinter.toTable()); 1091 1092 Multimap<Level, String> levelToLocales = TreeMultimap.create(); 1093 1094 for ( Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) { 1095 String path = entry.getKey(); 1096 Collection<String> localeSet = entry.getValue(); 1097 levelToLocales.clear(); 1098 for 
(String locale : localeSet) { 1099 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 1100 levelToLocales.put(foundLevel, locale); 1101 } 1102 String phString = "n/a\tn/a\tn/a\tn/a"; 1103 try { 1104 PathHeader ph = pathHeaderFactory.fromPath(path); 1105 phString = ph.toString(); 1106 } catch (Exception e) { 1107 } 1108 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) { 1109 Level level = entry2.getKey(); 1110 localeSet = entry2.getValue(); 1111 String s = TSV_MISSING_SUMMARY_HEADER; // check for changes 1112 tsv_missing_summary.println( 1113 level 1114 + "\t" + localeSet.size() 1115 + "\t" + Joiner.on(" ").join(localeSet) 1116 + "\t" + phString 1117 ); 1118 } 1119 } 1120 // out2.close(); 1121 1122 long end = System.currentTimeMillis(); 1123 System.out.println((end - start) + " millis = " 1124 + ((end - start) / localeCount) + " millis/locale"); 1125 ShowPlurals.appendBlanksForScrolling(pw); 1126 } 1127 } 1128 1129 // public static void showEnglish() { 1130 // Map<PathHeader,String> sorted = new TreeMap<>(); 1131 // CoverageInfo coverageInfo=CLDRConfig.getInstance().getCoverageInfo(); 1132 // for (String path : ENGLISH) { 1133 //// Level currentLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, "en"); 1134 // Level currentLevel=coverageInfo.getCoverageLevel(path, "en"); 1135 // if (currentLevel.compareTo(Level.MINIMAL) <= 0) { 1136 // PathHeader ph = pathHeaderFactory.fromPath(path); 1137 // sorted.put(ph, currentLevel + "\t" + ENGLISH.getStringValue(path)); 1138 // } 1139 // } 1140 // for (Entry<PathHeader, String> entry : sorted.entrySet()) { 1141 // System.out.println(entry.getKey() + "\t" + entry.getValue()); 1142 // } 1143 // } 1144 1145 static class IterableFilter implements Iterable<String> { 1146 private Iterable<String> source; 1147 IterableFilter(Iterable<String> source)1148 IterableFilter(Iterable<String> source) { 1149 this.source = source; 1150 } 1151 1152 /** 1153 * When some paths are defined after 
submission, we need to change them to COMPREHENSIVE in computing the vetting status. 1154 */ 1155 1156 static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of( 1157 "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]", 1158 "//ldml/localeDisplayNames/territories/territory[@type=\"XA\"]", 1159 "//ldml/localeDisplayNames/territories/territory[@type=\"XB\"]", 1160 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]", 1161 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]", 1162 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]", 1163 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]", 1164 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]", 1165 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]", 1166 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]", 1167 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]", 1168 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]", 1169 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]", 1170 
"//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]", 1171 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]", 1172 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]", 1173 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]", 1174 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]", 1175 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]", 1176 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]", 1177 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]", 1178 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]", 1179 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]", 1180 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]", 1181 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]", 1182 
"//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]", 1183 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]", 1184 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]", 1185 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]", 1186 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]", 1187 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]", 1188 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]", 1189 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]", 1190 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]", 1191 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]", 1192 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]", 1193 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]", 1194 
"//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]", 1195 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]", 1196 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]", 1197 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]", 1198 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]", 1199 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]", 1200 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]", 1201 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]", 1202 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]", 1203 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]", 1204 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]", 1205 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]", 1206 
"//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]", 1207 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]" 1208 ); 1209 @Override iterator()1210 public Iterator<String> iterator() { 1211 return new IteratorFilter(source.iterator()); 1212 } 1213 1214 static class IteratorFilter implements Iterator<String> { 1215 Iterator<String> source; 1216 String peek; 1217 IteratorFilter(Iterator<String> source)1218 public IteratorFilter(Iterator<String> source) { 1219 this.source = source; 1220 fillPeek(); 1221 } 1222 @Override hasNext()1223 public boolean hasNext() { 1224 return peek != null; 1225 } 1226 @Override next()1227 public String next() { 1228 String result = peek; 1229 fillPeek(); 1230 return result; 1231 } 1232 fillPeek()1233 private void fillPeek() { 1234 peek = null; 1235 while (source.hasNext()) { 1236 peek = source.next(); 1237 // if it is ok to assess, then break 1238 if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek) 1239 && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) { 1240 break; 1241 } 1242 peek = null; 1243 } 1244 } 1245 } 1246 1247 } 1248 static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO); 1249 1250 // userInfo.getVoterInfo().getLevel().compareTo(VoteResolver.Level.tc) 1251 static final VoterInfo dummyVoterInfo = new VoterInfo(Organization.cldr, org.unicode.cldr.util.VoteResolver.Level.vetter, "somename"); 1252 1253 static final UserInfo dummyUserInfo = new UserInfo() { 1254 @Override 1255 public VoterInfo getVoterInfo() { 1256 return dummyVoterInfo; 1257 } 1258 }; 1259 static final PathValueInfo dummyPathValueInfo = new PathValueInfo() { 1260 // pathValueInfo.getCoverageLevel().compareTo(Level.COMPREHENSIVE) 1261 @Override 1262 public Collection<? 
extends CandidateInfo> getValues() { 1263 throw new UnsupportedOperationException(); 1264 } 1265 @Override 1266 public CandidateInfo getCurrentItem() { 1267 throw new UnsupportedOperationException(); 1268 } 1269 @Override 1270 public String getBaselineValue() { 1271 throw new UnsupportedOperationException(); 1272 } 1273 @Override 1274 public Level getCoverageLevel() { 1275 return Level.MODERN; 1276 } 1277 @Override 1278 public boolean hadVotesSometimeThisRelease() { 1279 throw new UnsupportedOperationException(); 1280 } 1281 @Override 1282 public CLDRLocale getLocale() { 1283 throw new UnsupportedOperationException(); 1284 } 1285 @Override 1286 public String getXpath() { 1287 throw new UnsupportedOperationException(); 1288 } 1289 }; 1290 1291 1292 // static org.unicode.cldr.util.Factory VXML_FACTORY = SimpleFactory.make(new File[] { 1293 // new File(VXML_CONSTANT + "main"), 1294 // new File(VXML_CONSTANT + "annotations") }, ".*"); 1295 static CLDRFile vxmlCldrFile = null; 1296 // static String vxmlLocale = ""; 1297 1298 // private static CLDRFile getVxmlCldrFile(String locale) { 1299 // if (!vxmlLocale.equals(locale)) { 1300 // try { 1301 // vxmlCldrFile = VXML_FACTORY.make(locale, false); 1302 // } catch (Exception e) { 1303 // vxmlCldrFile = null; 1304 // } 1305 // vxmlLocale = locale; 1306 // } 1307 // return vxmlCldrFile; 1308 // } 1309 gatherStarred(String path, Counter<String> starredCounter)1310 public static void gatherStarred(String path, Counter<String> starredCounter) { 1311 starredCounter.add(new PathStarrer().setSubstitutionPattern("*").set(path), 1); 1312 } 1313 spreadsheetLine(String locale, String language, String script, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, CLDRFile vxmlCldrFile, Multimap<String, String> pathToLocale)1314 public static String spreadsheetLine(String locale, String language, String script, String nativeValue, Level cldrLocaleLevelGoal, 1315 Level itemLevel, 
String status, String path, CLDRFile resolvedFile, CLDRFile vxmlCldrFile, 1316 Multimap<String, String> pathToLocale) { 1317 if (pathToLocale != null) { 1318 pathToLocale.put(path, locale); 1319 } 1320 String stLink = "n/a"; 1321 String englishValue = "n/a"; 1322 StatusAction action = null; 1323 SurveyToolStatus surveyToolStatus = null; 1324 String icuValue = getIcuValue(locale); 1325 1326 String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path); 1327 String vxmlValue = ""; 1328 String vxmlDraftStatus = ""; 1329 if (vxmlCldrFile != null) { 1330 try { 1331 vxmlValue = vxmlCldrFile.getStringValue(path); 1332 if (vxmlValue == null) { 1333 vxmlValue = ""; 1334 } else { 1335 String fullXPath = vxmlCldrFile.getFullXPath(path); 1336 XPathParts parts = XPathParts.getFrozenInstance(fullXPath); 1337 vxmlDraftStatus = parts.getAttributeValue(-1, "draft"); 1338 if (vxmlDraftStatus == null) { 1339 vxmlDraftStatus = ""; 1340 } 1341 } 1342 } catch (Exception e) { 1343 } 1344 } 1345 1346 String phString = "na\tn/a\tn/a\t" + path; 1347 try { 1348 PathHeader ph = pathHeaderFactory.fromPath(path); 1349 phString = ph.toString(); 1350 surveyToolStatus = ph.getSurveyToolStatus(); 1351 stLink = URLS.forXpath(locale, path); 1352 englishValue = ENGLISH.getStringValue(path); 1353 action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, InputMethod.DIRECT, surveyToolStatus, dummyUserInfo); 1354 } catch (Exception e) { 1355 int debug = 0; 1356 } 1357 1358 String config_text = vxmlValue.isEmpty() ? 
"" : 1359 "locale=" + locale 1360 + " ; action=add" 1361 + " ; new_path=" + StringId.getHexId(path) 1362 + " ; new_value=" + vxmlValue; 1363 1364 1365 1366 String s = TSV_MISSING_HEADER; // make sure in sync 1367 String line = 1368 language 1369 + "\t" + ENGLISH.getName(language) 1370 + "\t" + ENGLISH.getName("script", script) 1371 //+ "\t" + englishValue 1372 //+ "\t" + nativeValue 1373 + "\t" + cldrLocaleLevelGoal 1374 //+ "\t" + icuValue 1375 + "\t" + itemLevel 1376 //+ "\t" + status 1377 //+ "\t" + (action == null ? "?" : action.toString()) 1378 + "\t" + (surveyToolStatus == null ? "?" : surveyToolStatus.toString()) 1379 //+ "\t" + stLink 1380 + "\t" + bailey 1381 + "\t" + vxmlValue 1382 + "\t" + vxmlDraftStatus 1383 + "\t" + phString 1384 + "\t" + PathHeader.getUrlForLocalePath(locale, path) 1385 + "\t" + config_text 1386 ; 1387 return line; 1388 } 1389 1390 1391 getIcuValue(String locale)1392 private static String getIcuValue(String locale) { 1393 return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : ""; 1394 } 1395 1396 static final Set<ULocale> ICU_Locales = ImmutableSet.copyOf(ULocale.getAvailableLocales()); 1397 private static CLDRURLS URLS = CONFIG.urls(); 1398 1399 } 1400