1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.FileInputStream; 5 import java.io.IOException; 6 import java.io.PrintWriter; 7 import java.io.StringWriter; 8 import java.util.ArrayList; 9 import java.util.Arrays; 10 import java.util.Calendar; 11 import java.util.Collections; 12 import java.util.Date; 13 import java.util.EnumSet; 14 import java.util.HashMap; 15 import java.util.HashSet; 16 import java.util.Iterator; 17 import java.util.LinkedHashMap; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Locale; 21 import java.util.Map; 22 import java.util.Map.Entry; 23 import java.util.Set; 24 import java.util.TreeMap; 25 import java.util.TreeSet; 26 import java.util.regex.Matcher; 27 import java.util.regex.Pattern; 28 29 import org.unicode.cldr.draft.FileUtilities; 30 import org.unicode.cldr.test.CheckExemplars; 31 import org.unicode.cldr.test.CoverageLevel2; 32 import org.unicode.cldr.test.DisplayAndInputProcessor; 33 import org.unicode.cldr.test.QuickCheck; 34 import org.unicode.cldr.tool.Option.Options; 35 import org.unicode.cldr.util.Builder; 36 import org.unicode.cldr.util.CLDRFile; 37 import org.unicode.cldr.util.CLDRPaths; 38 import org.unicode.cldr.util.Factory; 39 import org.unicode.cldr.util.FileCopier; 40 import org.unicode.cldr.util.LanguageTagParser; 41 import org.unicode.cldr.util.Level; 42 import org.unicode.cldr.util.PathDescription; 43 import org.unicode.cldr.util.PatternCache; 44 import org.unicode.cldr.util.PatternPlaceholders; 45 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo; 46 import org.unicode.cldr.util.PrettyPath; 47 import org.unicode.cldr.util.RegexLookup; 48 import org.unicode.cldr.util.RegexLookup.Finder; 49 import org.unicode.cldr.util.RegexUtilities; 50 import org.unicode.cldr.util.StandardCodes; 51 import org.unicode.cldr.util.StringId; 52 import org.unicode.cldr.util.SupplementalDataInfo; 53 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 54 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 56 import org.unicode.cldr.util.TransliteratorUtilities; 57 import org.unicode.cldr.util.With; 58 import org.unicode.cldr.util.XMLFileReader; 59 import org.unicode.cldr.util.XMLSource; 60 import org.unicode.cldr.util.XPathParts; 61 import org.xml.sax.Attributes; 62 import org.xml.sax.ContentHandler; 63 import org.xml.sax.ErrorHandler; 64 import org.xml.sax.InputSource; 65 import org.xml.sax.Locator; 66 import org.xml.sax.SAXException; 67 import org.xml.sax.SAXParseException; 68 import org.xml.sax.XMLReader; 69 70 import com.ibm.icu.dev.util.CollectionUtilities; 71 import com.ibm.icu.impl.Relation; 72 import com.ibm.icu.impl.Row; 73 import com.ibm.icu.impl.Row.R2; 74 import com.ibm.icu.lang.CharSequences; 75 import com.ibm.icu.text.BreakIterator; 76 import com.ibm.icu.text.DateFormat; 77 import com.ibm.icu.text.MessageFormat; 78 import com.ibm.icu.text.PluralRules; 79 import com.ibm.icu.text.SimpleDateFormat; 80 import com.ibm.icu.text.Transform; 81 import com.ibm.icu.text.UnicodeSet; 82 import com.ibm.icu.util.Output; 83 import com.ibm.icu.util.TimeZone; 84 import com.ibm.icu.util.ULocale; 85 86 public class GenerateXMB { 87 private static final String DEBUG_PATH = "[@type=\"day\"]/unitPattern[@count=\"1\"]"; 88 89 static StandardCodes sc = StandardCodes.make(); 90 91 static final String DATE; 92 static { 93 DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); 94 DATE = dateFormat.format(new Date()); 95 } 96 static final String stock = "en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK"; 97 private static final HashSet<String> REGION_LOCALES = new HashSet<String>(Arrays.asList(stock.split("\\|"))); 98 99 final static Options myOptions = new Options("In normal usage, you set the -t option for the target.") 100 .add("target", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/xmb/", 101 "The target directory for building. Will generate an English .xmb file, and .wsb files for other languages.") 102 .add( 103 "file", 104 ".*", 105 stock, 106 "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering") 107 // "^(sl|fr)$", 108 .add("path", ".*", "Filter the information based on path name, using a regex argument") 109 // "dates.*(pattern|available)", 110 .add("content", ".*", "Filter the information based on content name, using a regex argument") 111 .add("jason", ".*", "Generate JSON versions instead") 112 .add("zone", null, "Show metazoneinfo and exit") 113 .add("wsb", ".*", "Show metazoneinfo and exit") 114 .add("kompare", ".*", CLDRPaths.BASE_DIRECTORY + "../DATA/cldr/common/google-bulk-imports", 115 "Compare data with directory; generate files in -target.") 116 .add("project_name", 'n', ".*", "CLDR", "The ID of the project."); 117 118 static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 119 // static Matcher contentMatcher; 120 static Matcher pathMatcher; 121 static RegexLookup<String> pathFindRemover = new RegexLookup<String>().loadFromFile(GenerateXMB.class, 122 "xmbSkip.txt");; // .compile("//ldml/dates/calendars/calendar\\[@type=\"(?!gregorian).*").matcher(""); 123 static PrettyPath prettyPath = new PrettyPath(); 124 static int errors = 0; 125 static Relation<String, String> path2errors = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 126 127 // enum Handling {SKIP}; 128 static final Matcher datePatternMatcher = PatternCache.get("dates.*(pattern|available)").matcher(""); 129 130 public static final boolean DEBUG = false; 131 132 private static final HashSet<String> SKIP_LOCALES = new HashSet<String>( 133 Arrays.asList(new String[] { "en", "root" })); 134 135 public static String DTD_VERSION; 136 137 private static String projectId; 138 139 enum PlaceholderType { 140 BRACES, // e.g. {NAME} 141 XML, // e.g. <ph name='NAME' /> 142 XML_EXAMPLE // e.g. <ph name='NAME' /><ex>EXAMPLE</ex>{0}</ph> 143 }; 144 main(String[] args)145 public static void main(String[] args) throws Exception { 146 myOptions.parse(args, true); 147 Option option; 148 option = myOptions.get("zone"); 149 if (option.doesOccur()) { 150 showMetazoneInfo(); 151 return; 152 } 153 option = myOptions.get("file"); 154 String fileMatcherString = option.getValue(); 155 option = myOptions.get("content"); 156 Matcher contentMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null; 157 option = myOptions.get("path"); 158 pathMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null; 159 160 String targetDir = myOptions.get("target").getValue(); 161 countFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "counts.txt"); 162 163 Factory cldrFactory1 = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 164 CLDRFile english = cldrFactory1.make("en", true); 165 CLDRFile englishTop = cldrFactory1.make("en", false); 166 DTD_VERSION = englishTop.getDtdVersion(); 167 168 CLDRFile root = cldrFactory1.make("en", true); 169 170 showDefaultContents(targetDir, english); 171 EnglishInfo englishInfo = new EnglishInfo(targetDir, english, root); 172 173 option = myOptions.get("kompare"); 174 if (option.doesOccur()) { 175 compareDirectory = option.getValue(); 176 compareFiles(fileMatcherString, contentMatcher, targetDir, cldrFactory1, english, englishInfo); 177 return; 178 } 179 180 if (myOptions.get("wsb").doesOccur()) { 181 displayWsb(myOptions.get("wsb").getValue(), englishInfo); 182 return; 183 } 184 185 projectId = myOptions.get("project_name").getValue(); 186 187 writeFile(targetDir, "en", englishInfo, english, true, false); 188 writeFile(targetDir + "/filtered/", "en", englishInfo, english, true, true); 189 190 // TODO: 191 // Replace {0}... with placeholders (Mostly done, but need better examples) 192 // Replace datetime fields (MMM, L, ...) with placeholders 193 // Skip items that we don't need translated (most language names, script names, deprecated region names, etc. 194 // Add descriptions 195 // Add pages with detailed descriptions, and links from the descriptions 196 // Represent the items with count= as ICUSyntax 197 // Filter items that we don't want to get translated, and add others that we need even if not in English 198 // Rewire items that are in undistinguished attributes 199 // Test each xml file for validity 200 // Generate strings that let the user choose the placeholder style hh vs HH,...??? 201 202 Factory cldrFactory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, fileMatcherString); 203 LanguageTagParser ltp = new LanguageTagParser(); 204 205 for (String file : cldrFactory2.getAvailable()) { 206 if (SKIP_LOCALES.contains(file)) { 207 continue; 208 } 209 210 // skip all locales with regions (with certain exceptions) 211 if (ltp.set(file).getRegion().length() != 0) { 212 if (!REGION_LOCALES.contains(file)) { 213 continue; 214 } 215 } 216 217 // skip anything without plural rules 218 final PluralInfo plurals = supplementalDataInfo.getPlurals(file, false); 219 if (plurals == null) { 220 System.out.println("Skipping " + file + ", no plural rules"); 221 continue; 222 } 223 224 CLDRFile cldrFile = cldrFactory2.make(file, true); 225 writeFile(targetDir + "/wsb/", file, englishInfo, cldrFile, false, false); 226 writeFile(targetDir + "/wsb/filtered/", file, englishInfo, cldrFile, false, true); 227 countFile.flush(); 228 } 229 countFile.close(); 230 PrintWriter errorFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "errors.txt"); 231 for (Entry<String, Set<String>> entry : path2errors.keyValuesSet()) { 232 errorFile.println(entry); 233 } 234 errorFile.close(); 235 System.out.println("Errors: " + (errors + path2errors.size())); 236 } 237 compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir, Factory cldrFactory1, CLDRFile english, EnglishInfo englishInfo)238 private static void compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir, 239 Factory cldrFactory1, CLDRFile english, 240 EnglishInfo englishInfo) throws IOException { 241 SubmittedPathFixer fixer = new SubmittedPathFixer(); 242 Factory cldrFactory2 = Factory.make(compareDirectory, fileMatcherString); 243 PrintWriter output = null; 244 PrintWriter log = FileUtilities.openUTF8Writer(targetDir + "/log/", "skipped.txt"); 245 246 for (String file : cldrFactory2.getAvailable()) { 247 // System.out.println("Checking " + file); 248 CLDRFile submitted = cldrFactory2.make(file, false); 249 CLDRFile trunk = cldrFactory1.make(file, true); 250 for (String path : With.in(submitted.iterator(null, submitted.getComparator()))) { 251 if (pathMatcher != null && !pathMatcher.reset(path).matches()) { 252 continue; 253 } 254 String submittedValue = submitted.getStringValue(path); 255 if (contentMatcher != null && !contentMatcher.reset(submittedValue).matches()) { 256 continue; 257 } 258 PathStatus pathStatus = shouldSkipPath(path, submittedValue); 259 if (pathStatus == PathStatus.SKIP) { 260 continue; 261 } 262 263 // fix alt 264 String trunkPath = fixer.fix(path, false); 265 String trunkValue = trunk.getStringValue(trunkPath); 266 if (CharSequences.equals(submittedValue, trunkValue)) { 267 continue; 268 } 269 if (output == null) { 270 output = FileUtilities.openUTF8Writer(targetDir, file + ".txt"); 271 output.println("ID\tEnglish\tSource\tRelease\tDescription"); 272 } 273 String englishValue = english.getStringValue(trunkPath); 274 final PathInfo pathInfo = englishInfo.getPathInfo(trunkPath); 275 String description; 276 if (pathInfo == null) { 277 log.println(file + "\tDescription unavailable for " + trunkPath); 278 errors++; 279 String temp = fixer.fix(path, true); 280 englishInfo.getPathInfo(trunkPath); 281 continue; 282 } else { 283 description = pathInfo.getDescription(); 284 } 285 long id = StringId.getId(trunkPath); 286 if (englishValue == null) { 287 log.println(file + "\tEmpty English for " + trunkPath); 288 errors++; 289 continue; 290 } 291 output.println(id + "\t" + ssquote(englishValue, false) + "\t" + ssquote(submittedValue, false) + "\t" 292 + ssquote(trunkValue, true) + "\t" + description); 293 } 294 if (output != null) { 295 output.close(); 296 output = null; 297 } 298 log.flush(); 299 } 300 log.close(); 301 } 302 303 static Output<String[]> matches = new Output<String[]>(); 304 static List<String> failures = new ArrayList<String>(); 305 static Output<Finder> matcherFound = new Output<Finder>(); 306 307 enum PathStatus { 308 SKIP, KEEP, MAYBE 309 } 310 shouldSkipPath(String path, String value)311 public static PathStatus shouldSkipPath(String path, String value) { 312 // skip if 313 List<String> myFailures = null; 314 if (false && path.contains("currencies") && path.contains("symbol")) { 315 myFailures = failures; 316 } 317 String skipPath = pathFindRemover.get(path, null, matches, matcherFound, myFailures); 318 if (myFailures != null && failures.size() != 0) { 319 System.out.println("Failures\n\t" + CollectionUtilities.join(failures, "\n\t")); 320 failures.clear(); 321 } 322 if (skipPath == null || skipPath.equals("MAYBE")) { 323 return PathStatus.MAYBE; 324 } else if (skipPath.equals("VALUE")) { 325 return value.equals(matches.value[1]) ? PathStatus.SKIP : PathStatus.MAYBE; 326 } else if (skipPath.equals("SKIP")) { 327 return PathStatus.SKIP; 328 } else if (skipPath.equals("KEEP")) { 329 return PathStatus.KEEP; 330 } 331 throw new IllegalArgumentException("Unexpected xmbSkip.txt value: " + skipPath); 332 } 333 ssquote(String englishValue, boolean showRemoved)334 private static String ssquote(String englishValue, boolean showRemoved) { 335 if (englishValue == null) { 336 return showRemoved ? "[removed]" : "[empty]"; 337 } 338 englishValue = englishValue.replace("\"", """); 339 return englishValue; 340 } 341 342 static class SubmittedPathFixer { 343 private static final Pattern PATH_FIX = PatternCache.get("\\[@alt=\"" + 344 "(?:proposed|((?!proposed)[-a-zA-Z0-9]*)-proposed)" + 345 "-u\\d+-implicit[0-9.]+" + 346 "(?:-proposed-u\\d+-implicit[0-9.]+)?" + // NOTE: we allow duplicated alt values because of a generation 347 // bug. 348 // -proposed-u971-implicit2.0 349 "\"]"); 350 static Matcher pathFix = PATH_FIX.matcher(""); 351 fix(String path, boolean debug)352 public String fix(String path, boolean debug) { 353 if (pathFix.reset(path).find()) { 354 if (debug) { 355 // debug in case we get a mismatch 356 String temp = "REGEX:\t" + 357 RegexUtilities.showMismatch(PATH_FIX, path.substring(pathFix.start(0))); 358 } 359 final String group = pathFix.group(1); 360 String replacement = group == null ? "" : "[@alt=\"" + group + "\"]"; 361 String trunkPath = path.substring(0, pathFix.start(0)) + replacement + path.substring(pathFix.end(0)); 362 // HACK because of change in CLDR defaults 363 if (trunkPath.startsWith("//ldml/numbers/symbols/")) { 364 trunkPath = "//ldml/numbers/symbols[@numberSystem=\"latn\"]/" 365 + trunkPath.substring("//ldml/numbers/symbols/".length()); 366 } 367 return trunkPath; 368 } 369 return path; 370 } 371 372 } 373 showDefaultContents(String targetDir, CLDRFile english)374 private static void showDefaultContents(String targetDir, CLDRFile english) throws IOException { 375 PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "locales.txt"); 376 String[] locales = stock.split("\\|"); 377 Set<R2<String, String>> sorted = new TreeSet<R2<String, String>>(); 378 for (String locale : locales) { 379 if (locale.isEmpty()) continue; 380 String name = english.getName(locale); 381 R2<String, String> row = Row.of(name, locale); 382 sorted.add(row); 383 } 384 Set<String> defaultContents = supplementalDataInfo.getDefaultContentLocales(); 385 386 for (R2<String, String> row : sorted) { 387 String locale = row.get1(); 388 String dlocale = getDefaultContentLocale(locale, defaultContents); 389 out.println(row.get0() + "\t" + locale + "\t" + english.getName(dlocale) + "\t" + dlocale); 390 } 391 out.close(); 392 } 393 getDefaultContentLocale(String locale, Set<String> defaultContents)394 private static String getDefaultContentLocale(String locale, Set<String> defaultContents) { 395 String best = null; 396 for (String s : defaultContents) { 397 if (s.startsWith(locale)) { 398 if (best == null) { 399 best = s; 400 } else if (s.length() < best.length()) { 401 best = s; 402 } 403 } 404 } 405 if (best == null) { 406 return locale; 407 } 408 return best; 409 } 410 411 static final Pattern COUNT_OR_ALT_ATTRIBUTE = PatternCache.get("\\[@(count)=\"([^\"]*)\"]"); 412 static final Pattern PLURAL_XPATH = Pattern 413 .compile("//ldml/(units/unit|numbers/(decimal|currency)Formats).*\\[@count=\"\\w+\"].*"); 414 static final Pattern SKIP_EXEMPLAR_TEST = PatternCache.get( 415 "/(currencySpacing" 416 + "|hourFormat" 417 + "|exemplarCharacters" 418 + "|pattern" 419 + "|localizedPatternChars" 420 + "|segmentations" 421 + "|dateFormatItem" 422 + "|references" 423 + "|unitPattern" 424 + "|intervalFormatItem" 425 + "|localeDisplayNames/variants/" 426 + "|commonlyUsed" 427 + "|currency.*/symbol" 428 + "|symbols/(exponential|nan))"); 429 430 static final Matcher skipExemplarTest = SKIP_EXEMPLAR_TEST.matcher(""); 431 static final UnicodeSet ASCII_LATIN = new UnicodeSet("[A-Za-z]").freeze(); 432 static final UnicodeSet LATIN = new UnicodeSet("[:sc=Latn:]").freeze(); 433 434 static final Matcher keepFromRoot = PatternCache.get("/(exemplarCity|currencies/currency.*/symbol)").matcher(""); 435 static final Matcher currencyDisplayName = Pattern 436 .compile("/currencies/currency\\[@type=\"([^\"]*)\"]/displayName").matcher(""); 437 writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile, boolean isEnglish, boolean filter)438 private static void writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile, 439 boolean isEnglish, boolean filter) throws IOException { 440 441 String extension = "xml"; 442 XPathParts xpathParts = new XPathParts(); 443 Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 444 Set<String> seenStarred = new HashSet<String>(); 445 446 Relation<String, Row.R2<PathInfo, String>> countItems = Relation.of( 447 new TreeMap<String, Set<Row.R2<PathInfo, String>>>(), TreeSet.class); 448 Matcher countMatcher = COUNT_OR_ALT_ATTRIBUTE.matcher(""); 449 int lineCount = 0; 450 int wordCount = 0; 451 int messageCount = 0; 452 453 StringWriter buffer = new StringWriter(); 454 PrintWriter out1 = new PrintWriter(buffer); 455 StringWriter buffer3 = new StringWriter(); 456 PrintWriter out3 = new PrintWriter(buffer3); 457 UnicodeSet exemplars = getExemplars(cldrFile); 458 459 for (PathInfo pathInfo : englishInfo) { 460 if (false && pathInfo.id == 46139888945574604L) { // for debugging 461 System.out.println("?"); 462 } 463 String path = pathInfo.getPath(); 464 String value; 465 if (isEnglish) { 466 value = pathInfo.englishValue; 467 } else { 468 value = cldrFile.getStringValue(path); 469 } 470 // Remove quotes from number formats (we'll put them back in during 471 // post-processing). 472 // TODO: we should actually call daip.processForDisplay() here, but 473 // it does more stuff than we need it to do, e.g. stripping the 474 // brackets from exemplarCharacters. 475 if (DisplayAndInputProcessor.NUMBER_FORMAT_XPATH.matcher(path).matches()) { 476 value = value.replace("'", ""); 477 } 478 479 // skip root if not English 480 if (!isEnglish && value != null && !keepFromRoot.reset(path).find()) { // note that mismatched script will 481 // be checked later 482 String locale = cldrFile.getSourceLocaleID(path, null); 483 if (locale.equals("root")) { 484 reasonsToPaths.put("root", path + "\t" + value); 485 continue; 486 } 487 if (locale.equals(XMLSource.CODE_FALLBACK_ID)) { 488 reasonsToPaths.put("codeFallback", path + "\t" + value); 489 continue; 490 } 491 } 492 boolean hasPlurals = PLURAL_XPATH.matcher(path).matches(); 493 if (filter && !hasPlurals) { 494 String starred = pathInfo.getStarredPath(); 495 if (seenStarred.contains(starred)) { 496 continue; 497 } 498 seenStarred.add(starred); 499 } 500 if (value == null) { 501 reasonsToPaths.put("missing", path + " " + value); 502 continue; 503 } 504 if (!isEnglish) { 505 String fullPath = cldrFile.getFullXPath(path); 506 if (fullPath.contains("draft")) { 507 xpathParts.set(fullPath); 508 String draftValue = xpathParts.getAttributeValue(-1, "draft"); 509 if (!draftValue.equals("contributed")) { 510 reasonsToPaths.put(draftValue, path + "\t" + value); 511 continue; 512 } 513 } 514 } 515 if (!isEnglish 516 && !exemplars.containsAll(value) 517 && !skipExemplarTest.reset(path).find()) { 518 // check for special cases in currency names. If the code itself occurs in the name, that's ok 519 // ldml/numbers/currencies/currency[@type="XXX"]/displayName 520 boolean bad = true; 521 if (currencyDisplayName.reset(path).find()) { 522 String code = currencyDisplayName.group(1); 523 String value2 = value.replace(code, ""); 524 bad = !exemplars.containsAll(value2); 525 } 526 if (bad) { 527 UnicodeSet diff = new UnicodeSet().addAll(value).removeAll(exemplars); 528 reasonsToPaths.put("exemplars", path + "\t" + value + "\t" + diff); 529 continue; 530 } 531 } 532 // String fullPath = cldrFile.getStringValue(path); 533 // //ldml/units/unit[@type="day"]/unitPattern[@count="one"] 534 if (hasPlurals) { 535 countMatcher.reset(path).find(); 536 String countLessPath = countMatcher.replaceAll(""); 537 countItems.put(countLessPath, Row.of(pathInfo, value)); 538 continue; 539 } 540 if (!isEnglish && pathInfo.changedEnglish) { 541 reasonsToPaths.put("changed-english", path); 542 } else { 543 writePathInfo(out1, pathInfo, value, isEnglish); 544 messageCount++; 545 } 546 if (isEnglish) { 547 writeJavaInfo(out3, pathInfo.getStringId(), pathInfo.getPath(), value); 548 } 549 wordCount += pathInfo.wordCount; 550 ++lineCount; 551 } 552 R2<Integer, Integer> lineWordCount = writeCountPathInfo(out1, out3, cldrFile.getLocaleID(), countItems, 553 isEnglish, filter); 554 messageCount += lineWordCount.get0(); 555 lineCount += lineWordCount.get0(); 556 wordCount += lineWordCount.get1(); 557 if (!filter && countItems.size() != lineWordCount.get0().intValue()) { 558 System.out.println(localeId + "\t" + countItems.size() + "\t" + lineWordCount.get0().intValue()); 559 } 560 out1.flush(); 561 out3.flush(); 562 563 String file = LanguageCodeConverter.toGoogleLocaleId(localeId); 564 String localeName = englishInfo.getName(localeId); 565 PrintWriter out = FileUtilities.openUTF8Writer(targetDir, file + "." + extension); 566 567 if (isEnglish) { 568 FileCopier.copy(GenerateXMB.class, "xmb-dtd.xml", out); 569 // FileUtilities.appendFile(GenerateXMB.class, "xmb-dtd.xml", out); 570 out.println("<!-- " + localeName + " -->"); 571 out.println("<messagebundle class='" + projectId + "'> <!-- version: " + DTD_VERSION + ", date: " + DATE 572 + " -->"); 573 out.println(buffer.toString()); 574 out.println("</messagebundle>"); 575 576 PrintWriter out3File = FileUtilities.openUTF8Writer(targetDir, "IdToPath.java"); 577 out3File.println("package org.unicode.cldr.tool;"); 578 out3File.println(); 579 out3File.println("import java.util.HashMap;"); 580 out3File.println(); 581 out3File.println("/**"); 582 out3File.println(" * Autogenerated by GenerateXMB for use by ConvertXTB."); 583 out3File.println(" * Do not manually edit this file."); 584 out3File.println(" */"); 585 out3File.println("public class IdToPath {"); 586 out3File.println(" static final HashMap<String,String> map = new HashMap<String,String>();"); 587 out3File.println(" public static String getPath(String id) {"); 588 out3File.println(" return map.get(id);"); 589 out3File.println(" }"); 590 out3File.println(" static {"); 591 out3File.println(" String[][] data = {"); 592 out3File.println(buffer3); 593 out3File.println(" };"); 594 out3File.println(" for (String[] pair : data) {"); 595 out3File.println(" map.put(pair[0], pair[1]);"); 596 out3File.println(" }"); 597 out3File.println(" }"); 598 out3File.println("}"); 599 out3File.close(); 600 } else { 601 602 // FileUtilities.appendFile(GenerateXMB.class, "wsb-dtd.xml", out); 603 FileCopier.copy(GenerateXMB.class, "wsb-dtd.xml", out); 604 out.println("<!-- " + localeName + " -->"); 605 out.println("<worldserverbundles lazarus_id='dummy' date='" + DATE + "'> <!-- version: " + DTD_VERSION 606 + " -->"); 607 out.println(" <worldserverbundle project_id='" + projectId + "' message_count='" + messageCount + "'>"); 608 out.println(buffer.toString()); 609 out.println(" </worldserverbundle>"); 610 out.println("</worldserverbundles>"); 611 } 612 out.close(); 613 QuickCheck.check(new File(targetDir, file + "." + extension)); 614 if (!filter) { 615 countFile.println(file + "\t" + lineCount + "\t" + wordCount); 616 } 617 if (!isEnglish && !filter) { 618 writeReasons(reasonsToPaths, targetDir, file); 619 } 620 } 621 writeJavaInfo(PrintWriter out3, String id, String path, String value)622 private static void writeJavaInfo(PrintWriter out3, String id, String path, String value) { 623 out3.println(" {\"" + id + "\",\"" + path.replace("\"", "\\\"") + "\",\"" 624 + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"},"); 625 } 626 getExemplars(CLDRFile cldrFile)627 private static UnicodeSet getExemplars(CLDRFile cldrFile) { 628 UnicodeSet exemplars = cldrFile.getExemplarSet("", CLDRFile.WinningChoice.WINNING); 629 boolean isLatin = exemplars.containsSome(ASCII_LATIN); 630 exemplars.addAll(CheckExemplars.AlwaysOK); 631 UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", CLDRFile.WinningChoice.WINNING); 632 if (auxExemplars != null) { 633 exemplars.addAll(auxExemplars); 634 } 635 if (!isLatin) { 636 exemplars.removeAll(LATIN); 637 } 638 exemplars.freeze(); 639 return exemplars; 640 } 641 642 static final Pattern COUNT_ATTRIBUTE = PatternCache.get("\\[@count=\"([^\"]*)\"]"); 643 static final Pattern PLURAL_NUMBER = PatternCache.get("(decimal|number)Format"); 644 writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale, Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter)645 private static Row.R2<Integer, Integer> writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale, 646 Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter) { 647 Matcher m = COUNT_ATTRIBUTE.matcher(""); 648 int wordCount = 0; 649 PluralInfo pluralInfo = supplementalDataInfo.getPlurals(locale); 650 int lineCount = 0; 651 Set<String> errorSet = new LinkedHashSet<String>(); 652 for (Entry<String, Set<R2<PathInfo, String>>> entry : countItems.keyValuesSet()) { 653 String countLessPath = entry.getKey(); 654 Map<String, String> fullValues = new TreeMap<String, String>(); 655 PathInfo pathInfo = null; 656 String value = null; 657 for (R2<PathInfo, String> entry2 : entry.getValue()) { 658 PathInfo pathInfoN = entry2.get0(); 659 m.reset(pathInfoN.getPath()).find(); 660 String count = m.group(1); 661 if (count.equals("other")) { 662 pathInfo = pathInfoN; 663 } 664 value = entry2.get1(); 665 fullValues.put(count, value); 666 } 667 if (pathInfo == null) { 668 continue; 669 } 670 if (fullValues.size() < 2) { 671 // if we don't have two count values, skip 672 System.out.println(locale + "\tMust have 2 count values: " + entry.getKey()); 673 continue; 674 } 675 String fullPlurals = showPlurals(fullValues, locale, pathInfo, pluralInfo, isEnglish, errorSet); 676 if (fullPlurals == null) { 677 System.out.println(locale + "\tCan't format plurals for: " + entry.getKey() + "\t" + errorSet); 678 errors++; 679 continue; 680 } 681 682 out.println(); 683 out.println(" <!-- " 684 // + prettyPath.getPrettyPath(pathInfo.getPath(), false) + " ; " 685 + countLessPath + " -->"); 686 out.println(" <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'"); 687 out.println(" >" + fullPlurals + "</msg>"); 688 // Use the last plural value in the loop because we only need it for example purposes. 689 writeJavaInfo(out3, pathInfo.getStringId(), countLessPath, value); 690 // if (!isEnglish || pathInfo.placeholderReplacements != null) { 691 // out.println("\t<!-- English original:\t" + pathInfo.getEnglishValue() + "\t-->"); 692 // } 693 out.flush(); 694 ++lineCount; 695 wordCount += pathInfo.wordCount * 3; 696 if (filter) { 697 break; 698 } 699 } 700 return Row.of(lineCount, wordCount); 701 } 702 703 static final String[] PLURAL_KEYS = { "=0", "=1", "zero", "one", "two", "few", "many", "other" }; 704 static final String[] EXTRA_PLURAL_KEYS = { "0", "1", "zero", "one", "two", "few", "many" }; 705 showPlurals(Map<String, String> values, String locale, PathInfo pathInfo, PluralInfo pluralInfo, boolean isEnglish, Set<String> errorSet)706 private static String showPlurals(Map<String, String> values, 707 String locale, PathInfo pathInfo, PluralInfo pluralInfo, 708 boolean isEnglish, Set<String> errorSet) { 709 errorSet.clear(); 710 /* 711 * Desired output for English XMB 712 * <msg desc= 713 * "[ICU Syntax] Plural forms for a number of hours. These are special messages: before translating, see cldr.org/translation/plurals." 714 * > 715 * {LENGTH, select, 716 * abbreviated { 717 * {NUMBER_OF_HOURS, plural, 718 * =0 {0 hrs} 719 * =1 {1 hr} 720 * zero {# hrs} 721 * one {# hrs} 722 * two {# hrs} 723 * few {# hrs} 724 * many {# hrs} 725 * other {# hrs}}} 726 * full { 727 * {NUMBER_OF_HOURS, plural, 728 * =0 {0 hours} 729 * =1 {1 hour} 730 * zero {# hours} 731 * one {# hours} 732 * two {# hours} 733 * few {# hours} 734 * many {# hours} 735 * other {# hours}}}} 736 * </msg> 737 * 738 * NOTE: For the WSB, the format has to match the following, WITHOUT LFs 739 * 740 * <msg id='1431840205484292448' desc='[ICU Syntax] who is viewing? This message requires special attention. 741 * Please follow the instructions here: 742 * https://sites.google.com/a/google.com/localization-info-site/Home/training/icusyntax'> 743 * <ph name='[PLURAL_NUM_USERS_OFFSET_1]' ex='Special placeholder used in [ICU Syntax] messages, see 744 * instructions page.'/> 745 * <ph name='[=0]'/>No one else is viewing. 746 * <ph name='[=1]'/><ph name='USERNAME' ex='Bob'/> is viewing. 747 * <ph name='[=2]'/><ph name='USERNAME' ex='Bob'/> and one other are viewing. 748 * <ph name='[ZERO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 749 * <ph name='[ONE]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 750 * <ph name='[TWO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 751 * <ph name='[FEW]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 752 * <ph name='[MANY]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 753 * <ph name='[OTHER]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 754 * <ph name='[END_PLURAL]'/> 755 * </msg> 756 */ 757 Matcher matcher = PLURAL_NUMBER.matcher(pathInfo.getPath()); 758 String var = null; 759 if (matcher.find()) { 760 // Plural doesn't use placeholders so create a label. 761 var = matcher.group(1).toUpperCase() + "_NUMBER"; 762 } else { 763 var = pathInfo.getFirstVariable(); 764 } 765 766 StringBuilder result = new StringBuilder(); 767 if (isEnglish) { 768 result.append('{') 769 // .append("PLURAL_") 770 .append(var).append(",plural,"); 771 } else { 772 result.append("<ph name='[PLURAL_").append(var).append("]'/>"); // ex='Special placeholder used in [ICU 773 // Syntax] messages, see instructions page.' 774 } 775 for (String key : PLURAL_KEYS) { 776 String value; 777 String coreKey = key.startsWith("=") ? key.substring(1, 2) : key; 778 value = values.get(coreKey); 779 if (value == null) { 780 if (key.startsWith("=")) { 781 String stringCount = key.substring(1); 782 // handle both =x case, and the category 783 int intCount = Integer.parseInt(stringCount); 784 Count count = pluralInfo.getCount(intCount); 785 value = values.get(count.toString()); 786 if (value == null) { 787 errorSet.add("Bad key/value " + key + "='" + value + "' in " + values); 788 return null; 789 } 790 value = value.replace("{0}", stringCount); 791 } else { 792 value = values.get("other"); 793 if (value == null) { 794 errorSet.add("No 'other' value in " + values); 795 return null; 796 } 797 } 798 } 799 String newValue = MessageFormat.format(MessageFormat.autoQuoteApostrophe(value), 800 new Object[] { key.startsWith("=") ? key.substring(1, 2) : "#" }); 801 PlaceholderType type = isEnglish ? PlaceholderType.BRACES : PlaceholderType.XML; 802 newValue = pathInfo.transformValue(newValue, type); 803 if (isEnglish) { 804 result.append("\n ").append(key).append(" {").append(newValue).append('}'); 805 } else { 806 String prefix = key.toUpperCase(Locale.ENGLISH); 807 result.append("<!--\n --><ph name='[").append(prefix).append("]'/>").append(newValue); 808 } 809 } 810 if (isEnglish) { 811 result.append('}'); 812 } else { 813 result.append("<!--\n --><ph name='[END_PLURAL]'/>"); 814 } 815 return result.toString(); 816 } 817 writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish)818 private static void writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish) { 819 out.println(); 820 out.println(" <!-- " + pathInfo.getPath() + " -->"); 821 out.println(" <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'"); 822 PlaceholderType type = isEnglish ? PlaceholderType.XML_EXAMPLE : PlaceholderType.XML; 823 String transformValue = pathInfo.transformValue(value, type); 824 out.println(" >" + transformValue + "</msg>"); 825 value = TransliteratorUtilities.toHTML.transform(value); 826 if (!value.equals(transformValue) && (!isEnglish || pathInfo.placeholders != null)) { 827 out.println(" <!-- English original: " + value + " -->"); 828 } 829 out.flush(); 830 } 831 writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename)832 private static void writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename) 833 throws IOException { 834 targetDir += "/skipped/"; 835 filename += ".txt"; 836 PrintWriter out = FileUtilities.openUTF8Writer(targetDir, filename); 837 out.println("# " + DATE); 838 for (Entry<String, Set<String>> reasonToSet : reasonsToPaths.keyValuesSet()) { 839 for (String path : reasonToSet.getValue()) { 840 out.println(reasonToSet.getKey() + " " + path); 841 } 842 } 843 out.close(); 844 } 845 846 static class PathInfo implements Comparable<PathInfo> { 847 private static final Pattern PLACEHOLDER = PatternCache.get("\\{(\\d)}"); 848 849 private final String path; 850 private final Long id; 851 private final String stringId; 852 private final String englishValue; 853 private final boolean changedEnglish; 854 private final Map<String, PlaceholderInfo> placeholders; 855 private final String description; 856 private final String starredPath; 857 private final int wordCount; 858 859 private static final BreakIterator bi = BreakIterator.getWordInstance(ULocale.ENGLISH); 860 private static final UnicodeSet ALPHABETIC = new UnicodeSet("[:Alphabetic:]"); 861 PathInfo(String path, String englishValue, boolean changedEnglish, Map<String, PlaceholderInfo> placeholders, String description, String starredPath)862 public PathInfo(String path, String englishValue, boolean changedEnglish, 863 Map<String, PlaceholderInfo> placeholders, 864 String description, String starredPath) { 865 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) { 866 int x = 0; 867 } 868 if (description == null) { 869 path2errors.put(path, "missing description"); 870 } 871 this.path = path; 872 long id = StringId.getId(path); 873 this.id = id; 874 stringId = String.valueOf(id); 875 this.englishValue = englishValue; 876 this.changedEnglish = changedEnglish; 877 this.placeholders = placeholders; 878 this.description = description == null ? null : description.intern(); 879 this.starredPath = starredPath; 880 // count words 881 int tempCount = 0; 882 bi.setText(englishValue); 883 int start = bi.first(); 884 for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) { 885 String word = englishValue.substring(start, end); 886 if (ALPHABETIC.containsSome(word)) { 887 ++tempCount; 888 } 889 } 890 wordCount = tempCount == 0 ? 1 : tempCount; 891 } 892 getFirstVariable()893 public String getFirstVariable() { 894 // ... name='FIRST_PART_OF_TEXT' ... 895 PlaceholderInfo info = placeholders.get("{0}"); 896 if (info == null) { 897 throw new IllegalArgumentException("Missing {0} for " + this); 898 } 899 return info.name; 900 } 901 getPath()902 public String getPath() { 903 return path; 904 } 905 getId()906 public Long getId() { 907 return id; 908 } 909 getStringId()910 public String getStringId() { 911 return stringId; 912 } 913 getEnglishValue()914 public String getEnglishValue() { 915 return englishValue; 916 } 917 getDescription()918 public String getDescription() { 919 return description; 920 } 921 getStarredPath()922 public String getStarredPath() { 923 return starredPath; 924 } 925 getPlaceholderReplacementsToOriginal()926 public Map<String, String> getPlaceholderReplacementsToOriginal() { 927 if (placeholders == null) return null; 928 Map<String, String> placeholderOutput = new LinkedHashMap<String, String>(); 929 for (String id : placeholders.keySet()) { 930 placeholderOutput.put(id, getPlaceholderWithExample(id)); 931 } 932 return placeholderOutput; 933 } 934 getPlaceholderWithExample(String placeholder)935 private String getPlaceholderWithExample(String placeholder) { 936 PlaceholderInfo info = placeholders.get(placeholder); 937 // <ph name='x'><ex>xxx</ex>yyy</ph> 938 return "<ph name='" + info.name + "'><ex>" + info.example + "</ex>" + placeholder + "</ph>"; 939 } 940 941 // static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser(); 942 transformValue(String value, PlaceholderType type)943 private String transformValue(String value, PlaceholderType type) { 944 value = TransliteratorUtilities.toHTML.transform(value); 945 if (placeholders == null) return value; 946 947 String placeholderFormat = ""; 948 switch (type) { 949 case BRACES: 950 placeholderFormat = "'{'{0}'}'"; 951 break; 952 case XML: 953 placeholderFormat = "<ph name=''[{0}]'' />"; 954 break; 955 case XML_EXAMPLE: 956 placeholderFormat = "<ph name=''{0}''><ex>{1}</ex>'{'{2}'}'</ph>"; 957 break; 958 } 959 Matcher matcher = PLACEHOLDER.matcher(value); 960 StringBuffer buffer = new StringBuffer(); 961 int start = 0; 962 while (matcher.find()) { 963 buffer.append(value.substring(start, matcher.start())); 964 PlaceholderInfo info = placeholders.get(matcher.group()); 965 buffer.append(MessageFormat.format(placeholderFormat, 966 new Object[] { info.name, info.example, matcher.group(1) })); 967 start = matcher.end(); 968 } 969 buffer.append(value.substring(start)); 970 return buffer.toString(); 971 } 972 replacePlaceholders(String value, String placeholderStart, String placeholderEnd)973 private String replacePlaceholders(String value, String placeholderStart, String placeholderEnd) { 974 Matcher matcher = PLACEHOLDER.matcher(value); 975 StringBuffer buffer = new StringBuffer(); 976 int start = 0; 977 while (matcher.find()) { 978 buffer.append(value.substring(start, matcher.start())); 979 String name = placeholders.get(matcher.group()).name; 980 buffer.append(placeholderStart).append(name).append(placeholderEnd); 981 start = matcher.end(); 982 } 983 buffer.append(value.substring(start)); 984 return buffer.toString(); 985 } 986 987 @Override compareTo(PathInfo arg0)988 public int compareTo(PathInfo arg0) { 989 return path.compareTo(arg0.path); 990 } 991 toString()992 public String toString() { 993 return path; 994 } 995 } 996 997 static class EnglishInfo implements Iterable<PathInfo> { 998 999 final Map<String, PathInfo> pathToPathInfo = new TreeMap<String, PathInfo>(); 1000 final Map<Long, PathInfo> longToPathInfo = new HashMap<Long, PathInfo>(); 1001 final CLDRFile english; 1002 getPathInfo(long hash)1003 PathInfo getPathInfo(long hash) { 1004 return longToPathInfo.get(hash); 1005 } 1006 getName(String localeId)1007 public String getName(String localeId) { 1008 return english.getName(localeId); 1009 } 1010 getPathInfo(String path)1011 PathInfo getPathInfo(String path) { 1012 return pathToPathInfo.get(path); 1013 } 1014 EnglishInfo(String targetDir, CLDRFile english, CLDRFile root)1015 EnglishInfo(String targetDir, CLDRFile english, CLDRFile root) throws Exception { 1016 1017 Map<String, String> oldPathValueMap = ReadXMB.load(CLDRPaths.BASE_DIRECTORY + 1018 "/cldr-tools/org/unicode/cldr/unittest/data/xmb/", 1019 "en.xml"); 1020 1021 PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance(); 1022 1023 this.english = english; 1024 // we don't want the fully resolved paths, but we do want the direct inheritance from root. 1025 //Status status = new Status(); 1026 Map<String, List<Set<String>>> starredPaths = new TreeMap<String, List<Set<String>>>(); 1027 1028 HashSet<String> metazonePaths = new HashSet<String>(); 1029 // ^//ldml/dates/timeZoneNames/metazone\[@type="([^"]*)"] 1030 for (MetazoneInfo metazoneInfo : MetazoneInfo.METAZONE_LIST) { 1031 for (String item : metazoneInfo.getTypes()) { 1032 String path = "//ldml/dates/timeZoneNames/metazone[@type=\"" + metazoneInfo.metazoneId + "\"]" 1033 + item; 1034 metazonePaths.add(path); 1035 } 1036 } 1037 1038 // TODO add short countries 1039 HashSet<String> extraLanguages = new HashSet<String>(); 1040 // ldml/localeDisplayNames/languages/language[@type=".*"] 1041 1042 for (String langId : PathDescription.EXTRA_LANGUAGES) { 1043 String langPath = "//ldml/localeDisplayNames/languages/language[@type=\"" + langId + "\"]"; 1044 extraLanguages.add(langPath); 1045 } 1046 1047 Set<String> sorted = Builder.with(new TreeSet<String>()) 1048 .addAll(english) 1049 .removeAll( 1050 new Transform<String, Boolean>() { 1051 public Boolean transform(String source) { 1052 return source.startsWith("//ldml/dates/timeZoneNames/metazone") ? Boolean.TRUE 1053 : Boolean.FALSE; 1054 } 1055 }) 1056 .get(); 1057 sorted.addAll(metazonePaths); 1058 if (DEBUG) { 1059 TreeSet<String> diffs = new TreeSet<String>(extraLanguages); 1060 diffs.removeAll(sorted); 1061 System.out.println(diffs); 1062 } 1063 sorted.addAll(extraLanguages); 1064 1065 // add the extra Count items. 1066 Map<String, String> extras = new HashMap<String, String>(); 1067 Matcher m = COUNT_ATTRIBUTE.matcher(""); 1068 1069 for (String path : sorted) { 1070 if (path.contains("[@count=\"")) { 1071 m.reset(path).find(); 1072 for (String key : EXTRA_PLURAL_KEYS) { 1073 String path2 = path.substring(0, m.start(1)) + key + path.substring(m.end(1)); 1074 extras.put(path2, path); 1075 } 1076 } 1077 // if (path.contains("ellipsis")) { 1078 // System.out.println(path); 1079 // } 1080 } 1081 sorted.addAll(extras.keySet()); 1082 1083 Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 1084 Set<String> missingDescriptions = new TreeSet<String>(); 1085 //Output<String[]> pathArguments = new Output<String[]>(); 1086 1087 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en"); 1088 RegexLookup<Boolean> coverageAllow = new RegexLookup<Boolean>() 1089 .add("^//ldml/localeDisplayNames/keys/key", true) 1090 .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(jv|zxx|gsw|eo)\"]", true) 1091 .add("^//ldml/localeDisplayNames/scripts/script", true) 1092 .add("^//ldml/localeDisplayNames/types/type", true) 1093 .add( 1094 "^//ldml/dates/calendars/calendar\\[@type=\"[^\"]*\"]/dayPeriods/dayPeriodContext\\[@type=\"format\"]", 1095 true); 1096 1097 // TODO: for each count='other' path, add the other keywords and values 1098 PathDescription pathDescription = new PathDescription(GenerateXMB.supplementalDataInfo, english, extras, 1099 starredPaths, PathDescription.ErrorHandling.SKIP); 1100 1101 for (String path : sorted) { 1102 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) { 1103 int x = 0; 1104 } 1105 String value = english.getStringValue(path); 1106 Level level = coverageLevel.getLevel(path); 1107 if (value == null) { 1108 value = "[EMPTY]"; 1109 addSkipReasons(reasonsToPaths, "empty-value", level, path, value); 1110 continue; 1111 } 1112 if (pathMatcher != null 1113 && !pathMatcher.reset(path).find()) { 1114 addSkipReasons(reasonsToPaths, "path-parameter", level, path, value); 1115 continue; 1116 } 1117 PathStatus pathStatus = shouldSkipPath(path, value); 1118 if (pathStatus == PathStatus.SKIP) { 1119 addSkipReasons(reasonsToPaths, "path-remove", level, path, value); 1120 continue; 1121 } 1122 1123 if (level.compareTo(Level.MODERN) > 0 && pathStatus != PathStatus.KEEP) { 1124 if (coverageAllow.get(path) == null) { // HACK 1125 addSkipReasons(reasonsToPaths, "coverage", level, path, value); 1126 continue; 1127 } else { 1128 addSkipReasons(reasonsToPaths, "coverage*", level, path, value); 1129 continue; 1130 // System.out.println("Not skipping " + path); 1131 } 1132 } 1133 1134 String description = pathDescription.getDescription(path, value, level, null); 1135 EnumSet<PathDescription.Status> descriptionStatus = pathDescription.getStatus(); 1136 if (!descriptionStatus.isEmpty()) { 1137 addSkipReasons(reasonsToPaths, descriptionStatus.toString(), level, path, value); 1138 description = null; 1139 } else { 1140 description = "[ICU CLDR] " + description; 1141 } 1142 1143 String oldValue = oldPathValueMap.get(path); 1144 boolean changedEnglish = !value.equals(oldValue); 1145 PathInfo row = new PathInfo(path, value, changedEnglish, patternPlaceholders.get(path), description, 1146 pathDescription.getStarredPathOutput()); 1147 1148 if (description == PathDescription.MISSING_DESCRIPTION) { 1149 missingDescriptions.add(pathDescription.getStarredPathOutput()); 1150 } 1151 1152 Long hash = row.getId(); 1153 if (longToPathInfo.containsKey(hash)) { 1154 throw new IllegalArgumentException("Id collision for " 1155 + path + " and " + longToPathInfo.get(hash).getPath()); 1156 } 1157 pathToPathInfo.put(path, row); 1158 longToPathInfo.put(hash, row); 1159 if (value.contains("{0}") && patternPlaceholders.get(path) == null) { 1160 System.out.println("ERROR, no placeholders for {0}...: " + path + " ; " + value); 1161 } 1162 } 1163 1164 PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-paths.txt"); 1165 out.println("# " + DATE); 1166 for (Entry<String, List<Set<String>>> starredPath : starredPaths.entrySet()) { 1167 out.println(starredPath.getKey() + "\t\t" + starredPath.getValue()); 1168 } 1169 out.close(); 1170 out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-missingDescriptions.txt"); 1171 out.println("# " + DATE); 1172 for (String starredPath : missingDescriptions) { 1173 // ^//ldml/dates/timeZoneNames/zone\[@type=".*"]/exemplarCity ; ROOT timezone ; The name of a city in: 1174 // {0}. See cldr.org/xxxx. 1175 out.println(toRegexPath(starredPath) + "\t;\tDESCRIPTION\t" + starredPaths.get(starredPath)); 1176 } 1177 out.close(); 1178 writeReasons(reasonsToPaths, targetDir, "en"); 1179 } 1180 toRegexPath(String starredPath)1181 private String toRegexPath(String starredPath) { 1182 String result = starredPath.replace("[", "\\["); 1183 result = result.replace("\".*\"", "\"([^\"]*)\""); 1184 return "^" + result; 1185 } 1186 1187 @Override iterator()1188 public Iterator<PathInfo> iterator() { 1189 return pathToPathInfo.values().iterator(); 1190 } 1191 } 1192 addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level, String path, String value)1193 static void addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level, 1194 String path, String value) { 1195 reasonsToPaths.put(descriptionStatus + "\t" + level, path + "\t" + value); 1196 } 1197 1198 // Get Date-Time in milliseconds getDateTimeinMillis(int year, int month, int date)1199 private static long getDateTimeinMillis(int year, int month, int date) { 1200 Calendar cal = Calendar.getInstance(); 1201 cal.set(year, month, date); 1202 return cal.getTimeInMillis(); 1203 } 1204 1205 static final long START_TIME = getDateTimeinMillis(2000, 1, 0); 1206 static final long END_TIME = getDateTimeinMillis(2015, 1, 0); 1207 static final long DELTA_TIME = 15 * 60 * 1000; 1208 static final long MIN_DAYLIGHT_PERIOD = 90L * 24 * 60 * 60 * 1000; 1209 1210 static final Set<String> HAS_DAYLIGHT; 1211 static { 1212 Set<String> hasDaylightTemp = new HashSet<String>(); 1213 Date date = new Date(); 1214 main: for (String zoneId : sc.getCanonicalTimeZones()) { 1215 TimeZone zone = TimeZone.getTimeZone(zoneId); 1216 for (long time = START_TIME + MIN_DAYLIGHT_PERIOD; time < END_TIME; time += MIN_DAYLIGHT_PERIOD) { 1217 date.setTime(time); 1218 if (zone.inDaylightTime(date)) { 1219 hasDaylightTemp.add(zoneId); 1220 if (false && !zone.useDaylightTime()) { 1221 System.out.println(zoneId + "\tuseDaylightTime()==false, but \tinDaylightTime(/" + date 1222 + "/)==true"); 1223 } 1224 continue main; 1225 } 1226 } 1227 } 1228 HAS_DAYLIGHT = Collections.unmodifiableSet(hasDaylightTemp); 1229 } 1230 1231 static final Set<String> SINGULAR_COUNTRIES; 1232 1233 private static PrintWriter countFile; 1234 static { 1235 // start with certain special-case countries 1236 Set<String> singularCountries = new HashSet<String>( 1237 Arrays.asList("CL EC ES NZ PT AQ FM GL KI UM PF".split(" "))); 1238 1239 Map<String, Set<String>> countryToZoneSet = sc.getCountryToZoneSet(); 1240 1241 main: for (Entry<String, Set<String>> countryZones : countryToZoneSet.entrySet()) { 1242 String country = countryZones.getKey(); 1243 if (country.equals("001")) { 1244 continue; 1245 } 1246 Set<String> zones = countryZones.getValue(); 1247 if (zones.size() == 1) { 1248 singularCountries.add(country); 1249 continue; 1250 } 1251 // make a set of sets 1252 List<TimeZone> initial = new ArrayList<TimeZone>(); 1253 for (String s : zones) { TimeZone.getTimeZone(s)1254 initial.add(TimeZone.getTimeZone(s)); 1255 } 1256 // now cycle through the times and see if we find any differences 1257 for (long time = START_TIME; time < END_TIME; time += DELTA_TIME) { 1258 int firstOffset = Integer.MIN_VALUE; 1259 for (TimeZone zone : initial) { 1260 int offset = zone.getOffset(time); 1261 if (firstOffset == Integer.MIN_VALUE) { 1262 firstOffset = offset; 1263 } else { 1264 if (firstOffset != offset) { 1265 if (false) 1266 System.out.println(country 1267 + " Difference at: " + new Date(time) 1268 + ", " + zone.getDisplayName() + " " + (offset / 1000.0 / 60 / 60) 1269 + ", " + initial.iterator().next().getDisplayName() + " " 1270 + (firstOffset / 1000.0 / 60 / 60)); 1271 continue main; 1272 } 1273 } 1274 } 1275 } 1276 singularCountries.add(country); 1277 } 1278 SINGULAR_COUNTRIES = Collections.unmodifiableSet(singularCountries); 1279 } 1280 1281 static final class MetazoneInfo { 1282 1283 /** 1284 * @param metazoneId 1285 * @param singleCountry 1286 * @param hasDaylight 1287 * @param zonesForCountry 1288 * @param regionToZone 1289 */ MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight)1290 public MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight) { 1291 this.golden = golden; 1292 this.metazoneId = metazoneId; 1293 this.singleCountry = singleCountry; 1294 this.hasDaylight = hasDaylight; 1295 } 1296 1297 static final String[] GENERIC = { "/long/generic", 1298 // "/short/generic" 1299 }; 1300 static final String[] DAYLIGHT = { "/long/generic", "/long/standard", "/long/daylight", 1301 // "/short/generic", "/short/standard", "/short/daylight" 1302 }; 1303 getTypes()1304 public String[] getTypes() { 1305 return hasDaylight ? DAYLIGHT : GENERIC; 1306 } 1307 1308 private final String metazoneId; 1309 private final String golden; 1310 private final boolean singleCountry; 1311 private final boolean hasDaylight; 1312 1313 static final List<MetazoneInfo> METAZONE_LIST; 1314 static { 1315 // Set<String> zones = supplementalDataInfo.getCanonicalTimeZones(); 1316 ArrayList<MetazoneInfo> result = new ArrayList<MetazoneInfo>(); 1317 1318 Map<String, String> zoneToCountry = sc.getZoneToCounty(); 1319 1320 Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone(); 1321 for (String metazone : supplementalDataInfo.getAllMetazones()) { 1322 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone); 1323 String golden = regionToZone.get("001"); 1324 if (golden == null) { 1325 throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone); 1326 } 1327 String region = zoneToCountry.get(golden); 1328 boolean isSingleCountry = SINGULAR_COUNTRIES.contains(region); 1329 if (isSingleCountry) { 1330 continue; 1331 } 1332 1333 // TimeZone goldenZone = TimeZone.getTimeZone(golden); 1334 1335 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(golden); 1336 if (metazoneRanges == null) { 1337 throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone); 1338 } 1339 MetazoneInfo item = new MetazoneInfo(metazone, golden, isSingleCountry, HAS_DAYLIGHT.contains(golden)); 1340 result.add(item); 1341 } 1342 METAZONE_LIST = Collections.unmodifiableList(result); 1343 } 1344 toString()1345 public String toString() { 1346 return sc.getZoneToCounty().get(golden) 1347 + "\t" + metazoneId 1348 + "\t" + golden 1349 + "\t" + (singleCountry ? "singleCountry" : "") 1350 + "\t" + (hasDaylight ? "useDaylightTime" : "") 1351 // + ": " + zonesForCountry 1352 // + "\t" + regionToZone; 1353 ; 1354 } 1355 } 1356 showMetazoneInfo()1357 static void showMetazoneInfo() { 1358 System.out.println("\nZones in multiple metazones\n"); 1359 1360 for (String zone : sc.getCanonicalTimeZones()) { 1361 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(zone); 1362 if (metazoneRanges == null) { 1363 System.out.println("Zone doesn't have metazone! " + zone); 1364 continue; 1365 } 1366 if (metazoneRanges.size() != 1) { 1367 for (MetaZoneRange range : metazoneRanges) { 1368 System.out.println(zone + ":\t" + range); 1369 } 1370 System.out.println(); 1371 } 1372 } 1373 1374 System.out.println("\nMetazoneInfo\n"); 1375 1376 for (boolean singleCountry : new boolean[] { false }) { 1377 for (boolean hasDaylight : new boolean[] { false, true }) { 1378 for (MetazoneInfo mzone : MetazoneInfo.METAZONE_LIST) { 1379 if (mzone.hasDaylight != hasDaylight) continue; 1380 if (mzone.singleCountry != singleCountry) continue; 1381 System.out.println(mzone); 1382 } 1383 } 1384 } 1385 } 1386 displayWsb(String file, EnglishInfo info)1387 private static void displayWsb(String file, EnglishInfo info) { 1388 try { 1389 String[] parts = file.split("/"); 1390 ULocale locale = new ULocale(parts[parts.length - 2]); 1391 FileInputStream fis = new FileInputStream(file); 1392 XMLReader xmlReader = XMLFileReader.createXMLReader(false); 1393 xmlReader.setErrorHandler(new MyErrorHandler()); 1394 Map<String, String> data = new TreeMap<String, String>(); 1395 xmlReader.setContentHandler(new MyContentHandler(locale, data, info)); 1396 InputSource is = new InputSource(fis); 1397 is.setSystemId(file); 1398 xmlReader.parse(is); 1399 fis.close(); 1400 for (Entry<String, String> entity : data.entrySet()) { 1401 String path = entity.getKey(); 1402 String value = entity.getValue(); 1403 PathInfo pathInfo = info.getPathInfo(path); 1404 System.out.println(value + "\t" + (pathInfo == null ? "?" : pathInfo.englishValue) + "\t" + path); 1405 } 1406 } catch (SAXParseException e) { 1407 System.out.println("\t" + "Can't read " + file); 1408 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1409 } catch (SAXException e) { 1410 System.out.println("\t" + "Can't read " + file); 1411 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1412 } catch (IOException e) { 1413 System.out.println("\t" + "Can't read " + file); 1414 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1415 } 1416 } 1417 1418 static class MyErrorHandler implements ErrorHandler { error(SAXParseException exception)1419 public void error(SAXParseException exception) throws SAXException { 1420 System.out.println("\nerror: " + XMLFileReader.showSAX(exception)); 1421 throw exception; 1422 } 1423 fatalError(SAXParseException exception)1424 public void fatalError(SAXParseException exception) throws SAXException { 1425 System.out.println("\nfatalError: " + XMLFileReader.showSAX(exception)); 1426 throw exception; 1427 } 1428 warning(SAXParseException exception)1429 public void warning(SAXParseException exception) throws SAXException { 1430 System.out.println("\nwarning: " + XMLFileReader.showSAX(exception)); 1431 throw exception; 1432 } 1433 } 1434 1435 static class MyContentHandler implements ContentHandler { 1436 private static final boolean SHOW = false; 1437 private Map<String, String> myData; 1438 private EnglishInfo info; 1439 private PathInfo lastPathInfo; 1440 private StringBuilder currentText = new StringBuilder(); 1441 private long lastId; 1442 private String lastPluralTag; 1443 private Map<String, String> pluralTags = new LinkedHashMap<String, String>(); 1444 private Set<String> pluralKeywords; 1445 MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info)1446 public MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info) { 1447 myData = data; 1448 this.info = info; 1449 PluralRules rules = PluralRules.forLocale(locale); 1450 pluralKeywords = Builder.with(new HashSet<String>()).addAll(rules.getKeywords()).add("0").add("1").freeze(); 1451 } 1452 1453 @Override characters(char[] arg0, int arg1, int arg2)1454 public void characters(char[] arg0, int arg1, int arg2) throws SAXException { 1455 String chars = String.valueOf(arg0, arg1, arg2); 1456 // if (SHOW) System.out.println("\t characters\t" + chars); 1457 currentText.append(chars); 1458 } 1459 1460 @Override endDocument()1461 public void endDocument() throws SAXException { 1462 if (SHOW) System.out.println("\t endDocument\t"); 1463 } 1464 1465 @Override endElement(String arg0, String arg1, String qName)1466 public void endElement(String arg0, String arg1, String qName) throws SAXException { 1467 // if (SHOW) System.out.println("\t endElement\t" + arg0 + "\t" + arg1 + "\t" + qName); 1468 if (qName.equals("msg")) { 1469 String chars = currentText.toString().replace("\n", "").trim(); 1470 if (lastPathInfo == null) { 1471 System.out.println("***Missing path info for " + lastId + "\t" + chars); 1472 // myData.put("*** Missing path: " + lastId, chars); 1473 } else if (pluralTags.size() != 0) { 1474 for (Entry<String, String> pluralTagEntry : pluralTags.entrySet()) { 1475 String pluralTag = pluralTagEntry.getKey(); 1476 String pluralTagValue = pluralTagEntry.getValue(); 1477 if (pluralKeywords.contains(pluralTag)) { 1478 String fixedCount = lastPathInfo.path.replace("other", pluralTag); 1479 myData.put(fixedCount, pluralTagValue); 1480 } else { 1481 System.out.println("***Skipping " + pluralTag + "\t" + pluralTagValue); 1482 } 1483 } 1484 // myData.put(lastPathInfo.path, pluralTags.toString()); 1485 pluralTags.clear(); 1486 } else { 1487 myData.put(lastPathInfo.path, chars); 1488 } 1489 currentText.setLength(0); 1490 } 1491 } 1492 1493 @Override endPrefixMapping(String arg0)1494 public void endPrefixMapping(String arg0) throws SAXException { 1495 if (SHOW) System.out.println("\t endPrefixMapping\t" + arg0); 1496 } 1497 1498 @Override ignorableWhitespace(char[] arg0, int arg1, int arg2)1499 public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException { 1500 if (SHOW) System.out.println("\t ignorableWhitespace\t" + String.valueOf(arg0, arg1, arg2)); 1501 } 1502 1503 @Override processingInstruction(String arg0, String arg1)1504 public void processingInstruction(String arg0, String arg1) throws SAXException { 1505 if (SHOW) System.out.println("\t processingInstruction\t" + arg0 + "\t" + arg1); 1506 } 1507 1508 @Override setDocumentLocator(Locator arg0)1509 public void setDocumentLocator(Locator arg0) { 1510 if (SHOW) System.out.println("\t setDocumentLocator\t" + arg0); 1511 } 1512 1513 @Override skippedEntity(String arg0)1514 public void skippedEntity(String arg0) throws SAXException { 1515 if (SHOW) System.out.println("\t skippedEntity\t" + arg0); 1516 } 1517 1518 @Override startDocument()1519 public void startDocument() throws SAXException { 1520 if (SHOW) System.out.println("\t startDocument\t"); 1521 } 1522 1523 @Override startElement(String arg0, String arg1, String qName, Attributes arg3)1524 public void startElement(String arg0, String arg1, String qName, Attributes arg3) throws SAXException { 1525 // if (SHOW) System.out.println("\t startElement\t" + arg0 + "\t" + arg1 + "\t" + qName + "\t" + 1526 // showAttributes(arg3)); 1527 if (qName.equals("msg")) { 1528 lastId = Long.parseLong(arg3.getValue("id")); 1529 lastPathInfo = info.getPathInfo(lastId); 1530 currentText.setLength(0); 1531 } else if (qName.equals("ph")) { 1532 String name = arg3.getValue("name"); 1533 String original = lastPathInfo.getPlaceholderReplacementsToOriginal().get(name); 1534 if (original != null) { 1535 currentText.append(original); 1536 } else if (name.startsWith("[PLURAL_")) { 1537 pluralTags.clear(); 1538 lastPluralTag = "[START_PLURAL]"; 1539 } else { 1540 String pluralTag = PLURAL_TAGS.get(name); 1541 if (pluralTag != null) { 1542 String chars = currentText.toString().replace("\n", "").trim(); 1543 pluralTags.put(lastPluralTag, chars); 1544 currentText.setLength(0); 1545 lastPluralTag = pluralTag; 1546 } else { 1547 System.out.println("***Can't find " + name + " in " 1548 + lastPathInfo.getPlaceholderReplacementsToOriginal()); 1549 } 1550 } 1551 } 1552 } 1553 showAttributes(Attributes atts)1554 private String showAttributes(Attributes atts) { 1555 String result = ""; 1556 for (int i = 0; i < atts.getLength(); ++i) { 1557 result += atts.getQName(i) + "=\"" + atts.getValue(i) + "\"\t"; 1558 } 1559 return result; 1560 } 1561 1562 @Override startPrefixMapping(String arg0, String arg1)1563 public void startPrefixMapping(String arg0, String arg1) throws SAXException { 1564 if (SHOW) System.out.println("\t startPrefixMapping\t" + arg0 + "\t" + arg1); 1565 } 1566 } 1567 1568 static final Map<String, String> PLURAL_TAGS = Builder.with(new HashMap<String, String>()) 1569 .put("[=0]", "0") 1570 .put("[=1]", "1") 1571 .put("[ZERO]", PluralRules.KEYWORD_ZERO) 1572 .put("[ONE]", PluralRules.KEYWORD_ONE) 1573 .put("[TWO]", PluralRules.KEYWORD_TWO) 1574 .put("[FEW]", PluralRules.KEYWORD_FEW) 1575 .put("[MANY]", PluralRules.KEYWORD_MANY) 1576 .put("[OTHER]", PluralRules.KEYWORD_OTHER) 1577 .put("[END_PLURAL]", "") 1578 .freeze(); 1579 1580 private static String compareDirectory; 1581 } 1582