1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.io.PrintWriter;
7 import java.io.StringWriter;
8 import java.util.ArrayList;
9 import java.util.Arrays;
10 import java.util.Calendar;
11 import java.util.Collections;
12 import java.util.Date;
13 import java.util.EnumSet;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.LinkedHashMap;
18 import java.util.LinkedHashSet;
19 import java.util.List;
20 import java.util.Locale;
21 import java.util.Map;
22 import java.util.Map.Entry;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 
29 import org.unicode.cldr.draft.FileUtilities;
30 import org.unicode.cldr.test.CheckExemplars;
31 import org.unicode.cldr.test.CoverageLevel2;
32 import org.unicode.cldr.test.DisplayAndInputProcessor;
33 import org.unicode.cldr.test.QuickCheck;
34 import org.unicode.cldr.tool.Option.Options;
35 import org.unicode.cldr.util.Builder;
36 import org.unicode.cldr.util.CLDRFile;
37 import org.unicode.cldr.util.CLDRPaths;
38 import org.unicode.cldr.util.Factory;
39 import org.unicode.cldr.util.FileCopier;
40 import org.unicode.cldr.util.LanguageTagParser;
41 import org.unicode.cldr.util.Level;
42 import org.unicode.cldr.util.PathDescription;
43 import org.unicode.cldr.util.PatternCache;
44 import org.unicode.cldr.util.PatternPlaceholders;
45 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
46 import org.unicode.cldr.util.PrettyPath;
47 import org.unicode.cldr.util.RegexLookup;
48 import org.unicode.cldr.util.RegexLookup.Finder;
49 import org.unicode.cldr.util.RegexUtilities;
50 import org.unicode.cldr.util.StandardCodes;
51 import org.unicode.cldr.util.StringId;
52 import org.unicode.cldr.util.SupplementalDataInfo;
53 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
54 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
56 import org.unicode.cldr.util.TransliteratorUtilities;
57 import org.unicode.cldr.util.With;
58 import org.unicode.cldr.util.XMLFileReader;
59 import org.unicode.cldr.util.XMLSource;
60 import org.unicode.cldr.util.XPathParts;
61 import org.xml.sax.Attributes;
62 import org.xml.sax.ContentHandler;
63 import org.xml.sax.ErrorHandler;
64 import org.xml.sax.InputSource;
65 import org.xml.sax.Locator;
66 import org.xml.sax.SAXException;
67 import org.xml.sax.SAXParseException;
68 import org.xml.sax.XMLReader;
69 
70 import com.google.common.base.Joiner;
71 import com.ibm.icu.impl.Relation;
72 import com.ibm.icu.impl.Row;
73 import com.ibm.icu.impl.Row.R2;
74 import com.ibm.icu.lang.CharSequences;
75 import com.ibm.icu.text.BreakIterator;
76 import com.ibm.icu.text.DateFormat;
77 import com.ibm.icu.text.MessageFormat;
78 import com.ibm.icu.text.PluralRules;
79 import com.ibm.icu.text.SimpleDateFormat;
80 import com.ibm.icu.text.Transform;
81 import com.ibm.icu.text.UnicodeSet;
82 import com.ibm.icu.util.Output;
83 import com.ibm.icu.util.TimeZone;
84 import com.ibm.icu.util.ULocale;
85 
86 public class GenerateXMB {
87     private static final String DEBUG_PATH = "[@type=\"day\"]/unitPattern[@count=\"1\"]";
88 
89     static StandardCodes sc = StandardCodes.make();
90 
91     static final String DATE;
92     static {
93         DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
94         DATE = dateFormat.format(new Date());
95     }
96     static final String stock = "en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK";
97     private static final HashSet<String> REGION_LOCALES = new HashSet<>(Arrays.asList(stock.split("\\|")));
98 
99     final static Options myOptions = new Options("In normal usage, you set the -t option for the target.")
100         .add("target", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/xmb/",
101             "The target directory for building. Will generate an English .xmb file, and .wsb files for other languages.")
102         .add(
103             "file",
104             ".*",
105             stock,
106             "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering")
107         // "^(sl|fr)$",
108         .add("path", ".*", "Filter the information based on path name, using a regex argument")
109         // "dates.*(pattern|available)",
110         .add("content", ".*", "Filter the information based on content name, using a regex argument")
111         .add("jason", ".*", "Generate JSON versions instead")
112         .add("zone", null, "Show metazoneinfo and exit")
113         .add("wsb", ".*", "Show metazoneinfo and exit")
114         .add("kompare", ".*", CLDRPaths.BASE_DIRECTORY + "../DATA/cldr/common/google-bulk-imports",
115             "Compare data with directory; generate files in -target.")
116         .add("project_name", 'n', ".*", "CLDR", "The ID of the project.");
117 
118     static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
119     // static Matcher contentMatcher;
120     static Matcher pathMatcher;
121     static RegexLookup<String> pathFindRemover = new RegexLookup<String>().loadFromFile(GenerateXMB.class,
122         "xmbSkip.txt"); // .compile("//ldml/dates/calendars/calendar\\[@type=\"(?!gregorian).*").matcher("");
123     static PrettyPath prettyPath = new PrettyPath();
124     static int errors = 0;
125     static Relation<String, String> path2errors = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
126 
127     // enum Handling {SKIP};
128     static final Matcher datePatternMatcher = PatternCache.get("dates.*(pattern|available)").matcher("");
129 
130     public static final boolean DEBUG = false;
131 
132     private static final HashSet<String> SKIP_LOCALES = new HashSet<>(
133         Arrays.asList(new String[] { "en", "root" }));
134 
135     public static String DTD_VERSION;
136 
137     private static String projectId;
138 
139     enum PlaceholderType {
140         BRACES, // e.g. {NAME}
141         XML, // e.g. <ph name='NAME' />
142         XML_EXAMPLE // e.g. <ph name='NAME' /><ex>EXAMPLE</ex>{0}</ph>
143     }
144 
main(String[] args)145     public static void main(String[] args) throws Exception {
146         myOptions.parse(args, true);
147         Option option;
148         option = myOptions.get("zone");
149         if (option.doesOccur()) {
150             showMetazoneInfo();
151             return;
152         }
153         option = myOptions.get("file");
154         String fileMatcherString = option.getValue();
155         option = myOptions.get("content");
156         Matcher contentMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null;
157         option = myOptions.get("path");
158         pathMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null;
159 
160         String targetDir = myOptions.get("target").getValue();
161         countFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "counts.txt");
162 
163         Factory cldrFactory1 = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
164         CLDRFile english = cldrFactory1.make("en", true);
165         CLDRFile englishTop = cldrFactory1.make("en", false);
166         DTD_VERSION = englishTop.getDtdVersion();
167 
168         CLDRFile root = cldrFactory1.make("en", true);
169 
170         showDefaultContents(targetDir, english);
171         EnglishInfo englishInfo = new EnglishInfo(targetDir, english, root);
172 
173         option = myOptions.get("kompare");
174         if (option.doesOccur()) {
175             compareDirectory = option.getValue();
176             compareFiles(fileMatcherString, contentMatcher, targetDir, cldrFactory1, english, englishInfo);
177             return;
178         }
179 
180         if (myOptions.get("wsb").doesOccur()) {
181             displayWsb(myOptions.get("wsb").getValue(), englishInfo);
182             return;
183         }
184 
185         projectId = myOptions.get("project_name").getValue();
186 
187         writeFile(targetDir, "en", englishInfo, english, true, false);
188         writeFile(targetDir + "/filtered/", "en", englishInfo, english, true, true);
189 
190         // TODO:
191         // Replace {0}... with placeholders (Mostly done, but need better examples)
192         // Replace datetime fields (MMM, L, ...) with placeholders
193         // Skip items that we don't need translated (most language names, script names, deprecated region names, etc.
194         // Add descriptions
195         // Add pages with detailed descriptions, and links from the descriptions
196         // Represent the items with count= as ICUSyntax
197         // Filter items that we don't want to get translated, and add others that we need even if not in English
198         // Rewire items that are in undistinguished attributes
199         // Test each xml file for validity
200         // Generate strings that let the user choose the placeholder style hh vs HH,...???
201 
202         Factory cldrFactory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, fileMatcherString);
203         LanguageTagParser ltp = new LanguageTagParser();
204 
205         for (String file : cldrFactory2.getAvailable()) {
206             if (SKIP_LOCALES.contains(file)) {
207                 continue;
208             }
209 
210             // skip all locales with regions (with certain exceptions)
211             if (ltp.set(file).getRegion().length() != 0) {
212                 if (!REGION_LOCALES.contains(file)) {
213                     continue;
214                 }
215             }
216 
217             // skip anything without plural rules
218             final PluralInfo plurals = supplementalDataInfo.getPlurals(file, false);
219             if (plurals == null) {
220                 System.out.println("Skipping " + file + ", no plural rules");
221                 continue;
222             }
223 
224             CLDRFile cldrFile = cldrFactory2.make(file, true);
225             writeFile(targetDir + "/wsb/", file, englishInfo, cldrFile, false, false);
226             writeFile(targetDir + "/wsb/filtered/", file, englishInfo, cldrFile, false, true);
227             countFile.flush();
228         }
229         countFile.close();
230         PrintWriter errorFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "errors.txt");
231         for (Entry<String, Set<String>> entry : path2errors.keyValuesSet()) {
232             errorFile.println(entry);
233         }
234         errorFile.close();
235         System.out.println("Errors: " + (errors + path2errors.size()));
236     }
237 
compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir, Factory cldrFactory1, CLDRFile english, EnglishInfo englishInfo)238     private static void compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir,
239         Factory cldrFactory1, CLDRFile english,
240         EnglishInfo englishInfo) throws IOException {
241         SubmittedPathFixer fixer = new SubmittedPathFixer();
242         Factory cldrFactory2 = Factory.make(compareDirectory, fileMatcherString);
243         PrintWriter output = null;
244         PrintWriter log = FileUtilities.openUTF8Writer(targetDir + "/log/", "skipped.txt");
245 
246         for (String file : cldrFactory2.getAvailable()) {
247             // System.out.println("Checking " + file);
248             CLDRFile submitted = cldrFactory2.make(file, false);
249             CLDRFile trunk = cldrFactory1.make(file, true);
250             for (String path : With.in(submitted.iterator(null, submitted.getComparator()))) {
251                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
252                     continue;
253                 }
254                 String submittedValue = submitted.getStringValue(path);
255                 if (contentMatcher != null && !contentMatcher.reset(submittedValue).matches()) {
256                     continue;
257                 }
258                 PathStatus pathStatus = shouldSkipPath(path, submittedValue);
259                 if (pathStatus == PathStatus.SKIP) {
260                     continue;
261                 }
262 
263                 // fix alt
264                 String trunkPath = fixer.fix(path, false);
265                 String trunkValue = trunk.getStringValue(trunkPath);
266                 if (CharSequences.equals(submittedValue, trunkValue)) {
267                     continue;
268                 }
269                 if (output == null) {
270                     output = FileUtilities.openUTF8Writer(targetDir, file + ".txt");
271                     output.println("ID\tEnglish\tSource\tRelease\tDescription");
272                 }
273                 String englishValue = english.getStringValue(trunkPath);
274                 final PathInfo pathInfo = englishInfo.getPathInfo(trunkPath);
275                 String description;
276                 if (pathInfo == null) {
277                     log.println(file + "\tDescription unavailable for " + trunkPath);
278                     errors++;
279                     String temp = fixer.fix(path, true);
280                     englishInfo.getPathInfo(trunkPath);
281                     continue;
282                 } else {
283                     description = pathInfo.getDescription();
284                 }
285                 long id = StringId.getId(trunkPath);
286                 if (englishValue == null) {
287                     log.println(file + "\tEmpty English for " + trunkPath);
288                     errors++;
289                     continue;
290                 }
291                 output.println(id + "\t" + ssquote(englishValue, false) + "\t" + ssquote(submittedValue, false) + "\t"
292                     + ssquote(trunkValue, true) + "\t" + description);
293             }
294             if (output != null) {
295                 output.close();
296                 output = null;
297             }
298             log.flush();
299         }
300         log.close();
301     }
302 
303     static Output<String[]> matches = new Output<>();
304     static List<String> failures = new ArrayList<>();
305     static Output<Finder> matcherFound = new Output<>();
306 
307     enum PathStatus {
308         SKIP, KEEP, MAYBE
309     }
310 
shouldSkipPath(String path, String value)311     public static PathStatus shouldSkipPath(String path, String value) {
312         // skip if
313         List<String> myFailures = null;
314         if (false && path.contains("currencies") && path.contains("symbol")) {
315             myFailures = failures;
316         }
317         String skipPath = pathFindRemover.get(path, null, matches, matcherFound, myFailures);
318         if (myFailures != null && failures.size() != 0) {
319             System.out.println("Failures\n\t" + Joiner.on("\n\t").join(failures));
320             failures.clear();
321         }
322         if (skipPath == null || skipPath.equals("MAYBE")) {
323             return PathStatus.MAYBE;
324         } else if (skipPath.equals("VALUE")) {
325             return value.equals(matches.value[1]) ? PathStatus.SKIP : PathStatus.MAYBE;
326         } else if (skipPath.equals("SKIP")) {
327             return PathStatus.SKIP;
328         } else if (skipPath.equals("KEEP")) {
329             return PathStatus.KEEP;
330         }
331         throw new IllegalArgumentException("Unexpected xmbSkip.txt value: " + skipPath);
332     }
333 
ssquote(String englishValue, boolean showRemoved)334     private static String ssquote(String englishValue, boolean showRemoved) {
335         if (englishValue == null) {
336             return showRemoved ? "[removed]" : "[empty]";
337         }
338         englishValue = englishValue.replace("\"", "&quot;");
339         return englishValue;
340     }
341 
342     static class SubmittedPathFixer {
343         private static final Pattern PATH_FIX = PatternCache.get("\\[@alt=\"" +
344             "(?:proposed|((?!proposed)[-a-zA-Z0-9]*)-proposed)" +
345             "-u\\d+-implicit[0-9.]+" +
346             "(?:-proposed-u\\d+-implicit[0-9.]+)?" + // NOTE: we allow duplicated alt values because of a generation
347             // bug.
348             // -proposed-u971-implicit2.0
349             "\"]");
350         static Matcher pathFix = PATH_FIX.matcher("");
351 
fix(String path, boolean debug)352         public String fix(String path, boolean debug) {
353             if (pathFix.reset(path).find()) {
354                 if (debug) {
355                     // debug in case we get a mismatch
356                     String temp = "REGEX:\t" +
357                         RegexUtilities.showMismatch(PATH_FIX, path.substring(pathFix.start(0)));
358                 }
359                 final String group = pathFix.group(1);
360                 String replacement = group == null ? "" : "[@alt=\"" + group + "\"]";
361                 String trunkPath = path.substring(0, pathFix.start(0)) + replacement + path.substring(pathFix.end(0));
362                 // HACK because of change in CLDR defaults
363                 if (trunkPath.startsWith("//ldml/numbers/symbols/")) {
364                     trunkPath = "//ldml/numbers/symbols[@numberSystem=\"latn\"]/"
365                         + trunkPath.substring("//ldml/numbers/symbols/".length());
366                 }
367                 return trunkPath;
368             }
369             return path;
370         }
371 
372     }
373 
showDefaultContents(String targetDir, CLDRFile english)374     private static void showDefaultContents(String targetDir, CLDRFile english) throws IOException {
375         PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "locales.txt");
376         String[] locales = stock.split("\\|");
377         Set<R2<String, String>> sorted = new TreeSet<>();
378         for (String locale : locales) {
379             if (locale.isEmpty()) continue;
380             String name = english.getName(locale);
381             R2<String, String> row = Row.of(name, locale);
382             sorted.add(row);
383         }
384         Set<String> defaultContents = supplementalDataInfo.getDefaultContentLocales();
385 
386         for (R2<String, String> row : sorted) {
387             String locale = row.get1();
388             String dlocale = getDefaultContentLocale(locale, defaultContents);
389             out.println(row.get0() + "\t" + locale + "\t" + english.getName(dlocale) + "\t" + dlocale);
390         }
391         out.close();
392     }
393 
getDefaultContentLocale(String locale, Set<String> defaultContents)394     private static String getDefaultContentLocale(String locale, Set<String> defaultContents) {
395         String best = null;
396         for (String s : defaultContents) {
397             if (s.startsWith(locale)) {
398                 if (best == null) {
399                     best = s;
400                 } else if (s.length() < best.length()) {
401                     best = s;
402                 }
403             }
404         }
405         if (best == null) {
406             return locale;
407         }
408         return best;
409     }
410 
411     static final Pattern COUNT_OR_ALT_ATTRIBUTE = PatternCache.get("\\[@(count)=\"([^\"]*)\"]");
412     static final Pattern PLURAL_XPATH = Pattern
413         .compile("//ldml/(units/unit|numbers/(decimal|currency)Formats).*\\[@count=\"\\w+\"].*");
414     static final Pattern SKIP_EXEMPLAR_TEST = PatternCache.get(
415         "/(currencySpacing"
416             + "|hourFormat"
417             + "|exemplarCharacters"
418             + "|pattern"
419             + "|localizedPatternChars"
420             + "|segmentations"
421             + "|dateFormatItem"
422             + "|references"
423             + "|unitPattern"
424             + "|intervalFormatItem"
425             + "|localeDisplayNames/variants/"
426             + "|commonlyUsed"
427             + "|currency.*/symbol"
428             + "|symbols/(exponential|nan))");
429 
430     static final Matcher skipExemplarTest = SKIP_EXEMPLAR_TEST.matcher("");
431     static final UnicodeSet ASCII_LATIN = new UnicodeSet("[A-Za-z]").freeze();
432     static final UnicodeSet LATIN = new UnicodeSet("[:sc=Latn:]").freeze();
433 
434     static final Matcher keepFromRoot = PatternCache.get("/(exemplarCity|currencies/currency.*/symbol)").matcher("");
435     static final Matcher currencyDisplayName = Pattern
436         .compile("/currencies/currency\\[@type=\"([^\"]*)\"]/displayName").matcher("");
437 
writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile, boolean isEnglish, boolean filter)438     private static void writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile,
439         boolean isEnglish, boolean filter) throws IOException {
440 
441         String extension = "xml";
442         Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
443         Set<String> seenStarred = new HashSet<>();
444 
445         Relation<String, Row.R2<PathInfo, String>> countItems = Relation.of(
446             new TreeMap<String, Set<Row.R2<PathInfo, String>>>(), TreeSet.class);
447         Matcher countMatcher = COUNT_OR_ALT_ATTRIBUTE.matcher("");
448         int lineCount = 0;
449         int wordCount = 0;
450         int messageCount = 0;
451 
452         StringWriter buffer = new StringWriter();
453         PrintWriter out1 = new PrintWriter(buffer);
454         StringWriter buffer3 = new StringWriter();
455         PrintWriter out3 = new PrintWriter(buffer3);
456         UnicodeSet exemplars = getExemplars(cldrFile);
457 
458         for (PathInfo pathInfo : englishInfo) {
459             if (false && pathInfo.id == 46139888945574604L) { // for debugging
460                 System.out.println("?");
461             }
462             String path = pathInfo.getPath();
463             String value;
464             if (isEnglish) {
465                 value = pathInfo.englishValue;
466             } else {
467                 value = cldrFile.getStringValue(path);
468             }
469             // Remove quotes from number formats (we'll put them back in during
470             // post-processing).
471             // TODO: we should actually call daip.processForDisplay() here, but
472             // it does more stuff than we need it to do, e.g. stripping the
473             // brackets from exemplarCharacters.
474             if (DisplayAndInputProcessor.NUMBER_FORMAT_XPATH.matcher(path).matches()) {
475                 value = value.replace("'", "");
476             }
477 
478             // skip root if not English
479             if (!isEnglish && value != null && !keepFromRoot.reset(path).find()) { // note that mismatched script will
480                 // be checked later
481                 String locale = cldrFile.getSourceLocaleID(path, null);
482                 if (locale.equals("root")) {
483                     reasonsToPaths.put("root", path + "\t" + value);
484                     continue;
485                 }
486                 if (locale.equals(XMLSource.CODE_FALLBACK_ID)) {
487                     reasonsToPaths.put("codeFallback", path + "\t" + value);
488                     continue;
489                 }
490             }
491             boolean hasPlurals = PLURAL_XPATH.matcher(path).matches();
492             if (filter && !hasPlurals) {
493                 String starred = pathInfo.getStarredPath();
494                 if (seenStarred.contains(starred)) {
495                     continue;
496                 }
497                 seenStarred.add(starred);
498             }
499             if (value == null) {
500                 reasonsToPaths.put("missing", path + "	" + value);
501                 continue;
502             }
503             if (!isEnglish) {
504                 String fullPath = cldrFile.getFullXPath(path);
505                 if (fullPath.contains("draft")) {
506                     XPathParts xpathParts = XPathParts.getFrozenInstance(fullPath);
507                     String draftValue = xpathParts.getAttributeValue(-1, "draft");
508                     if (!draftValue.equals("contributed")) {
509                         reasonsToPaths.put(draftValue, path + "\t" + value);
510                         continue;
511                     }
512                 }
513             }
514             if (!isEnglish
515                 && !exemplars.containsAll(value)
516                 && !skipExemplarTest.reset(path).find()) {
517                 // check for special cases in currency names. If the code itself occurs in the name, that's ok
518                 // ldml/numbers/currencies/currency[@type="XXX"]/displayName
519                 boolean bad = true;
520                 if (currencyDisplayName.reset(path).find()) {
521                     String code = currencyDisplayName.group(1);
522                     String value2 = value.replace(code, "");
523                     bad = !exemplars.containsAll(value2);
524                 }
525                 if (bad) {
526                     UnicodeSet diff = new UnicodeSet().addAll(value).removeAll(exemplars);
527                     reasonsToPaths.put("exemplars", path + "\t" + value + "\t" + diff);
528                     continue;
529                 }
530             }
531             // String fullPath = cldrFile.getStringValue(path);
532             // //ldml/units/unit[@type="day"]/unitPattern[@count="one"]
533             if (hasPlurals) {
534                 countMatcher.reset(path).find();
535                 String countLessPath = countMatcher.replaceAll("");
536                 countItems.put(countLessPath, Row.of(pathInfo, value));
537                 continue;
538             }
539             if (!isEnglish && pathInfo.changedEnglish) {
540                 reasonsToPaths.put("changed-english", path);
541             } else {
542                 writePathInfo(out1, pathInfo, value, isEnglish);
543                 messageCount++;
544             }
545             if (isEnglish) {
546                 writeJavaInfo(out3, pathInfo.getStringId(), pathInfo.getPath(), value);
547             }
548             wordCount += pathInfo.wordCount;
549             ++lineCount;
550         }
551         R2<Integer, Integer> lineWordCount = writeCountPathInfo(out1, out3, cldrFile.getLocaleID(), countItems,
552             isEnglish, filter);
553         messageCount += lineWordCount.get0();
554         lineCount += lineWordCount.get0();
555         wordCount += lineWordCount.get1();
556         if (!filter && countItems.size() != lineWordCount.get0().intValue()) {
557             System.out.println(localeId + "\t" + countItems.size() + "\t" + lineWordCount.get0().intValue());
558         }
559         out1.flush();
560         out3.flush();
561 
562         String file = LanguageCodeConverter.toGoogleLocaleId(localeId);
563         String localeName = englishInfo.getName(localeId);
564         PrintWriter out = FileUtilities.openUTF8Writer(targetDir, file + "." + extension);
565 
566         if (isEnglish) {
567             FileCopier.copy(GenerateXMB.class, "xmb-dtd.xml", out);
568 //            FileUtilities.appendFile(GenerateXMB.class, "xmb-dtd.xml", out);
569             out.println("<!-- " + localeName + " -->");
570             out.println("<messagebundle class='" + projectId + "'> <!-- version: " + DTD_VERSION + ", date: " + DATE
571                 + " -->");
572             out.println(buffer.toString());
573             out.println("</messagebundle>");
574 
575             PrintWriter out3File = FileUtilities.openUTF8Writer(targetDir, "IdToPath.java");
576             out3File.println("package org.unicode.cldr.tool;");
577             out3File.println();
578             out3File.println("import java.util.HashMap;");
579             out3File.println();
580             out3File.println("/**");
581             out3File.println(" * Autogenerated by GenerateXMB for use by ConvertXTB.");
582             out3File.println(" * Do not manually edit this file.");
583             out3File.println(" */");
584             out3File.println("public class IdToPath {");
585             out3File.println("  static final HashMap<String,String> map = new HashMap<String,String>();");
586             out3File.println("  public static String getPath(String id) {");
587             out3File.println("      return map.get(id);");
588             out3File.println("  }");
589             out3File.println("  static {");
590             out3File.println("      String[][] data = {");
591             out3File.println(buffer3);
592             out3File.println("      };");
593             out3File.println("      for (String[] pair : data) {");
594             out3File.println("          map.put(pair[0], pair[1]);");
595             out3File.println("      }");
596             out3File.println("  }");
597             out3File.println("}");
598             out3File.close();
599         } else {
600 
601 //            FileUtilities.appendFile(GenerateXMB.class, "wsb-dtd.xml", out);
602             FileCopier.copy(GenerateXMB.class, "wsb-dtd.xml", out);
603             out.println("<!-- " + localeName + " -->");
604             out.println("<worldserverbundles lazarus_id='dummy' date='" + DATE + "'> <!-- version: " + DTD_VERSION
605                 + " -->");
606             out.println("  <worldserverbundle project_id='" + projectId + "' message_count='" + messageCount + "'>");
607             out.println(buffer.toString());
608             out.println("  </worldserverbundle>");
609             out.println("</worldserverbundles>");
610         }
611         out.close();
612         QuickCheck.check(new File(targetDir, file + "." + extension));
613         if (!filter) {
614             countFile.println(file + "\t" + lineCount + "\t" + wordCount);
615         }
616         if (!isEnglish && !filter) {
617             writeReasons(reasonsToPaths, targetDir, file);
618         }
619     }
620 
writeJavaInfo(PrintWriter out3, String id, String path, String value)621     private static void writeJavaInfo(PrintWriter out3, String id, String path, String value) {
622         out3.println("              {\"" + id + "\",\"" + path.replace("\"", "\\\"") + "\",\""
623             + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"},");
624     }
625 
getExemplars(CLDRFile cldrFile)626     private static UnicodeSet getExemplars(CLDRFile cldrFile) {
627         UnicodeSet exemplars = cldrFile.getExemplarSet("", CLDRFile.WinningChoice.WINNING);
628         boolean isLatin = exemplars.containsSome(ASCII_LATIN);
629         exemplars.addAll(CheckExemplars.AlwaysOK);
630         UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", CLDRFile.WinningChoice.WINNING);
631         if (auxExemplars != null) {
632             exemplars.addAll(auxExemplars);
633         }
634         if (!isLatin) {
635             exemplars.removeAll(LATIN);
636         }
637         exemplars.freeze();
638         return exemplars;
639     }
640 
641     static final Pattern COUNT_ATTRIBUTE = PatternCache.get("\\[@count=\"([^\"]*)\"]");
642     static final Pattern PLURAL_NUMBER = PatternCache.get("(decimal|number)Format");
643 
writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale, Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter)644     private static Row.R2<Integer, Integer> writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale,
645         Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter) {
646         Matcher m = COUNT_ATTRIBUTE.matcher("");
647         int wordCount = 0;
648         PluralInfo pluralInfo = supplementalDataInfo.getPlurals(locale);
649         int lineCount = 0;
650         Set<String> errorSet = new LinkedHashSet<>();
651         for (Entry<String, Set<R2<PathInfo, String>>> entry : countItems.keyValuesSet()) {
652             String countLessPath = entry.getKey();
653             Map<String, String> fullValues = new TreeMap<>();
654             PathInfo pathInfo = null;
655             String value = null;
656             for (R2<PathInfo, String> entry2 : entry.getValue()) {
657                 PathInfo pathInfoN = entry2.get0();
658                 m.reset(pathInfoN.getPath()).find();
659                 String count = m.group(1);
660                 if (count.equals("other")) {
661                     pathInfo = pathInfoN;
662                 }
663                 value = entry2.get1();
664                 fullValues.put(count, value);
665             }
666             if (pathInfo == null) {
667                 continue;
668             }
669             if (fullValues.size() < 2) {
670                 // if we don't have two count values, skip
671                 System.out.println(locale + "\tMust have 2 count values: " + entry.getKey());
672                 continue;
673             }
674             String fullPlurals = showPlurals(fullValues, locale, pathInfo, pluralInfo, isEnglish, errorSet);
675             if (fullPlurals == null) {
676                 System.out.println(locale + "\tCan't format plurals for: " + entry.getKey() + "\t" + errorSet);
677                 errors++;
678                 continue;
679             }
680 
681             out.println();
682             out.println("    <!--    "
683                 // + prettyPath.getPrettyPath(pathInfo.getPath(), false) + " ;    "
684                 + countLessPath + "    -->");
685             out.println("    <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'");
686             out.println("     >" + fullPlurals + "</msg>");
687             // Use the last plural value in the loop because we only need it for example purposes.
688             writeJavaInfo(out3, pathInfo.getStringId(), countLessPath, value);
689             // if (!isEnglish || pathInfo.placeholderReplacements != null) {
690             // out.println("\t<!-- English original:\t" + pathInfo.getEnglishValue() + "\t-->");
691             // }
692             out.flush();
693             ++lineCount;
694             wordCount += pathInfo.wordCount * 3;
695             if (filter) {
696                 break;
697             }
698         }
699         return Row.of(lineCount, wordCount);
700     }
701 
    /** Keys that showPlurals emits, in this order, for every plural message: the explicit "=0"/"=1" cases, then the categories. */
    static final String[] PLURAL_KEYS = { "=0", "=1", "zero", "one", "two", "few", "many", "other" };
    /** Count values that EnglishInfo substitutes into each path having a count attribute, to create the extra count paths. */
    static final String[] EXTRA_PLURAL_KEYS = { "0", "1", "zero", "one", "two", "few", "many" };
704 
showPlurals(Map<String, String> values, String locale, PathInfo pathInfo, PluralInfo pluralInfo, boolean isEnglish, Set<String> errorSet)705     private static String showPlurals(Map<String, String> values,
706         String locale, PathInfo pathInfo, PluralInfo pluralInfo,
707         boolean isEnglish, Set<String> errorSet) {
708         errorSet.clear();
709         /*
710          * Desired output for English XMB
711          * <msg desc=
712          * "[ICU Syntax] Plural forms for a number of hours. These are special messages: before translating, see cldr.org/translation/plurals."
713          * >
714          * {LENGTH, select,
715          * abbreviated {
716          * {NUMBER_OF_HOURS, plural,
717          * =0 {0 hrs}
718          * =1 {1 hr}
719          * zero {# hrs}
720          * one {# hrs}
721          * two {# hrs}
722          * few {# hrs}
723          * many {# hrs}
724          * other {# hrs}}}
725          * full {
726          * {NUMBER_OF_HOURS, plural,
727          * =0 {0 hours}
728          * =1 {1 hour}
729          * zero {# hours}
730          * one {# hours}
731          * two {# hours}
732          * few {# hours}
733          * many {# hours}
734          * other {# hours}}}}
735          * </msg>
736          *
737          * NOTE: For the WSB, the format has to match the following, WITHOUT LFs
738          *
739          * <msg id='1431840205484292448' desc='[ICU Syntax] who is viewing?​ This message requires special attention.
740          * Please follow the instructions here:
741          * https://sites.google.com/a/google.com/localization-info-site/Home/training/icusyntax'>
742          * <ph name='[PLURAL_NUM_USERS_OFFSET_1]' ex='Special placeholder used in [ICU Syntax] messages, see
743          * instructions page.'/>
744          * <ph name='[​=0]'/>No one else is viewing.
745          * <ph name='[=1]'/><ph name='USERNAME' ex='Bob'/> is viewing.
746          * <ph name='[=2]'/><ph name='USERNAME' ex='Bob'/> and one other are viewing.
747          * <ph name='[ZERO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
748          * <ph name='[ONE]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
749          * <ph name='[TWO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
750          * <ph name='[FEW]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
751          * <ph name='[MANY]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
752          * <ph name='[OTHER]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
753          * <ph name='[END_PLURAL]'/>
754          * </msg>
755          */
756         Matcher matcher = PLURAL_NUMBER.matcher(pathInfo.getPath());
757         String var = null;
758         if (matcher.find()) {
759             // Plural doesn't use placeholders so create a label.
760             var = matcher.group(1).toUpperCase() + "_NUMBER";
761         } else {
762             var = pathInfo.getFirstVariable();
763         }
764 
765         StringBuilder result = new StringBuilder();
766         if (isEnglish) {
767             result.append('{')
768                 // .append("PLURAL_")
769                 .append(var).append(",plural,");
770         } else {
771             result.append("<ph name='[PLURAL_").append(var).append("]'/>"); // ex='Special placeholder used in [ICU
772             // Syntax] messages, see instructions page.'
773         }
774         for (String key : PLURAL_KEYS) {
775             String value;
776             String coreKey = key.startsWith("=") ? key.substring(1, 2) : key;
777             value = values.get(coreKey);
778             if (value == null) {
779                 if (key.startsWith("=")) {
780                     String stringCount = key.substring(1);
781                     // handle both =x case, and the category
782                     int intCount = Integer.parseInt(stringCount);
783                     Count count = pluralInfo.getCount(intCount);
784                     value = values.get(count.toString());
785                     if (value == null) {
786                         errorSet.add("Bad key/value " + key + "='" + value + "' in " + values);
787                         return null;
788                     }
789                     value = value.replace("{0}", stringCount);
790                 } else {
791                     value = values.get("other");
792                     if (value == null) {
793                         errorSet.add("No 'other' value in " + values);
794                         return null;
795                     }
796                 }
797             }
798             String newValue = MessageFormat.format(MessageFormat.autoQuoteApostrophe(value),
799                 new Object[] { key.startsWith("=") ? key.substring(1, 2) : "#" });
800             PlaceholderType type = isEnglish ? PlaceholderType.BRACES : PlaceholderType.XML;
801             newValue = pathInfo.transformValue(newValue, type);
802             if (isEnglish) {
803                 result.append("\n            ").append(key).append(" {").append(newValue).append('}');
804             } else {
805                 String prefix = key.toUpperCase(Locale.ENGLISH);
806                 result.append("<!--\n        --><ph name='[").append(prefix).append("]'/>").append(newValue);
807             }
808         }
809         if (isEnglish) {
810             result.append('}');
811         } else {
812             result.append("<!--\n        --><ph name='[END_PLURAL]'/>");
813         }
814         return result.toString();
815     }
816 
writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish)817     private static void writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish) {
818         out.println();
819         out.println("    <!--    " + pathInfo.getPath() + "    -->");
820         out.println("    <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'");
821         PlaceholderType type = isEnglish ? PlaceholderType.XML_EXAMPLE : PlaceholderType.XML;
822         String transformValue = pathInfo.transformValue(value, type);
823         out.println("     >" + transformValue + "</msg>");
824         value = TransliteratorUtilities.toHTML.transform(value);
825         if (!value.equals(transformValue) && (!isEnglish || pathInfo.placeholders != null)) {
826             out.println("    <!-- English original:    " + value + "    -->");
827         }
828         out.flush();
829     }
830 
writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename)831     private static void writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename)
832         throws IOException {
833         targetDir += "/skipped/";
834         filename += ".txt";
835         PrintWriter out = FileUtilities.openUTF8Writer(targetDir, filename);
836         out.println("# " + DATE);
837         for (Entry<String, Set<String>> reasonToSet : reasonsToPaths.keyValuesSet()) {
838             for (String path : reasonToSet.getValue()) {
839                 out.println(reasonToSet.getKey() + "    " + path);
840             }
841         }
842         out.close();
843     }
844 
845     static class PathInfo implements Comparable<PathInfo> {
846         private static final Pattern PLACEHOLDER = PatternCache.get("\\{(\\d)}");
847 
848         private final String path;
849         private final Long id;
850         private final String stringId;
851         private final String englishValue;
852         private final boolean changedEnglish;
853         private final Map<String, PlaceholderInfo> placeholders;
854         private final String description;
855         private final String starredPath;
856         private final int wordCount;
857 
858         private static final BreakIterator bi = BreakIterator.getWordInstance(ULocale.ENGLISH);
859         private static final UnicodeSet ALPHABETIC = new UnicodeSet("[:Alphabetic:]");
860 
PathInfo(String path, String englishValue, boolean changedEnglish, Map<String, PlaceholderInfo> placeholders, String description, String starredPath)861         public PathInfo(String path, String englishValue, boolean changedEnglish,
862             Map<String, PlaceholderInfo> placeholders,
863             String description, String starredPath) {
864             if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) {
865                 int x = 0;
866             }
867             if (description == null) {
868                 path2errors.put(path, "missing description");
869             }
870             this.path = path;
871             long id = StringId.getId(path);
872             this.id = id;
873             stringId = String.valueOf(id);
874             this.englishValue = englishValue;
875             this.changedEnglish = changedEnglish;
876             this.placeholders = placeholders;
877             this.description = description == null ? null : description.intern();
878             this.starredPath = starredPath;
879             // count words
880             int tempCount = 0;
881             bi.setText(englishValue);
882             int start = bi.first();
883             for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
884                 String word = englishValue.substring(start, end);
885                 if (ALPHABETIC.containsSome(word)) {
886                     ++tempCount;
887                 }
888             }
889             wordCount = tempCount == 0 ? 1 : tempCount;
890         }
891 
getFirstVariable()892         public String getFirstVariable() {
893             // ... name='FIRST_PART_OF_TEXT' ...
894             PlaceholderInfo info = placeholders.get("{0}");
895             if (info == null) {
896                 throw new IllegalArgumentException("Missing {0} for " + this);
897             }
898             return info.name;
899         }
900 
getPath()901         public String getPath() {
902             return path;
903         }
904 
getId()905         public Long getId() {
906             return id;
907         }
908 
getStringId()909         public String getStringId() {
910             return stringId;
911         }
912 
getEnglishValue()913         public String getEnglishValue() {
914             return englishValue;
915         }
916 
getDescription()917         public String getDescription() {
918             return description;
919         }
920 
getStarredPath()921         public String getStarredPath() {
922             return starredPath;
923         }
924 
getPlaceholderReplacementsToOriginal()925         public Map<String, String> getPlaceholderReplacementsToOriginal() {
926             if (placeholders == null) return null;
927             Map<String, String> placeholderOutput = new LinkedHashMap<>();
928             for (String id : placeholders.keySet()) {
929                 placeholderOutput.put(id, getPlaceholderWithExample(id));
930             }
931             return placeholderOutput;
932         }
933 
getPlaceholderWithExample(String placeholder)934         private String getPlaceholderWithExample(String placeholder) {
935             PlaceholderInfo info = placeholders.get(placeholder);
936             // <ph name='x'><ex>xxx</ex>yyy</ph>
937             return "<ph name='" + info.name + "'><ex>" + info.example + "</ex>" + placeholder + "</ph>";
938         }
939 
940         // static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
941 
transformValue(String value, PlaceholderType type)942         private String transformValue(String value, PlaceholderType type) {
943             value = TransliteratorUtilities.toHTML.transform(value);
944             if (placeholders == null) return value;
945 
946             String placeholderFormat = "";
947             switch (type) {
948             case BRACES:
949                 placeholderFormat = "'{'{0}'}'";
950                 break;
951             case XML:
952                 placeholderFormat = "<ph name=''[{0}]'' />";
953                 break;
954             case XML_EXAMPLE:
955                 placeholderFormat = "<ph name=''{0}''><ex>{1}</ex>'{'{2}'}'</ph>";
956                 break;
957             }
958             Matcher matcher = PLACEHOLDER.matcher(value);
959             StringBuffer buffer = new StringBuffer();
960             int start = 0;
961             while (matcher.find()) {
962                 buffer.append(value.substring(start, matcher.start()));
963                 PlaceholderInfo info = placeholders.get(matcher.group());
964                 buffer.append(MessageFormat.format(placeholderFormat,
965                     new Object[] { info.name, info.example, matcher.group(1) }));
966                 start = matcher.end();
967             }
968             buffer.append(value.substring(start));
969             return buffer.toString();
970         }
971 
replacePlaceholders(String value, String placeholderStart, String placeholderEnd)972         private String replacePlaceholders(String value, String placeholderStart, String placeholderEnd) {
973             Matcher matcher = PLACEHOLDER.matcher(value);
974             StringBuffer buffer = new StringBuffer();
975             int start = 0;
976             while (matcher.find()) {
977                 buffer.append(value.substring(start, matcher.start()));
978                 String name = placeholders.get(matcher.group()).name;
979                 buffer.append(placeholderStart).append(name).append(placeholderEnd);
980                 start = matcher.end();
981             }
982             buffer.append(value.substring(start));
983             return buffer.toString();
984         }
985 
986         @Override
compareTo(PathInfo arg0)987         public int compareTo(PathInfo arg0) {
988             return path.compareTo(arg0.path);
989         }
990 
991         @Override
toString()992         public String toString() {
993             return path;
994         }
995     }
996 
997     static class EnglishInfo implements Iterable<PathInfo> {
998 
999         final Map<String, PathInfo> pathToPathInfo = new TreeMap<>();
1000         final Map<Long, PathInfo> longToPathInfo = new HashMap<>();
1001         final CLDRFile english;
1002 
getPathInfo(long hash)1003         PathInfo getPathInfo(long hash) {
1004             return longToPathInfo.get(hash);
1005         }
1006 
getName(String localeId)1007         public String getName(String localeId) {
1008             return english.getName(localeId);
1009         }
1010 
getPathInfo(String path)1011         PathInfo getPathInfo(String path) {
1012             return pathToPathInfo.get(path);
1013         }
1014 
EnglishInfo(String targetDir, CLDRFile english, CLDRFile root)1015         EnglishInfo(String targetDir, CLDRFile english, CLDRFile root) throws Exception {
1016 
1017             Map<String, String> oldPathValueMap = ReadXMB.load(CLDRPaths.BASE_DIRECTORY +
1018                 "/cldr-tools/org/unicode/cldr/unittest/data/xmb/",
1019                 "en.xml");
1020 
1021             PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
1022 
1023             this.english = english;
1024             // we don't want the fully resolved paths, but we do want the direct inheritance from root.
1025             //Status status = new Status();
1026             Map<String, List<Set<String>>> starredPaths = new TreeMap<>();
1027 
1028             HashSet<String> metazonePaths = new HashSet<>();
1029             // ^//ldml/dates/timeZoneNames/metazone\[@type="([^"]*)"]
1030             for (MetazoneInfo metazoneInfo : MetazoneInfo.METAZONE_LIST) {
1031                 for (String item : metazoneInfo.getTypes()) {
1032                     String path = "//ldml/dates/timeZoneNames/metazone[@type=\"" + metazoneInfo.metazoneId + "\"]"
1033                         + item;
1034                     metazonePaths.add(path);
1035                 }
1036             }
1037 
1038             // TODO add short countries
1039             HashSet<String> extraLanguages = new HashSet<>();
1040             // ldml/localeDisplayNames/languages/language[@type=".*"]
1041 
1042             for (String langId : PathDescription.EXTRA_LANGUAGES) {
1043                 String langPath = "//ldml/localeDisplayNames/languages/language[@type=\"" + langId + "\"]";
1044                 extraLanguages.add(langPath);
1045             }
1046 
1047             Set<String> sorted = Builder.with(new TreeSet<String>())
1048                 .addAll(english)
1049                 .removeAll(
1050                     new Transform<String, Boolean>() {
1051                         @Override
1052                         public Boolean transform(String source) {
1053                             return source.startsWith("//ldml/dates/timeZoneNames/metazone") ? Boolean.TRUE
1054                                 : Boolean.FALSE;
1055                         }
1056                     })
1057                 .get();
1058             sorted.addAll(metazonePaths);
1059             if (DEBUG) {
1060                 TreeSet<String> diffs = new TreeSet<>(extraLanguages);
1061                 diffs.removeAll(sorted);
1062                 System.out.println(diffs);
1063             }
1064             sorted.addAll(extraLanguages);
1065 
1066             // add the extra Count items.
1067             Map<String, String> extras = new HashMap<>();
1068             Matcher m = COUNT_ATTRIBUTE.matcher("");
1069 
1070             for (String path : sorted) {
1071                 if (path.contains("[@count=\"")) {
1072                     m.reset(path).find();
1073                     for (String key : EXTRA_PLURAL_KEYS) {
1074                         String path2 = path.substring(0, m.start(1)) + key + path.substring(m.end(1));
1075                         extras.put(path2, path);
1076                     }
1077                 }
1078                 // if (path.contains("ellipsis")) {
1079                 // System.out.println(path);
1080                 // }
1081             }
1082             sorted.addAll(extras.keySet());
1083 
1084             Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
1085             Set<String> missingDescriptions = new TreeSet<>();
1086             //Output<String[]> pathArguments = new Output<String[]>();
1087 
1088             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en");
1089             RegexLookup<Boolean> coverageAllow = new RegexLookup<Boolean>()
1090                 .add("^//ldml/localeDisplayNames/keys/key", true)
1091                 .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(jv|zxx|gsw|eo)\"]", true)
1092                 .add("^//ldml/localeDisplayNames/scripts/script", true)
1093                 .add("^//ldml/localeDisplayNames/types/type", true)
1094                 .add(
1095                     "^//ldml/dates/calendars/calendar\\[@type=\"[^\"]*\"]/dayPeriods/dayPeriodContext\\[@type=\"format\"]",
1096                     true);
1097 
1098             // TODO: for each count='other' path, add the other keywords and values
1099             PathDescription pathDescription = new PathDescription(GenerateXMB.supplementalDataInfo, english, extras,
1100                 starredPaths, PathDescription.ErrorHandling.SKIP);
1101 
1102             for (String path : sorted) {
1103                 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) {
1104                     int x = 0;
1105                 }
1106                 String value = english.getStringValue(path);
1107                 Level level = coverageLevel.getLevel(path);
1108                 if (value == null) {
1109                     value = "[EMPTY]";
1110                     addSkipReasons(reasonsToPaths, "empty-value", level, path, value);
1111                     continue;
1112                 }
1113                 if (pathMatcher != null
1114                     && !pathMatcher.reset(path).find()) {
1115                     addSkipReasons(reasonsToPaths, "path-parameter", level, path, value);
1116                     continue;
1117                 }
1118                 PathStatus pathStatus = shouldSkipPath(path, value);
1119                 if (pathStatus == PathStatus.SKIP) {
1120                     addSkipReasons(reasonsToPaths, "path-remove", level, path, value);
1121                     continue;
1122                 }
1123 
1124                 if (level.compareTo(Level.MODERN) > 0 && pathStatus != PathStatus.KEEP) {
1125                     if (coverageAllow.get(path) == null) { // HACK
1126                         addSkipReasons(reasonsToPaths, "coverage", level, path, value);
1127                         continue;
1128                     } else {
1129                         addSkipReasons(reasonsToPaths, "coverage*", level, path, value);
1130                         continue;
1131                         // System.out.println("Not skipping " + path);
1132                     }
1133                 }
1134 
1135                 String description = pathDescription.getDescription(path, value, level, null);
1136                 EnumSet<PathDescription.Status> descriptionStatus = pathDescription.getStatus();
1137                 if (!descriptionStatus.isEmpty()) {
1138                     addSkipReasons(reasonsToPaths, descriptionStatus.toString(), level, path, value);
1139                     description = null;
1140                 } else {
1141                     description = "[ICU CLDR] " + description;
1142                 }
1143 
1144                 String oldValue = oldPathValueMap.get(path);
1145                 boolean changedEnglish = !value.equals(oldValue);
1146                 PathInfo row = new PathInfo(path, value, changedEnglish, patternPlaceholders.get(path), description,
1147                     pathDescription.getStarredPathOutput());
1148 
1149                 if (description == PathDescription.MISSING_DESCRIPTION) {
1150                     missingDescriptions.add(pathDescription.getStarredPathOutput());
1151                 }
1152 
1153                 Long hash = row.getId();
1154                 if (longToPathInfo.containsKey(hash)) {
1155                     throw new IllegalArgumentException("Id collision for "
1156                         + path + " and " + longToPathInfo.get(hash).getPath());
1157                 }
1158                 pathToPathInfo.put(path, row);
1159                 longToPathInfo.put(hash, row);
1160                 if (value.contains("{0}") && patternPlaceholders.get(path) == null) {
1161                     System.out.println("ERROR, no placeholders for {0}...: " + path + " ; " + value);
1162                 }
1163             }
1164 
1165             PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-paths.txt");
1166             out.println("# " + DATE);
1167             for (Entry<String, List<Set<String>>> starredPath : starredPaths.entrySet()) {
1168                 out.println(starredPath.getKey() + "\t\t" + starredPath.getValue());
1169             }
1170             out.close();
1171             out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-missingDescriptions.txt");
1172             out.println("# " + DATE);
1173             for (String starredPath : missingDescriptions) {
1174                 // ^//ldml/dates/timeZoneNames/zone\[@type=".*"]/exemplarCity ; ROOT timezone ; The name of a city in:
1175                 // {0}. See cldr.org/xxxx.
1176                 out.println(toRegexPath(starredPath) + "\t;\tDESCRIPTION\t" + starredPaths.get(starredPath));
1177             }
1178             out.close();
1179             writeReasons(reasonsToPaths, targetDir, "en");
1180         }
1181 
toRegexPath(String starredPath)1182         private String toRegexPath(String starredPath) {
1183             String result = starredPath.replace("[", "\\[");
1184             result = result.replace("\".*\"", "\"([^\"]*)\"");
1185             return "^" + result;
1186         }
1187 
1188         @Override
iterator()1189         public Iterator<PathInfo> iterator() {
1190             return pathToPathInfo.values().iterator();
1191         }
1192     }
1193 
addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level, String path, String value)1194     static void addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level,
1195         String path, String value) {
1196         reasonsToPaths.put(descriptionStatus + "\t" + level, path + "\t" + value);
1197     }
1198 
1199     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date)1200     private static long getDateTimeinMillis(int year, int month, int date) {
1201         Calendar cal = Calendar.getInstance();
1202         cal.set(year, month, date);
1203         return cal.getTimeInMillis();
1204     }
1205 
    // Start of the time window that is sampled when examining zones.
    // NOTE(review): Calendar months are zero-based, so (2000, 1, 0) is "day 0 of February",
    // i.e. presumably 31 January 2000 rather than 1 January - confirm this is intended.
    static final long START_TIME = getDateTimeinMillis(2000, 1, 0);
    // End (exclusive) of the sampled window; the same month/day convention applies.
    static final long END_TIME = getDateTimeinMillis(2015, 1, 0);
    // Step between samples when comparing zone offsets: 15 minutes, in milliseconds.
    static final long DELTA_TIME = 15 * 60 * 1000;
    // Step between samples when probing for daylight time: 90 days, in milliseconds.
    static final long MIN_DAYLIGHT_PERIOD = 90L * 24 * 60 * 60 * 1000;
1210 
1211     static final Set<String> HAS_DAYLIGHT;
1212     static {
1213         Set<String> hasDaylightTemp = new HashSet<>();
1214         Date date = new Date();
1215         main: for (String zoneId : sc.getCanonicalTimeZones()) {
1216             TimeZone zone = TimeZone.getTimeZone(zoneId);
1217             for (long time = START_TIME + MIN_DAYLIGHT_PERIOD; time < END_TIME; time += MIN_DAYLIGHT_PERIOD) {
1218                 date.setTime(time);
1219                 if (zone.inDaylightTime(date)) {
1220                     hasDaylightTemp.add(zoneId);
1221                     if (false && !zone.useDaylightTime()) {
1222                         System.out.println(zoneId + "\tuseDaylightTime()==false, but \tinDaylightTime(/" + date
1223                             + "/)==true");
1224                     }
1225                     continue main;
1226                 }
1227             }
1228         }
1229         HAS_DAYLIGHT = Collections.unmodifiableSet(hasDaylightTemp);
1230     }
1231 
1232     static final Set<String> SINGULAR_COUNTRIES;
1233 
1234     private static PrintWriter countFile;
1235     static {
1236         // start with certain special-case countries
1237         Set<String> singularCountries = new HashSet<>(
1238             Arrays.asList("CL EC ES NZ PT AQ FM GL KI UM PF".split(" ")));
1239 
1240         Map<String, Set<String>> countryToZoneSet = sc.getCountryToZoneSet();
1241 
1242         main: for (Entry<String, Set<String>> countryZones : countryToZoneSet.entrySet()) {
1243             String country = countryZones.getKey();
1244             if (country.equals("001")) {
1245                 continue;
1246             }
1247             Set<String> zones = countryZones.getValue();
1248             if (zones.size() == 1) {
1249                 singularCountries.add(country);
1250                 continue;
1251             }
1252             // make a set of sets
1253             List<TimeZone> initial = new ArrayList<>();
1254             for (String s : zones) {
TimeZone.getTimeZone(s)1255                 initial.add(TimeZone.getTimeZone(s));
1256             }
1257             // now cycle through the times and see if we find any differences
1258             for (long time = START_TIME; time < END_TIME; time += DELTA_TIME) {
1259                 int firstOffset = Integer.MIN_VALUE;
1260                 for (TimeZone zone : initial) {
1261                     int offset = zone.getOffset(time);
1262                     if (firstOffset == Integer.MIN_VALUE) {
1263                         firstOffset = offset;
1264                     } else {
1265                         if (firstOffset != offset) {
1266                             if (false)
1267                                 System.out.println(country
1268                                     + " Difference at: " + new Date(time)
1269                                     + ", " + zone.getDisplayName() + " " + (offset / 1000.0 / 60 / 60)
1270                                     + ", " + initial.iterator().next().getDisplayName() + " "
1271                                     + (firstOffset / 1000.0 / 60 / 60));
1272                             continue main;
1273                         }
1274                     }
1275                 }
1276             }
1277             singularCountries.add(country);
1278         }
1279         SINGULAR_COUNTRIES = Collections.unmodifiableSet(singularCountries);
1280     }
1281 
1282     static final class MetazoneInfo {
1283 
1284         /**
1285          * @param metazoneId
1286          * @param singleCountry
1287          * @param hasDaylight
1288          * @param zonesForCountry
1289          * @param regionToZone
1290          */
MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight)1291         public MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight) {
1292             this.golden = golden;
1293             this.metazoneId = metazoneId;
1294             this.singleCountry = singleCountry;
1295             this.hasDaylight = hasDaylight;
1296         }
1297 
1298         static final String[] GENERIC = { "/long/generic",
1299             // "/short/generic"
1300         };
1301         static final String[] DAYLIGHT = { "/long/generic", "/long/standard", "/long/daylight",
1302             // "/short/generic", "/short/standard", "/short/daylight"
1303         };
1304 
getTypes()1305         public String[] getTypes() {
1306             return hasDaylight ? DAYLIGHT : GENERIC;
1307         }
1308 
1309         private final String metazoneId;
1310         private final String golden;
1311         private final boolean singleCountry;
1312         private final boolean hasDaylight;
1313 
1314         static final List<MetazoneInfo> METAZONE_LIST;
1315         static {
1316             // Set<String> zones = supplementalDataInfo.getCanonicalTimeZones();
1317             ArrayList<MetazoneInfo> result = new ArrayList<>();
1318 
1319             Map<String, String> zoneToCountry = sc.getZoneToCounty();
1320 
1321             Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone();
1322             for (String metazone : supplementalDataInfo.getAllMetazones()) {
1323                 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone);
1324                 String golden = regionToZone.get("001");
1325                 if (golden == null) {
1326                     throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone);
1327                 }
1328                 String region = zoneToCountry.get(golden);
1329                 boolean isSingleCountry = SINGULAR_COUNTRIES.contains(region);
1330                 if (isSingleCountry) {
1331                     continue;
1332                 }
1333 
1334                 // TimeZone goldenZone = TimeZone.getTimeZone(golden);
1335 
1336                 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(golden);
1337                 if (metazoneRanges == null) {
1338                     throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone);
1339                 }
1340                 MetazoneInfo item = new MetazoneInfo(metazone, golden, isSingleCountry, HAS_DAYLIGHT.contains(golden));
1341                 result.add(item);
1342             }
1343             METAZONE_LIST = Collections.unmodifiableList(result);
1344         }
1345 
1346         @Override
toString()1347         public String toString() {
1348             return sc.getZoneToCounty().get(golden)
1349                 + "\t" + metazoneId
1350                 + "\t" + golden
1351                 + "\t" + (singleCountry ? "singleCountry" : "")
1352                 + "\t" + (hasDaylight ? "useDaylightTime" : "")
1353             // + ": " + zonesForCountry
1354             // + "\t" + regionToZone;
1355             ;
1356         }
1357     }
1358 
showMetazoneInfo()1359     static void showMetazoneInfo() {
1360         System.out.println("\nZones in multiple metazones\n");
1361 
1362         for (String zone : sc.getCanonicalTimeZones()) {
1363             Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(zone);
1364             if (metazoneRanges == null) {
1365                 System.out.println("Zone doesn't have metazone! " + zone);
1366                 continue;
1367             }
1368             if (metazoneRanges.size() != 1) {
1369                 for (MetaZoneRange range : metazoneRanges) {
1370                     System.out.println(zone + ":\t" + range);
1371                 }
1372                 System.out.println();
1373             }
1374         }
1375 
1376         System.out.println("\nMetazoneInfo\n");
1377 
1378         for (boolean singleCountry : new boolean[] { false }) {
1379             for (boolean hasDaylight : new boolean[] { false, true }) {
1380                 for (MetazoneInfo mzone : MetazoneInfo.METAZONE_LIST) {
1381                     if (mzone.hasDaylight != hasDaylight) continue;
1382                     if (mzone.singleCountry != singleCountry) continue;
1383                     System.out.println(mzone);
1384                 }
1385             }
1386         }
1387     }
1388 
displayWsb(String file, EnglishInfo info)1389     private static void displayWsb(String file, EnglishInfo info) {
1390         try {
1391             String[] parts = file.split("/");
1392             ULocale locale = new ULocale(parts[parts.length - 2]);
1393             FileInputStream fis = new FileInputStream(file);
1394             XMLReader xmlReader = XMLFileReader.createXMLReader(false);
1395             xmlReader.setErrorHandler(new MyErrorHandler());
1396             Map<String, String> data = new TreeMap<>();
1397             xmlReader.setContentHandler(new MyContentHandler(locale, data, info));
1398             InputSource is = new InputSource(fis);
1399             is.setSystemId(file);
1400             xmlReader.parse(is);
1401             fis.close();
1402             for (Entry<String, String> entity : data.entrySet()) {
1403                 String path = entity.getKey();
1404                 String value = entity.getValue();
1405                 PathInfo pathInfo = info.getPathInfo(path);
1406                 System.out.println(value + "\t" + (pathInfo == null ? "?" : pathInfo.englishValue) + "\t" + path);
1407             }
1408         } catch (SAXParseException e) {
1409             System.out.println("\t" + "Can't read " + file);
1410             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
1411         } catch (SAXException e) {
1412             System.out.println("\t" + "Can't read " + file);
1413             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
1414         } catch (IOException e) {
1415             System.out.println("\t" + "Can't read " + file);
1416             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
1417         }
1418     }
1419 
1420     static class MyErrorHandler implements ErrorHandler {
1421         @Override
error(SAXParseException exception)1422         public void error(SAXParseException exception) throws SAXException {
1423             System.out.println("\nerror: " + XMLFileReader.showSAX(exception));
1424             throw exception;
1425         }
1426 
1427         @Override
fatalError(SAXParseException exception)1428         public void fatalError(SAXParseException exception) throws SAXException {
1429             System.out.println("\nfatalError: " + XMLFileReader.showSAX(exception));
1430             throw exception;
1431         }
1432 
1433         @Override
warning(SAXParseException exception)1434         public void warning(SAXParseException exception) throws SAXException {
1435             System.out.println("\nwarning: " + XMLFileReader.showSAX(exception));
1436             throw exception;
1437         }
1438     }
1439 
1440     static class MyContentHandler implements ContentHandler {
1441         private static final boolean SHOW = false;
1442         private Map<String, String> myData;
1443         private EnglishInfo info;
1444         private PathInfo lastPathInfo;
1445         private StringBuilder currentText = new StringBuilder();
1446         private long lastId;
1447         private String lastPluralTag;
1448         private Map<String, String> pluralTags = new LinkedHashMap<>();
1449         private Set<String> pluralKeywords;
1450 
MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info)1451         public MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info) {
1452             myData = data;
1453             this.info = info;
1454             PluralRules rules = PluralRules.forLocale(locale);
1455             pluralKeywords = Builder.with(new HashSet<String>()).addAll(rules.getKeywords()).add("0").add("1").freeze();
1456         }
1457 
1458         @Override
characters(char[] arg0, int arg1, int arg2)1459         public void characters(char[] arg0, int arg1, int arg2) throws SAXException {
1460             String chars = String.valueOf(arg0, arg1, arg2);
1461             // if (SHOW) System.out.println("\t characters\t" + chars);
1462             currentText.append(chars);
1463         }
1464 
1465         @Override
endDocument()1466         public void endDocument() throws SAXException {
1467             if (SHOW) System.out.println("\t endDocument\t");
1468         }
1469 
1470         @Override
endElement(String arg0, String arg1, String qName)1471         public void endElement(String arg0, String arg1, String qName) throws SAXException {
1472             // if (SHOW) System.out.println("\t endElement\t" + arg0 + "\t" + arg1 + "\t" + qName);
1473             if (qName.equals("msg")) {
1474                 String chars = currentText.toString().replace("\n", "").trim();
1475                 if (lastPathInfo == null) {
1476                     System.out.println("***Missing path info for " + lastId + "\t" + chars);
1477                     // myData.put("*** Missing path: " + lastId, chars);
1478                 } else if (pluralTags.size() != 0) {
1479                     for (Entry<String, String> pluralTagEntry : pluralTags.entrySet()) {
1480                         String pluralTag = pluralTagEntry.getKey();
1481                         String pluralTagValue = pluralTagEntry.getValue();
1482                         if (pluralKeywords.contains(pluralTag)) {
1483                             String fixedCount = lastPathInfo.path.replace("other", pluralTag);
1484                             myData.put(fixedCount, pluralTagValue);
1485                         } else {
1486                             System.out.println("***Skipping " + pluralTag + "\t" + pluralTagValue);
1487                         }
1488                     }
1489                     // myData.put(lastPathInfo.path, pluralTags.toString());
1490                     pluralTags.clear();
1491                 } else {
1492                     myData.put(lastPathInfo.path, chars);
1493                 }
1494                 currentText.setLength(0);
1495             }
1496         }
1497 
1498         @Override
endPrefixMapping(String arg0)1499         public void endPrefixMapping(String arg0) throws SAXException {
1500             if (SHOW) System.out.println("\t endPrefixMapping\t" + arg0);
1501         }
1502 
1503         @Override
ignorableWhitespace(char[] arg0, int arg1, int arg2)1504         public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {
1505             if (SHOW) System.out.println("\t ignorableWhitespace\t" + String.valueOf(arg0, arg1, arg2));
1506         }
1507 
1508         @Override
processingInstruction(String arg0, String arg1)1509         public void processingInstruction(String arg0, String arg1) throws SAXException {
1510             if (SHOW) System.out.println("\t processingInstruction\t" + arg0 + "\t" + arg1);
1511         }
1512 
1513         @Override
setDocumentLocator(Locator arg0)1514         public void setDocumentLocator(Locator arg0) {
1515             if (SHOW) System.out.println("\t setDocumentLocator\t" + arg0);
1516         }
1517 
1518         @Override
skippedEntity(String arg0)1519         public void skippedEntity(String arg0) throws SAXException {
1520             if (SHOW) System.out.println("\t skippedEntity\t" + arg0);
1521         }
1522 
1523         @Override
startDocument()1524         public void startDocument() throws SAXException {
1525             if (SHOW) System.out.println("\t startDocument\t");
1526         }
1527 
1528         @Override
startElement(String arg0, String arg1, String qName, Attributes arg3)1529         public void startElement(String arg0, String arg1, String qName, Attributes arg3) throws SAXException {
1530             // if (SHOW) System.out.println("\t startElement\t" + arg0 + "\t" + arg1 + "\t" + qName + "\t" +
1531             // showAttributes(arg3));
1532             if (qName.equals("msg")) {
1533                 lastId = Long.parseLong(arg3.getValue("id"));
1534                 lastPathInfo = info.getPathInfo(lastId);
1535                 currentText.setLength(0);
1536             } else if (qName.equals("ph")) {
1537                 String name = arg3.getValue("name");
1538                 String original = lastPathInfo.getPlaceholderReplacementsToOriginal().get(name);
1539                 if (original != null) {
1540                     currentText.append(original);
1541                 } else if (name.startsWith("[PLURAL_")) {
1542                     pluralTags.clear();
1543                     lastPluralTag = "[START_PLURAL]";
1544                 } else {
1545                     String pluralTag = PLURAL_TAGS.get(name);
1546                     if (pluralTag != null) {
1547                         String chars = currentText.toString().replace("\n", "").trim();
1548                         pluralTags.put(lastPluralTag, chars);
1549                         currentText.setLength(0);
1550                         lastPluralTag = pluralTag;
1551                     } else {
1552                         System.out.println("***Can't find " + name + " in "
1553                             + lastPathInfo.getPlaceholderReplacementsToOriginal());
1554                     }
1555                 }
1556             }
1557         }
1558 
showAttributes(Attributes atts)1559         private String showAttributes(Attributes atts) {
1560             String result = "";
1561             for (int i = 0; i < atts.getLength(); ++i) {
1562                 result += atts.getQName(i) + "=\"" + atts.getValue(i) + "\"\t";
1563             }
1564             return result;
1565         }
1566 
1567         @Override
startPrefixMapping(String arg0, String arg1)1568         public void startPrefixMapping(String arg0, String arg1) throws SAXException {
1569             if (SHOW) System.out.println("\t startPrefixMapping\t" + arg0 + "\t" + arg1);
1570         }
1571     }
1572 
    /**
     * Maps the {@code name} of a plural-marker placeholder to the plural tag under which the
     * following text is stored: "0" / "1" for the explicit-value markers, the
     * {@link PluralRules} keyword for the category markers, and the empty string for
     * "[END_PLURAL]".
     *
     * NOTE(review): the first key looks like it contains an invisible character between '[' and
     * '=' — confirm whether that is intentional before normalizing it.
     */
    static final Map<String, String> PLURAL_TAGS = Builder.with(new HashMap<String, String>())
        .put("[​=0]", "0")
        .put("[=1]", "1")
        .put("[ZERO]", PluralRules.KEYWORD_ZERO)
        .put("[ONE]", PluralRules.KEYWORD_ONE)
        .put("[TWO]", PluralRules.KEYWORD_TWO)
        .put("[FEW]", PluralRules.KEYWORD_FEW)
        .put("[MANY]", PluralRules.KEYWORD_MANY)
        .put("[OTHER]", PluralRules.KEYWORD_OTHER)
        .put("[END_PLURAL]", "")
        .freeze();

    // Not read or assigned anywhere in the visible part of this file.
    private static String compareDirectory;
1586 }
1587