1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.io.PrintWriter;
7 import java.io.StringWriter;
8 import java.util.ArrayList;
9 import java.util.Arrays;
10 import java.util.Calendar;
11 import java.util.Collections;
12 import java.util.Date;
13 import java.util.EnumSet;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.LinkedHashMap;
18 import java.util.LinkedHashSet;
19 import java.util.List;
20 import java.util.Locale;
21 import java.util.Map;
22 import java.util.Map.Entry;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 
29 import org.unicode.cldr.draft.FileUtilities;
30 import org.unicode.cldr.test.CheckExemplars;
31 import org.unicode.cldr.test.CoverageLevel2;
32 import org.unicode.cldr.test.DisplayAndInputProcessor;
33 import org.unicode.cldr.test.QuickCheck;
34 import org.unicode.cldr.tool.Option.Options;
35 import org.unicode.cldr.util.Builder;
36 import org.unicode.cldr.util.CLDRFile;
37 import org.unicode.cldr.util.CLDRPaths;
38 import org.unicode.cldr.util.Factory;
39 import org.unicode.cldr.util.FileCopier;
40 import org.unicode.cldr.util.LanguageTagParser;
41 import org.unicode.cldr.util.Level;
42 import org.unicode.cldr.util.PathDescription;
43 import org.unicode.cldr.util.PatternCache;
44 import org.unicode.cldr.util.PatternPlaceholders;
45 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
46 import org.unicode.cldr.util.PrettyPath;
47 import org.unicode.cldr.util.RegexLookup;
48 import org.unicode.cldr.util.RegexLookup.Finder;
49 import org.unicode.cldr.util.RegexUtilities;
50 import org.unicode.cldr.util.StandardCodes;
51 import org.unicode.cldr.util.StringId;
52 import org.unicode.cldr.util.SupplementalDataInfo;
53 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
54 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
56 import org.unicode.cldr.util.TransliteratorUtilities;
57 import org.unicode.cldr.util.With;
58 import org.unicode.cldr.util.XMLFileReader;
59 import org.unicode.cldr.util.XMLSource;
60 import org.unicode.cldr.util.XPathParts;
61 import org.xml.sax.Attributes;
62 import org.xml.sax.ContentHandler;
63 import org.xml.sax.ErrorHandler;
64 import org.xml.sax.InputSource;
65 import org.xml.sax.Locator;
66 import org.xml.sax.SAXException;
67 import org.xml.sax.SAXParseException;
68 import org.xml.sax.XMLReader;
69 
70 import com.ibm.icu.dev.util.CollectionUtilities;
71 import com.ibm.icu.impl.Relation;
72 import com.ibm.icu.impl.Row;
73 import com.ibm.icu.impl.Row.R2;
74 import com.ibm.icu.lang.CharSequences;
75 import com.ibm.icu.text.BreakIterator;
76 import com.ibm.icu.text.DateFormat;
77 import com.ibm.icu.text.MessageFormat;
78 import com.ibm.icu.text.PluralRules;
79 import com.ibm.icu.text.SimpleDateFormat;
80 import com.ibm.icu.text.Transform;
81 import com.ibm.icu.text.UnicodeSet;
82 import com.ibm.icu.util.Output;
83 import com.ibm.icu.util.TimeZone;
84 import com.ibm.icu.util.ULocale;
85 
86 public class GenerateXMB {
87     private static final String DEBUG_PATH = "[@type=\"day\"]/unitPattern[@count=\"1\"]";
88 
89     static StandardCodes sc = StandardCodes.make();
90 
91     static final String DATE;
92     static {
93         DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
94         DATE = dateFormat.format(new Date());
95     }
96     static final String stock = "en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK";
97     private static final HashSet<String> REGION_LOCALES = new HashSet<String>(Arrays.asList(stock.split("\\|")));
98 
99     final static Options myOptions = new Options("In normal usage, you set the -t option for the target.")
100         .add("target", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/xmb/",
101             "The target directory for building. Will generate an English .xmb file, and .wsb files for other languages.")
102         .add(
103             "file",
104             ".*",
105             stock,
106             "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering")
107         // "^(sl|fr)$",
108         .add("path", ".*", "Filter the information based on path name, using a regex argument")
109         // "dates.*(pattern|available)",
110         .add("content", ".*", "Filter the information based on content name, using a regex argument")
111         .add("jason", ".*", "Generate JSON versions instead")
112         .add("zone", null, "Show metazoneinfo and exit")
113         .add("wsb", ".*", "Show metazoneinfo and exit")
114         .add("kompare", ".*", CLDRPaths.BASE_DIRECTORY + "../DATA/cldr/common/google-bulk-imports",
115             "Compare data with directory; generate files in -target.")
116         .add("project_name", 'n', ".*", "CLDR", "The ID of the project.");
117 
    /** Shared supplemental data; in the visible code it supplies plural rules and the default-content locales. */
    static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
    // static Matcher contentMatcher;
    /** Matcher built in main() from the -path option; stays null when the option is not given. */
    static Matcher pathMatcher;
    /**
     * Path rules loaded from xmbSkip.txt. shouldSkipPath() expects each looked-up value to be one
     * of SKIP, KEEP, MAYBE or VALUE and throws for anything else.
     */
    static RegexLookup<String> pathFindRemover = new RegexLookup<String>().loadFromFile(GenerateXMB.class,
        "xmbSkip.txt");; // .compile("//ldml/dates/calendars/calendar\\[@type=\"(?!gregorian).*").matcher("");
    static PrettyPath prettyPath = new PrettyPath();
    /** Running error counter; main() prints it added to the size of path2errors. */
    static int errors = 0;
    /** Sorted relation of error entries; main() dumps it to log/errors.txt. Presumably keyed by path - confirm. */
    static Relation<String, String> path2errors = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);

    // enum Handling {SKIP};
    /** Reusable matcher for paths containing "dates" followed by "pattern" or "available". */
    static final Matcher datePatternMatcher = PatternCache.get("dates.*(pattern|available)").matcher("");

    public static final boolean DEBUG = false;

    /** Locales for which main() never writes a WSB file. */
    private static final HashSet<String> SKIP_LOCALES = new HashSet<String>(
        Arrays.asList(new String[] { "en", "root" }));

    /** DTD version of the unresolved English file; assigned in main(). */
    public static String DTD_VERSION;

    /** Value of the project_name option; assigned in main() and written into the bundle headers by writeFile(). */
    private static String projectId;
138 
    /** The ways a placeholder can be rendered in generated output. */
    enum PlaceholderType {
        BRACES, // e.g. {NAME}
        XML, // e.g. <ph name='NAME' />
        XML_EXAMPLE // e.g. <ph name='NAME'><ex>EXAMPLE</ex>{0}</ph>
    };
144 
main(String[] args)145     public static void main(String[] args) throws Exception {
146         myOptions.parse(args, true);
147         Option option;
148         option = myOptions.get("zone");
149         if (option.doesOccur()) {
150             showMetazoneInfo();
151             return;
152         }
153         option = myOptions.get("file");
154         String fileMatcherString = option.getValue();
155         option = myOptions.get("content");
156         Matcher contentMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null;
157         option = myOptions.get("path");
158         pathMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null;
159 
160         String targetDir = myOptions.get("target").getValue();
161         countFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "counts.txt");
162 
163         Factory cldrFactory1 = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
164         CLDRFile english = cldrFactory1.make("en", true);
165         CLDRFile englishTop = cldrFactory1.make("en", false);
166         DTD_VERSION = englishTop.getDtdVersion();
167 
168         CLDRFile root = cldrFactory1.make("en", true);
169 
170         showDefaultContents(targetDir, english);
171         EnglishInfo englishInfo = new EnglishInfo(targetDir, english, root);
172 
173         option = myOptions.get("kompare");
174         if (option.doesOccur()) {
175             compareDirectory = option.getValue();
176             compareFiles(fileMatcherString, contentMatcher, targetDir, cldrFactory1, english, englishInfo);
177             return;
178         }
179 
180         if (myOptions.get("wsb").doesOccur()) {
181             displayWsb(myOptions.get("wsb").getValue(), englishInfo);
182             return;
183         }
184 
185         projectId = myOptions.get("project_name").getValue();
186 
187         writeFile(targetDir, "en", englishInfo, english, true, false);
188         writeFile(targetDir + "/filtered/", "en", englishInfo, english, true, true);
189 
190         // TODO:
191         // Replace {0}... with placeholders (Mostly done, but need better examples)
192         // Replace datetime fields (MMM, L, ...) with placeholders
193         // Skip items that we don't need translated (most language names, script names, deprecated region names, etc.
194         // Add descriptions
195         // Add pages with detailed descriptions, and links from the descriptions
196         // Represent the items with count= as ICUSyntax
197         // Filter items that we don't want to get translated, and add others that we need even if not in English
198         // Rewire items that are in undistinguished attributes
199         // Test each xml file for validity
200         // Generate strings that let the user choose the placeholder style hh vs HH,...???
201 
202         Factory cldrFactory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, fileMatcherString);
203         LanguageTagParser ltp = new LanguageTagParser();
204 
205         for (String file : cldrFactory2.getAvailable()) {
206             if (SKIP_LOCALES.contains(file)) {
207                 continue;
208             }
209 
210             // skip all locales with regions (with certain exceptions)
211             if (ltp.set(file).getRegion().length() != 0) {
212                 if (!REGION_LOCALES.contains(file)) {
213                     continue;
214                 }
215             }
216 
217             // skip anything without plural rules
218             final PluralInfo plurals = supplementalDataInfo.getPlurals(file, false);
219             if (plurals == null) {
220                 System.out.println("Skipping " + file + ", no plural rules");
221                 continue;
222             }
223 
224             CLDRFile cldrFile = cldrFactory2.make(file, true);
225             writeFile(targetDir + "/wsb/", file, englishInfo, cldrFile, false, false);
226             writeFile(targetDir + "/wsb/filtered/", file, englishInfo, cldrFile, false, true);
227             countFile.flush();
228         }
229         countFile.close();
230         PrintWriter errorFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "errors.txt");
231         for (Entry<String, Set<String>> entry : path2errors.keyValuesSet()) {
232             errorFile.println(entry);
233         }
234         errorFile.close();
235         System.out.println("Errors: " + (errors + path2errors.size()));
236     }
237 
compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir, Factory cldrFactory1, CLDRFile english, EnglishInfo englishInfo)238     private static void compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir,
239         Factory cldrFactory1, CLDRFile english,
240         EnglishInfo englishInfo) throws IOException {
241         SubmittedPathFixer fixer = new SubmittedPathFixer();
242         Factory cldrFactory2 = Factory.make(compareDirectory, fileMatcherString);
243         PrintWriter output = null;
244         PrintWriter log = FileUtilities.openUTF8Writer(targetDir + "/log/", "skipped.txt");
245 
246         for (String file : cldrFactory2.getAvailable()) {
247             // System.out.println("Checking " + file);
248             CLDRFile submitted = cldrFactory2.make(file, false);
249             CLDRFile trunk = cldrFactory1.make(file, true);
250             for (String path : With.in(submitted.iterator(null, submitted.getComparator()))) {
251                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
252                     continue;
253                 }
254                 String submittedValue = submitted.getStringValue(path);
255                 if (contentMatcher != null && !contentMatcher.reset(submittedValue).matches()) {
256                     continue;
257                 }
258                 PathStatus pathStatus = shouldSkipPath(path, submittedValue);
259                 if (pathStatus == PathStatus.SKIP) {
260                     continue;
261                 }
262 
263                 // fix alt
264                 String trunkPath = fixer.fix(path, false);
265                 String trunkValue = trunk.getStringValue(trunkPath);
266                 if (CharSequences.equals(submittedValue, trunkValue)) {
267                     continue;
268                 }
269                 if (output == null) {
270                     output = FileUtilities.openUTF8Writer(targetDir, file + ".txt");
271                     output.println("ID\tEnglish\tSource\tRelease\tDescription");
272                 }
273                 String englishValue = english.getStringValue(trunkPath);
274                 final PathInfo pathInfo = englishInfo.getPathInfo(trunkPath);
275                 String description;
276                 if (pathInfo == null) {
277                     log.println(file + "\tDescription unavailable for " + trunkPath);
278                     errors++;
279                     String temp = fixer.fix(path, true);
280                     englishInfo.getPathInfo(trunkPath);
281                     continue;
282                 } else {
283                     description = pathInfo.getDescription();
284                 }
285                 long id = StringId.getId(trunkPath);
286                 if (englishValue == null) {
287                     log.println(file + "\tEmpty English for " + trunkPath);
288                     errors++;
289                     continue;
290                 }
291                 output.println(id + "\t" + ssquote(englishValue, false) + "\t" + ssquote(submittedValue, false) + "\t"
292                     + ssquote(trunkValue, true) + "\t" + description);
293             }
294             if (output != null) {
295                 output.close();
296                 output = null;
297             }
298             log.flush();
299         }
300         log.close();
301     }
302 
    // Shared scratch objects passed to pathFindRemover.get() by shouldSkipPath().
    /** Receives the match information of the last lookup; shouldSkipPath() reads element 1 for VALUE rules. */
    static Output<String[]> matches = new Output<String[]>();
    /** Collects lookup failure details when the (currently disabled) debugging branch of shouldSkipPath() is active. */
    static List<String> failures = new ArrayList<String>();
    /** Receives the finder that matched in the last lookup; not read in the visible code. */
    static Output<Finder> matcherFound = new Output<Finder>();

    /** Result of shouldSkipPath(). */
    enum PathStatus {
        SKIP, KEEP, MAYBE
    }
310 
shouldSkipPath(String path, String value)311     public static PathStatus shouldSkipPath(String path, String value) {
312         // skip if
313         List<String> myFailures = null;
314         if (false && path.contains("currencies") && path.contains("symbol")) {
315             myFailures = failures;
316         }
317         String skipPath = pathFindRemover.get(path, null, matches, matcherFound, myFailures);
318         if (myFailures != null && failures.size() != 0) {
319             System.out.println("Failures\n\t" + CollectionUtilities.join(failures, "\n\t"));
320             failures.clear();
321         }
322         if (skipPath == null || skipPath.equals("MAYBE")) {
323             return PathStatus.MAYBE;
324         } else if (skipPath.equals("VALUE")) {
325             return value.equals(matches.value[1]) ? PathStatus.SKIP : PathStatus.MAYBE;
326         } else if (skipPath.equals("SKIP")) {
327             return PathStatus.SKIP;
328         } else if (skipPath.equals("KEEP")) {
329             return PathStatus.KEEP;
330         }
331         throw new IllegalArgumentException("Unexpected xmbSkip.txt value: " + skipPath);
332     }
333 
ssquote(String englishValue, boolean showRemoved)334     private static String ssquote(String englishValue, boolean showRemoved) {
335         if (englishValue == null) {
336             return showRemoved ? "[removed]" : "[empty]";
337         }
338         englishValue = englishValue.replace("\"", "&quot;");
339         return englishValue;
340     }
341 
342     static class SubmittedPathFixer {
343         private static final Pattern PATH_FIX = PatternCache.get("\\[@alt=\"" +
344             "(?:proposed|((?!proposed)[-a-zA-Z0-9]*)-proposed)" +
345             "-u\\d+-implicit[0-9.]+" +
346             "(?:-proposed-u\\d+-implicit[0-9.]+)?" + // NOTE: we allow duplicated alt values because of a generation
347             // bug.
348             // -proposed-u971-implicit2.0
349             "\"]");
350         static Matcher pathFix = PATH_FIX.matcher("");
351 
fix(String path, boolean debug)352         public String fix(String path, boolean debug) {
353             if (pathFix.reset(path).find()) {
354                 if (debug) {
355                     // debug in case we get a mismatch
356                     String temp = "REGEX:\t" +
357                         RegexUtilities.showMismatch(PATH_FIX, path.substring(pathFix.start(0)));
358                 }
359                 final String group = pathFix.group(1);
360                 String replacement = group == null ? "" : "[@alt=\"" + group + "\"]";
361                 String trunkPath = path.substring(0, pathFix.start(0)) + replacement + path.substring(pathFix.end(0));
362                 // HACK because of change in CLDR defaults
363                 if (trunkPath.startsWith("//ldml/numbers/symbols/")) {
364                     trunkPath = "//ldml/numbers/symbols[@numberSystem=\"latn\"]/"
365                         + trunkPath.substring("//ldml/numbers/symbols/".length());
366                 }
367                 return trunkPath;
368             }
369             return path;
370         }
371 
372     }
373 
showDefaultContents(String targetDir, CLDRFile english)374     private static void showDefaultContents(String targetDir, CLDRFile english) throws IOException {
375         PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "locales.txt");
376         String[] locales = stock.split("\\|");
377         Set<R2<String, String>> sorted = new TreeSet<R2<String, String>>();
378         for (String locale : locales) {
379             if (locale.isEmpty()) continue;
380             String name = english.getName(locale);
381             R2<String, String> row = Row.of(name, locale);
382             sorted.add(row);
383         }
384         Set<String> defaultContents = supplementalDataInfo.getDefaultContentLocales();
385 
386         for (R2<String, String> row : sorted) {
387             String locale = row.get1();
388             String dlocale = getDefaultContentLocale(locale, defaultContents);
389             out.println(row.get0() + "\t" + locale + "\t" + english.getName(dlocale) + "\t" + dlocale);
390         }
391         out.close();
392     }
393 
getDefaultContentLocale(String locale, Set<String> defaultContents)394     private static String getDefaultContentLocale(String locale, Set<String> defaultContents) {
395         String best = null;
396         for (String s : defaultContents) {
397             if (s.startsWith(locale)) {
398                 if (best == null) {
399                     best = s;
400                 } else if (s.length() < best.length()) {
401                     best = s;
402                 }
403             }
404         }
405         if (best == null) {
406             return locale;
407         }
408         return best;
409     }
410 
    /**
     * Matches a {@code [@count="..."]} attribute. Despite the name, the pattern does not match alt
     * attributes. writeFile() removes the matches to obtain the count-less path of a plural item.
     */
    static final Pattern COUNT_OR_ALT_ATTRIBUTE = PatternCache.get("\\[@(count)=\"([^\"]*)\"]");
    /**
     * Matches paths under units/unit or under decimal/currency formats that carry a count
     * attribute; writeFile() collects such paths as plural items instead of writing them directly.
     */
    static final Pattern PLURAL_XPATH = Pattern
        .compile("//ldml/(units/unit|numbers/(decimal|currency)Formats).*\\[@count=\"\\w+\"].*");
    /** Path fragments for which writeFile() does not apply the exemplar-character check. */
    static final Pattern SKIP_EXEMPLAR_TEST = PatternCache.get(
        "/(currencySpacing"
            + "|hourFormat"
            + "|exemplarCharacters"
            + "|pattern"
            + "|localizedPatternChars"
            + "|segmentations"
            + "|dateFormatItem"
            + "|references"
            + "|unitPattern"
            + "|intervalFormatItem"
            + "|localeDisplayNames/variants/"
            + "|commonlyUsed"
            + "|currency.*/symbol"
            + "|symbols/(exponential|nan))");

    /** Reusable matcher over SKIP_EXEMPLAR_TEST. */
    static final Matcher skipExemplarTest = SKIP_EXEMPLAR_TEST.matcher("");
    /** The letters A-Z and a-z; getExemplars() uses it to decide whether a locale uses Latin letters. */
    static final UnicodeSet ASCII_LATIN = new UnicodeSet("[A-Za-z]").freeze();
    /** All characters of script Latn; getExemplars() removes them for locales without ASCII letters. */
    static final UnicodeSet LATIN = new UnicodeSet("[:sc=Latn:]").freeze();

    /** Paths that writeFile() keeps even when their value comes from root or the code fallback. */
    static final Matcher keepFromRoot = PatternCache.get("/(exemplarCity|currencies/currency.*/symbol)").matcher("");
    /** Matches currency display-name paths; group 1 captures the currency code. */
    static final Matcher currencyDisplayName = Pattern
        .compile("/currencies/currency\\[@type=\"([^\"]*)\"]/displayName").matcher("");
437 
writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile, boolean isEnglish, boolean filter)438     private static void writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile,
439         boolean isEnglish, boolean filter) throws IOException {
440 
441         String extension = "xml";
442         XPathParts xpathParts = new XPathParts();
443         Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
444         Set<String> seenStarred = new HashSet<String>();
445 
446         Relation<String, Row.R2<PathInfo, String>> countItems = Relation.of(
447             new TreeMap<String, Set<Row.R2<PathInfo, String>>>(), TreeSet.class);
448         Matcher countMatcher = COUNT_OR_ALT_ATTRIBUTE.matcher("");
449         int lineCount = 0;
450         int wordCount = 0;
451         int messageCount = 0;
452 
453         StringWriter buffer = new StringWriter();
454         PrintWriter out1 = new PrintWriter(buffer);
455         StringWriter buffer3 = new StringWriter();
456         PrintWriter out3 = new PrintWriter(buffer3);
457         UnicodeSet exemplars = getExemplars(cldrFile);
458 
459         for (PathInfo pathInfo : englishInfo) {
460             if (false && pathInfo.id == 46139888945574604L) { // for debugging
461                 System.out.println("?");
462             }
463             String path = pathInfo.getPath();
464             String value;
465             if (isEnglish) {
466                 value = pathInfo.englishValue;
467             } else {
468                 value = cldrFile.getStringValue(path);
469             }
470             // Remove quotes from number formats (we'll put them back in during
471             // post-processing).
472             // TODO: we should actually call daip.processForDisplay() here, but
473             // it does more stuff than we need it to do, e.g. stripping the
474             // brackets from exemplarCharacters.
475             if (DisplayAndInputProcessor.NUMBER_FORMAT_XPATH.matcher(path).matches()) {
476                 value = value.replace("'", "");
477             }
478 
479             // skip root if not English
480             if (!isEnglish && value != null && !keepFromRoot.reset(path).find()) { // note that mismatched script will
481                 // be checked later
482                 String locale = cldrFile.getSourceLocaleID(path, null);
483                 if (locale.equals("root")) {
484                     reasonsToPaths.put("root", path + "\t" + value);
485                     continue;
486                 }
487                 if (locale.equals(XMLSource.CODE_FALLBACK_ID)) {
488                     reasonsToPaths.put("codeFallback", path + "\t" + value);
489                     continue;
490                 }
491             }
492             boolean hasPlurals = PLURAL_XPATH.matcher(path).matches();
493             if (filter && !hasPlurals) {
494                 String starred = pathInfo.getStarredPath();
495                 if (seenStarred.contains(starred)) {
496                     continue;
497                 }
498                 seenStarred.add(starred);
499             }
500             if (value == null) {
501                 reasonsToPaths.put("missing", path + "	" + value);
502                 continue;
503             }
504             if (!isEnglish) {
505                 String fullPath = cldrFile.getFullXPath(path);
506                 if (fullPath.contains("draft")) {
507                     xpathParts.set(fullPath);
508                     String draftValue = xpathParts.getAttributeValue(-1, "draft");
509                     if (!draftValue.equals("contributed")) {
510                         reasonsToPaths.put(draftValue, path + "\t" + value);
511                         continue;
512                     }
513                 }
514             }
515             if (!isEnglish
516                 && !exemplars.containsAll(value)
517                 && !skipExemplarTest.reset(path).find()) {
518                 // check for special cases in currency names. If the code itself occurs in the name, that's ok
519                 // ldml/numbers/currencies/currency[@type="XXX"]/displayName
520                 boolean bad = true;
521                 if (currencyDisplayName.reset(path).find()) {
522                     String code = currencyDisplayName.group(1);
523                     String value2 = value.replace(code, "");
524                     bad = !exemplars.containsAll(value2);
525                 }
526                 if (bad) {
527                     UnicodeSet diff = new UnicodeSet().addAll(value).removeAll(exemplars);
528                     reasonsToPaths.put("exemplars", path + "\t" + value + "\t" + diff);
529                     continue;
530                 }
531             }
532             // String fullPath = cldrFile.getStringValue(path);
533             // //ldml/units/unit[@type="day"]/unitPattern[@count="one"]
534             if (hasPlurals) {
535                 countMatcher.reset(path).find();
536                 String countLessPath = countMatcher.replaceAll("");
537                 countItems.put(countLessPath, Row.of(pathInfo, value));
538                 continue;
539             }
540             if (!isEnglish && pathInfo.changedEnglish) {
541                 reasonsToPaths.put("changed-english", path);
542             } else {
543                 writePathInfo(out1, pathInfo, value, isEnglish);
544                 messageCount++;
545             }
546             if (isEnglish) {
547                 writeJavaInfo(out3, pathInfo.getStringId(), pathInfo.getPath(), value);
548             }
549             wordCount += pathInfo.wordCount;
550             ++lineCount;
551         }
552         R2<Integer, Integer> lineWordCount = writeCountPathInfo(out1, out3, cldrFile.getLocaleID(), countItems,
553             isEnglish, filter);
554         messageCount += lineWordCount.get0();
555         lineCount += lineWordCount.get0();
556         wordCount += lineWordCount.get1();
557         if (!filter && countItems.size() != lineWordCount.get0().intValue()) {
558             System.out.println(localeId + "\t" + countItems.size() + "\t" + lineWordCount.get0().intValue());
559         }
560         out1.flush();
561         out3.flush();
562 
563         String file = LanguageCodeConverter.toGoogleLocaleId(localeId);
564         String localeName = englishInfo.getName(localeId);
565         PrintWriter out = FileUtilities.openUTF8Writer(targetDir, file + "." + extension);
566 
567         if (isEnglish) {
568             FileCopier.copy(GenerateXMB.class, "xmb-dtd.xml", out);
569 //            FileUtilities.appendFile(GenerateXMB.class, "xmb-dtd.xml", out);
570             out.println("<!-- " + localeName + " -->");
571             out.println("<messagebundle class='" + projectId + "'> <!-- version: " + DTD_VERSION + ", date: " + DATE
572                 + " -->");
573             out.println(buffer.toString());
574             out.println("</messagebundle>");
575 
576             PrintWriter out3File = FileUtilities.openUTF8Writer(targetDir, "IdToPath.java");
577             out3File.println("package org.unicode.cldr.tool;");
578             out3File.println();
579             out3File.println("import java.util.HashMap;");
580             out3File.println();
581             out3File.println("/**");
582             out3File.println(" * Autogenerated by GenerateXMB for use by ConvertXTB.");
583             out3File.println(" * Do not manually edit this file.");
584             out3File.println(" */");
585             out3File.println("public class IdToPath {");
586             out3File.println("  static final HashMap<String,String> map = new HashMap<String,String>();");
587             out3File.println("  public static String getPath(String id) {");
588             out3File.println("      return map.get(id);");
589             out3File.println("  }");
590             out3File.println("  static {");
591             out3File.println("      String[][] data = {");
592             out3File.println(buffer3);
593             out3File.println("      };");
594             out3File.println("      for (String[] pair : data) {");
595             out3File.println("          map.put(pair[0], pair[1]);");
596             out3File.println("      }");
597             out3File.println("  }");
598             out3File.println("}");
599             out3File.close();
600         } else {
601 
602 //            FileUtilities.appendFile(GenerateXMB.class, "wsb-dtd.xml", out);
603             FileCopier.copy(GenerateXMB.class, "wsb-dtd.xml", out);
604             out.println("<!-- " + localeName + " -->");
605             out.println("<worldserverbundles lazarus_id='dummy' date='" + DATE + "'> <!-- version: " + DTD_VERSION
606                 + " -->");
607             out.println("  <worldserverbundle project_id='" + projectId + "' message_count='" + messageCount + "'>");
608             out.println(buffer.toString());
609             out.println("  </worldserverbundle>");
610             out.println("</worldserverbundles>");
611         }
612         out.close();
613         QuickCheck.check(new File(targetDir, file + "." + extension));
614         if (!filter) {
615             countFile.println(file + "\t" + lineCount + "\t" + wordCount);
616         }
617         if (!isEnglish && !filter) {
618             writeReasons(reasonsToPaths, targetDir, file);
619         }
620     }
621 
writeJavaInfo(PrintWriter out3, String id, String path, String value)622     private static void writeJavaInfo(PrintWriter out3, String id, String path, String value) {
623         out3.println("              {\"" + id + "\",\"" + path.replace("\"", "\\\"") + "\",\""
624             + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"},");
625     }
626 
getExemplars(CLDRFile cldrFile)627     private static UnicodeSet getExemplars(CLDRFile cldrFile) {
628         UnicodeSet exemplars = cldrFile.getExemplarSet("", CLDRFile.WinningChoice.WINNING);
629         boolean isLatin = exemplars.containsSome(ASCII_LATIN);
630         exemplars.addAll(CheckExemplars.AlwaysOK);
631         UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", CLDRFile.WinningChoice.WINNING);
632         if (auxExemplars != null) {
633             exemplars.addAll(auxExemplars);
634         }
635         if (!isLatin) {
636             exemplars.removeAll(LATIN);
637         }
638         exemplars.freeze();
639         return exemplars;
640     }
641 
    /** Matches a {@code [@count="..."]} attribute; group 1 captures the count value, which writeCountPathInfo() reads. */
    static final Pattern COUNT_ATTRIBUTE = PatternCache.get("\\[@count=\"([^\"]*)\"]");
    /** Matches the element names "decimalFormat" and "numberFormat". */
    static final Pattern PLURAL_NUMBER = PatternCache.get("(decimal|number)Format");
644 
writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale, Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter)645     private static Row.R2<Integer, Integer> writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale,
646         Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter) {
647         Matcher m = COUNT_ATTRIBUTE.matcher("");
648         int wordCount = 0;
649         PluralInfo pluralInfo = supplementalDataInfo.getPlurals(locale);
650         int lineCount = 0;
651         Set<String> errorSet = new LinkedHashSet<String>();
652         for (Entry<String, Set<R2<PathInfo, String>>> entry : countItems.keyValuesSet()) {
653             String countLessPath = entry.getKey();
654             Map<String, String> fullValues = new TreeMap<String, String>();
655             PathInfo pathInfo = null;
656             String value = null;
657             for (R2<PathInfo, String> entry2 : entry.getValue()) {
658                 PathInfo pathInfoN = entry2.get0();
659                 m.reset(pathInfoN.getPath()).find();
660                 String count = m.group(1);
661                 if (count.equals("other")) {
662                     pathInfo = pathInfoN;
663                 }
664                 value = entry2.get1();
665                 fullValues.put(count, value);
666             }
667             if (pathInfo == null) {
668                 continue;
669             }
670             if (fullValues.size() < 2) {
671                 // if we don't have two count values, skip
672                 System.out.println(locale + "\tMust have 2 count values: " + entry.getKey());
673                 continue;
674             }
675             String fullPlurals = showPlurals(fullValues, locale, pathInfo, pluralInfo, isEnglish, errorSet);
676             if (fullPlurals == null) {
677                 System.out.println(locale + "\tCan't format plurals for: " + entry.getKey() + "\t" + errorSet);
678                 errors++;
679                 continue;
680             }
681 
682             out.println();
683             out.println("    <!--    "
684                 // + prettyPath.getPrettyPath(pathInfo.getPath(), false) + " ;    "
685                 + countLessPath + "    -->");
686             out.println("    <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'");
687             out.println("     >" + fullPlurals + "</msg>");
688             // Use the last plural value in the loop because we only need it for example purposes.
689             writeJavaInfo(out3, pathInfo.getStringId(), countLessPath, value);
690             // if (!isEnglish || pathInfo.placeholderReplacements != null) {
691             // out.println("\t<!-- English original:\t" + pathInfo.getEnglishValue() + "\t-->");
692             // }
693             out.flush();
694             ++lineCount;
695             wordCount += pathInfo.wordCount * 3;
696             if (filter) {
697                 break;
698             }
699         }
700         return Row.of(lineCount, wordCount);
701     }
702 
    /**
     * Keys emitted, in this order, for every plural message built by {@code showPlurals}:
     * the explicit {@code =0} and {@code =1} cases followed by the plural category names.
     */
    static final String[] PLURAL_KEYS = { "=0", "=1", "zero", "one", "two", "few", "many", "other" };
    /**
     * Count attribute values substituted into each count path to create additional paths.
     * Note that the explicit cases are written here as {@code 0} and {@code 1}, without the
     * leading {@code =} used in {@link #PLURAL_KEYS}, and that {@code other} is not included.
     */
    static final String[] EXTRA_PLURAL_KEYS = { "0", "1", "zero", "one", "two", "few", "many" };
705 
showPlurals(Map<String, String> values, String locale, PathInfo pathInfo, PluralInfo pluralInfo, boolean isEnglish, Set<String> errorSet)706     private static String showPlurals(Map<String, String> values,
707         String locale, PathInfo pathInfo, PluralInfo pluralInfo,
708         boolean isEnglish, Set<String> errorSet) {
709         errorSet.clear();
710         /*
711          * Desired output for English XMB
712          * <msg desc=
713          * "[ICU Syntax] Plural forms for a number of hours. These are special messages: before translating, see cldr.org/translation/plurals."
714          * >
715          * {LENGTH, select,
716          * abbreviated {
717          * {NUMBER_OF_HOURS, plural,
718          * =0 {0 hrs}
719          * =1 {1 hr}
720          * zero {# hrs}
721          * one {# hrs}
722          * two {# hrs}
723          * few {# hrs}
724          * many {# hrs}
725          * other {# hrs}}}
726          * full {
727          * {NUMBER_OF_HOURS, plural,
728          * =0 {0 hours}
729          * =1 {1 hour}
730          * zero {# hours}
731          * one {# hours}
732          * two {# hours}
733          * few {# hours}
734          * many {# hours}
735          * other {# hours}}}}
736          * </msg>
737          *
738          * NOTE: For the WSB, the format has to match the following, WITHOUT LFs
739          *
740          * <msg id='1431840205484292448' desc='[ICU Syntax] who is viewing?​ This message requires special attention.
741          * Please follow the instructions here:
742          * https://sites.google.com/a/google.com/localization-info-site/Home/training/icusyntax'>
743          * <ph name='[PLURAL_NUM_USERS_OFFSET_1]' ex='Special placeholder used in [ICU Syntax] messages, see
744          * instructions page.'/>
745          * <ph name='[​=0]'/>No one else is viewing.
746          * <ph name='[=1]'/><ph name='USERNAME' ex='Bob'/> is viewing.
747          * <ph name='[=2]'/><ph name='USERNAME' ex='Bob'/> and one other are viewing.
748          * <ph name='[ZERO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
749          * <ph name='[ONE]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
750          * <ph name='[TWO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
751          * <ph name='[FEW]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
752          * <ph name='[MANY]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
753          * <ph name='[OTHER]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
754          * <ph name='[END_PLURAL]'/>
755          * </msg>
756          */
757         Matcher matcher = PLURAL_NUMBER.matcher(pathInfo.getPath());
758         String var = null;
759         if (matcher.find()) {
760             // Plural doesn't use placeholders so create a label.
761             var = matcher.group(1).toUpperCase() + "_NUMBER";
762         } else {
763             var = pathInfo.getFirstVariable();
764         }
765 
766         StringBuilder result = new StringBuilder();
767         if (isEnglish) {
768             result.append('{')
769                 // .append("PLURAL_")
770                 .append(var).append(",plural,");
771         } else {
772             result.append("<ph name='[PLURAL_").append(var).append("]'/>"); // ex='Special placeholder used in [ICU
773             // Syntax] messages, see instructions page.'
774         }
775         for (String key : PLURAL_KEYS) {
776             String value;
777             String coreKey = key.startsWith("=") ? key.substring(1, 2) : key;
778             value = values.get(coreKey);
779             if (value == null) {
780                 if (key.startsWith("=")) {
781                     String stringCount = key.substring(1);
782                     // handle both =x case, and the category
783                     int intCount = Integer.parseInt(stringCount);
784                     Count count = pluralInfo.getCount(intCount);
785                     value = values.get(count.toString());
786                     if (value == null) {
787                         errorSet.add("Bad key/value " + key + "='" + value + "' in " + values);
788                         return null;
789                     }
790                     value = value.replace("{0}", stringCount);
791                 } else {
792                     value = values.get("other");
793                     if (value == null) {
794                         errorSet.add("No 'other' value in " + values);
795                         return null;
796                     }
797                 }
798             }
799             String newValue = MessageFormat.format(MessageFormat.autoQuoteApostrophe(value),
800                 new Object[] { key.startsWith("=") ? key.substring(1, 2) : "#" });
801             PlaceholderType type = isEnglish ? PlaceholderType.BRACES : PlaceholderType.XML;
802             newValue = pathInfo.transformValue(newValue, type);
803             if (isEnglish) {
804                 result.append("\n            ").append(key).append(" {").append(newValue).append('}');
805             } else {
806                 String prefix = key.toUpperCase(Locale.ENGLISH);
807                 result.append("<!--\n        --><ph name='[").append(prefix).append("]'/>").append(newValue);
808             }
809         }
810         if (isEnglish) {
811             result.append('}');
812         } else {
813             result.append("<!--\n        --><ph name='[END_PLURAL]'/>");
814         }
815         return result.toString();
816     }
817 
writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish)818     private static void writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish) {
819         out.println();
820         out.println("    <!--    " + pathInfo.getPath() + "    -->");
821         out.println("    <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'");
822         PlaceholderType type = isEnglish ? PlaceholderType.XML_EXAMPLE : PlaceholderType.XML;
823         String transformValue = pathInfo.transformValue(value, type);
824         out.println("     >" + transformValue + "</msg>");
825         value = TransliteratorUtilities.toHTML.transform(value);
826         if (!value.equals(transformValue) && (!isEnglish || pathInfo.placeholders != null)) {
827             out.println("    <!-- English original:    " + value + "    -->");
828         }
829         out.flush();
830     }
831 
writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename)832     private static void writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename)
833         throws IOException {
834         targetDir += "/skipped/";
835         filename += ".txt";
836         PrintWriter out = FileUtilities.openUTF8Writer(targetDir, filename);
837         out.println("# " + DATE);
838         for (Entry<String, Set<String>> reasonToSet : reasonsToPaths.keyValuesSet()) {
839             for (String path : reasonToSet.getValue()) {
840                 out.println(reasonToSet.getKey() + "    " + path);
841             }
842         }
843         out.close();
844     }
845 
846     static class PathInfo implements Comparable<PathInfo> {
847         private static final Pattern PLACEHOLDER = PatternCache.get("\\{(\\d)}");
848 
849         private final String path;
850         private final Long id;
851         private final String stringId;
852         private final String englishValue;
853         private final boolean changedEnglish;
854         private final Map<String, PlaceholderInfo> placeholders;
855         private final String description;
856         private final String starredPath;
857         private final int wordCount;
858 
859         private static final BreakIterator bi = BreakIterator.getWordInstance(ULocale.ENGLISH);
860         private static final UnicodeSet ALPHABETIC = new UnicodeSet("[:Alphabetic:]");
861 
PathInfo(String path, String englishValue, boolean changedEnglish, Map<String, PlaceholderInfo> placeholders, String description, String starredPath)862         public PathInfo(String path, String englishValue, boolean changedEnglish,
863             Map<String, PlaceholderInfo> placeholders,
864             String description, String starredPath) {
865             if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) {
866                 int x = 0;
867             }
868             if (description == null) {
869                 path2errors.put(path, "missing description");
870             }
871             this.path = path;
872             long id = StringId.getId(path);
873             this.id = id;
874             stringId = String.valueOf(id);
875             this.englishValue = englishValue;
876             this.changedEnglish = changedEnglish;
877             this.placeholders = placeholders;
878             this.description = description == null ? null : description.intern();
879             this.starredPath = starredPath;
880             // count words
881             int tempCount = 0;
882             bi.setText(englishValue);
883             int start = bi.first();
884             for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
885                 String word = englishValue.substring(start, end);
886                 if (ALPHABETIC.containsSome(word)) {
887                     ++tempCount;
888                 }
889             }
890             wordCount = tempCount == 0 ? 1 : tempCount;
891         }
892 
getFirstVariable()893         public String getFirstVariable() {
894             // ... name='FIRST_PART_OF_TEXT' ...
895             PlaceholderInfo info = placeholders.get("{0}");
896             if (info == null) {
897                 throw new IllegalArgumentException("Missing {0} for " + this);
898             }
899             return info.name;
900         }
901 
getPath()902         public String getPath() {
903             return path;
904         }
905 
getId()906         public Long getId() {
907             return id;
908         }
909 
getStringId()910         public String getStringId() {
911             return stringId;
912         }
913 
getEnglishValue()914         public String getEnglishValue() {
915             return englishValue;
916         }
917 
getDescription()918         public String getDescription() {
919             return description;
920         }
921 
getStarredPath()922         public String getStarredPath() {
923             return starredPath;
924         }
925 
getPlaceholderReplacementsToOriginal()926         public Map<String, String> getPlaceholderReplacementsToOriginal() {
927             if (placeholders == null) return null;
928             Map<String, String> placeholderOutput = new LinkedHashMap<String, String>();
929             for (String id : placeholders.keySet()) {
930                 placeholderOutput.put(id, getPlaceholderWithExample(id));
931             }
932             return placeholderOutput;
933         }
934 
getPlaceholderWithExample(String placeholder)935         private String getPlaceholderWithExample(String placeholder) {
936             PlaceholderInfo info = placeholders.get(placeholder);
937             // <ph name='x'><ex>xxx</ex>yyy</ph>
938             return "<ph name='" + info.name + "'><ex>" + info.example + "</ex>" + placeholder + "</ph>";
939         }
940 
941         // static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
942 
transformValue(String value, PlaceholderType type)943         private String transformValue(String value, PlaceholderType type) {
944             value = TransliteratorUtilities.toHTML.transform(value);
945             if (placeholders == null) return value;
946 
947             String placeholderFormat = "";
948             switch (type) {
949             case BRACES:
950                 placeholderFormat = "'{'{0}'}'";
951                 break;
952             case XML:
953                 placeholderFormat = "<ph name=''[{0}]'' />";
954                 break;
955             case XML_EXAMPLE:
956                 placeholderFormat = "<ph name=''{0}''><ex>{1}</ex>'{'{2}'}'</ph>";
957                 break;
958             }
959             Matcher matcher = PLACEHOLDER.matcher(value);
960             StringBuffer buffer = new StringBuffer();
961             int start = 0;
962             while (matcher.find()) {
963                 buffer.append(value.substring(start, matcher.start()));
964                 PlaceholderInfo info = placeholders.get(matcher.group());
965                 buffer.append(MessageFormat.format(placeholderFormat,
966                     new Object[] { info.name, info.example, matcher.group(1) }));
967                 start = matcher.end();
968             }
969             buffer.append(value.substring(start));
970             return buffer.toString();
971         }
972 
replacePlaceholders(String value, String placeholderStart, String placeholderEnd)973         private String replacePlaceholders(String value, String placeholderStart, String placeholderEnd) {
974             Matcher matcher = PLACEHOLDER.matcher(value);
975             StringBuffer buffer = new StringBuffer();
976             int start = 0;
977             while (matcher.find()) {
978                 buffer.append(value.substring(start, matcher.start()));
979                 String name = placeholders.get(matcher.group()).name;
980                 buffer.append(placeholderStart).append(name).append(placeholderEnd);
981                 start = matcher.end();
982             }
983             buffer.append(value.substring(start));
984             return buffer.toString();
985         }
986 
987         @Override
compareTo(PathInfo arg0)988         public int compareTo(PathInfo arg0) {
989             return path.compareTo(arg0.path);
990         }
991 
toString()992         public String toString() {
993             return path;
994         }
995     }
996 
997     static class EnglishInfo implements Iterable<PathInfo> {
998 
999         final Map<String, PathInfo> pathToPathInfo = new TreeMap<String, PathInfo>();
1000         final Map<Long, PathInfo> longToPathInfo = new HashMap<Long, PathInfo>();
1001         final CLDRFile english;
1002 
getPathInfo(long hash)1003         PathInfo getPathInfo(long hash) {
1004             return longToPathInfo.get(hash);
1005         }
1006 
getName(String localeId)1007         public String getName(String localeId) {
1008             return english.getName(localeId);
1009         }
1010 
getPathInfo(String path)1011         PathInfo getPathInfo(String path) {
1012             return pathToPathInfo.get(path);
1013         }
1014 
EnglishInfo(String targetDir, CLDRFile english, CLDRFile root)1015         EnglishInfo(String targetDir, CLDRFile english, CLDRFile root) throws Exception {
1016 
1017             Map<String, String> oldPathValueMap = ReadXMB.load(CLDRPaths.BASE_DIRECTORY +
1018                 "/cldr-tools/org/unicode/cldr/unittest/data/xmb/",
1019                 "en.xml");
1020 
1021             PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
1022 
1023             this.english = english;
1024             // we don't want the fully resolved paths, but we do want the direct inheritance from root.
1025             //Status status = new Status();
1026             Map<String, List<Set<String>>> starredPaths = new TreeMap<String, List<Set<String>>>();
1027 
1028             HashSet<String> metazonePaths = new HashSet<String>();
1029             // ^//ldml/dates/timeZoneNames/metazone\[@type="([^"]*)"]
1030             for (MetazoneInfo metazoneInfo : MetazoneInfo.METAZONE_LIST) {
1031                 for (String item : metazoneInfo.getTypes()) {
1032                     String path = "//ldml/dates/timeZoneNames/metazone[@type=\"" + metazoneInfo.metazoneId + "\"]"
1033                         + item;
1034                     metazonePaths.add(path);
1035                 }
1036             }
1037 
1038             // TODO add short countries
1039             HashSet<String> extraLanguages = new HashSet<String>();
1040             // ldml/localeDisplayNames/languages/language[@type=".*"]
1041 
1042             for (String langId : PathDescription.EXTRA_LANGUAGES) {
1043                 String langPath = "//ldml/localeDisplayNames/languages/language[@type=\"" + langId + "\"]";
1044                 extraLanguages.add(langPath);
1045             }
1046 
1047             Set<String> sorted = Builder.with(new TreeSet<String>())
1048                 .addAll(english)
1049                 .removeAll(
1050                     new Transform<String, Boolean>() {
1051                         public Boolean transform(String source) {
1052                             return source.startsWith("//ldml/dates/timeZoneNames/metazone") ? Boolean.TRUE
1053                                 : Boolean.FALSE;
1054                         }
1055                     })
1056                 .get();
1057             sorted.addAll(metazonePaths);
1058             if (DEBUG) {
1059                 TreeSet<String> diffs = new TreeSet<String>(extraLanguages);
1060                 diffs.removeAll(sorted);
1061                 System.out.println(diffs);
1062             }
1063             sorted.addAll(extraLanguages);
1064 
1065             // add the extra Count items.
1066             Map<String, String> extras = new HashMap<String, String>();
1067             Matcher m = COUNT_ATTRIBUTE.matcher("");
1068 
1069             for (String path : sorted) {
1070                 if (path.contains("[@count=\"")) {
1071                     m.reset(path).find();
1072                     for (String key : EXTRA_PLURAL_KEYS) {
1073                         String path2 = path.substring(0, m.start(1)) + key + path.substring(m.end(1));
1074                         extras.put(path2, path);
1075                     }
1076                 }
1077                 // if (path.contains("ellipsis")) {
1078                 // System.out.println(path);
1079                 // }
1080             }
1081             sorted.addAll(extras.keySet());
1082 
1083             Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
1084             Set<String> missingDescriptions = new TreeSet<String>();
1085             //Output<String[]> pathArguments = new Output<String[]>();
1086 
1087             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en");
1088             RegexLookup<Boolean> coverageAllow = new RegexLookup<Boolean>()
1089                 .add("^//ldml/localeDisplayNames/keys/key", true)
1090                 .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(jv|zxx|gsw|eo)\"]", true)
1091                 .add("^//ldml/localeDisplayNames/scripts/script", true)
1092                 .add("^//ldml/localeDisplayNames/types/type", true)
1093                 .add(
1094                     "^//ldml/dates/calendars/calendar\\[@type=\"[^\"]*\"]/dayPeriods/dayPeriodContext\\[@type=\"format\"]",
1095                     true);
1096 
1097             // TODO: for each count='other' path, add the other keywords and values
1098             PathDescription pathDescription = new PathDescription(GenerateXMB.supplementalDataInfo, english, extras,
1099                 starredPaths, PathDescription.ErrorHandling.SKIP);
1100 
1101             for (String path : sorted) {
1102                 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) {
1103                     int x = 0;
1104                 }
1105                 String value = english.getStringValue(path);
1106                 Level level = coverageLevel.getLevel(path);
1107                 if (value == null) {
1108                     value = "[EMPTY]";
1109                     addSkipReasons(reasonsToPaths, "empty-value", level, path, value);
1110                     continue;
1111                 }
1112                 if (pathMatcher != null
1113                     && !pathMatcher.reset(path).find()) {
1114                     addSkipReasons(reasonsToPaths, "path-parameter", level, path, value);
1115                     continue;
1116                 }
1117                 PathStatus pathStatus = shouldSkipPath(path, value);
1118                 if (pathStatus == PathStatus.SKIP) {
1119                     addSkipReasons(reasonsToPaths, "path-remove", level, path, value);
1120                     continue;
1121                 }
1122 
1123                 if (level.compareTo(Level.MODERN) > 0 && pathStatus != PathStatus.KEEP) {
1124                     if (coverageAllow.get(path) == null) { // HACK
1125                         addSkipReasons(reasonsToPaths, "coverage", level, path, value);
1126                         continue;
1127                     } else {
1128                         addSkipReasons(reasonsToPaths, "coverage*", level, path, value);
1129                         continue;
1130                         // System.out.println("Not skipping " + path);
1131                     }
1132                 }
1133 
1134                 String description = pathDescription.getDescription(path, value, level, null);
1135                 EnumSet<PathDescription.Status> descriptionStatus = pathDescription.getStatus();
1136                 if (!descriptionStatus.isEmpty()) {
1137                     addSkipReasons(reasonsToPaths, descriptionStatus.toString(), level, path, value);
1138                     description = null;
1139                 } else {
1140                     description = "[ICU CLDR] " + description;
1141                 }
1142 
1143                 String oldValue = oldPathValueMap.get(path);
1144                 boolean changedEnglish = !value.equals(oldValue);
1145                 PathInfo row = new PathInfo(path, value, changedEnglish, patternPlaceholders.get(path), description,
1146                     pathDescription.getStarredPathOutput());
1147 
1148                 if (description == PathDescription.MISSING_DESCRIPTION) {
1149                     missingDescriptions.add(pathDescription.getStarredPathOutput());
1150                 }
1151 
1152                 Long hash = row.getId();
1153                 if (longToPathInfo.containsKey(hash)) {
1154                     throw new IllegalArgumentException("Id collision for "
1155                         + path + " and " + longToPathInfo.get(hash).getPath());
1156                 }
1157                 pathToPathInfo.put(path, row);
1158                 longToPathInfo.put(hash, row);
1159                 if (value.contains("{0}") && patternPlaceholders.get(path) == null) {
1160                     System.out.println("ERROR, no placeholders for {0}...: " + path + " ; " + value);
1161                 }
1162             }
1163 
1164             PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-paths.txt");
1165             out.println("# " + DATE);
1166             for (Entry<String, List<Set<String>>> starredPath : starredPaths.entrySet()) {
1167                 out.println(starredPath.getKey() + "\t\t" + starredPath.getValue());
1168             }
1169             out.close();
1170             out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-missingDescriptions.txt");
1171             out.println("# " + DATE);
1172             for (String starredPath : missingDescriptions) {
1173                 // ^//ldml/dates/timeZoneNames/zone\[@type=".*"]/exemplarCity ; ROOT timezone ; The name of a city in:
1174                 // {0}. See cldr.org/xxxx.
1175                 out.println(toRegexPath(starredPath) + "\t;\tDESCRIPTION\t" + starredPaths.get(starredPath));
1176             }
1177             out.close();
1178             writeReasons(reasonsToPaths, targetDir, "en");
1179         }
1180 
toRegexPath(String starredPath)1181         private String toRegexPath(String starredPath) {
1182             String result = starredPath.replace("[", "\\[");
1183             result = result.replace("\".*\"", "\"([^\"]*)\"");
1184             return "^" + result;
1185         }
1186 
1187         @Override
iterator()1188         public Iterator<PathInfo> iterator() {
1189             return pathToPathInfo.values().iterator();
1190         }
1191     }
1192 
addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level, String path, String value)1193     static void addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level,
1194         String path, String value) {
1195         reasonsToPaths.put(descriptionStatus + "\t" + level, path + "\t" + value);
1196     }
1197 
1198     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date)1199     private static long getDateTimeinMillis(int year, int month, int date) {
1200         Calendar cal = Calendar.getInstance();
1201         cal.set(year, month, date);
1202         return cal.getTimeInMillis();
1203     }
1204 
    // Sampling window for the time zone comparisons below. The month argument goes to
    // Calendar.set, where it is zero-based, so month 1 is February and day 0 resolves to the
    // day before February 1st (with a lenient calendar).
    static final long START_TIME = getDateTimeinMillis(2000, 1, 0);
    static final long END_TIME = getDateTimeinMillis(2015, 1, 0);
    /** Step between samples when comparing zone offsets: 15 minutes, in milliseconds. */
    static final long DELTA_TIME = 15 * 60 * 1000;
    /** Step between samples when probing for daylight time: 90 days, in milliseconds. */
    static final long MIN_DAYLIGHT_PERIOD = 90L * 24 * 60 * 60 * 1000;
1209 
1210     static final Set<String> HAS_DAYLIGHT;
1211     static {
1212         Set<String> hasDaylightTemp = new HashSet<String>();
1213         Date date = new Date();
1214         main: for (String zoneId : sc.getCanonicalTimeZones()) {
1215             TimeZone zone = TimeZone.getTimeZone(zoneId);
1216             for (long time = START_TIME + MIN_DAYLIGHT_PERIOD; time < END_TIME; time += MIN_DAYLIGHT_PERIOD) {
1217                 date.setTime(time);
1218                 if (zone.inDaylightTime(date)) {
1219                     hasDaylightTemp.add(zoneId);
1220                     if (false && !zone.useDaylightTime()) {
1221                         System.out.println(zoneId + "\tuseDaylightTime()==false, but \tinDaylightTime(/" + date
1222                             + "/)==true");
1223                     }
1224                     continue main;
1225                 }
1226             }
1227         }
1228         HAS_DAYLIGHT = Collections.unmodifiableSet(hasDaylightTemp);
1229     }
1230 
1231     static final Set<String> SINGULAR_COUNTRIES;
1232 
1233     private static PrintWriter countFile;
1234     static {
1235         // start with certain special-case countries
1236         Set<String> singularCountries = new HashSet<String>(
1237             Arrays.asList("CL EC ES NZ PT AQ FM GL KI UM PF".split(" ")));
1238 
1239         Map<String, Set<String>> countryToZoneSet = sc.getCountryToZoneSet();
1240 
1241         main: for (Entry<String, Set<String>> countryZones : countryToZoneSet.entrySet()) {
1242             String country = countryZones.getKey();
1243             if (country.equals("001")) {
1244                 continue;
1245             }
1246             Set<String> zones = countryZones.getValue();
1247             if (zones.size() == 1) {
1248                 singularCountries.add(country);
1249                 continue;
1250             }
1251             // make a set of sets
1252             List<TimeZone> initial = new ArrayList<TimeZone>();
1253             for (String s : zones) {
TimeZone.getTimeZone(s)1254                 initial.add(TimeZone.getTimeZone(s));
1255             }
1256             // now cycle through the times and see if we find any differences
1257             for (long time = START_TIME; time < END_TIME; time += DELTA_TIME) {
1258                 int firstOffset = Integer.MIN_VALUE;
1259                 for (TimeZone zone : initial) {
1260                     int offset = zone.getOffset(time);
1261                     if (firstOffset == Integer.MIN_VALUE) {
1262                         firstOffset = offset;
1263                     } else {
1264                         if (firstOffset != offset) {
1265                             if (false)
1266                                 System.out.println(country
1267                                     + " Difference at: " + new Date(time)
1268                                     + ", " + zone.getDisplayName() + " " + (offset / 1000.0 / 60 / 60)
1269                                     + ", " + initial.iterator().next().getDisplayName() + " "
1270                                     + (firstOffset / 1000.0 / 60 / 60));
1271                             continue main;
1272                         }
1273                     }
1274                 }
1275             }
1276             singularCountries.add(country);
1277         }
1278         SINGULAR_COUNTRIES = Collections.unmodifiableSet(singularCountries);
1279     }
1280 
1281     static final class MetazoneInfo {
1282 
1283         /**
1284          * @param metazoneId
1285          * @param singleCountry
1286          * @param hasDaylight
1287          * @param zonesForCountry
1288          * @param regionToZone
1289          */
MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight)1290         public MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight) {
1291             this.golden = golden;
1292             this.metazoneId = metazoneId;
1293             this.singleCountry = singleCountry;
1294             this.hasDaylight = hasDaylight;
1295         }
1296 
1297         static final String[] GENERIC = { "/long/generic",
1298             // "/short/generic"
1299         };
1300         static final String[] DAYLIGHT = { "/long/generic", "/long/standard", "/long/daylight",
1301             // "/short/generic", "/short/standard", "/short/daylight"
1302         };
1303 
getTypes()1304         public String[] getTypes() {
1305             return hasDaylight ? DAYLIGHT : GENERIC;
1306         }
1307 
1308         private final String metazoneId;
1309         private final String golden;
1310         private final boolean singleCountry;
1311         private final boolean hasDaylight;
1312 
1313         static final List<MetazoneInfo> METAZONE_LIST;
1314         static {
1315             // Set<String> zones = supplementalDataInfo.getCanonicalTimeZones();
1316             ArrayList<MetazoneInfo> result = new ArrayList<MetazoneInfo>();
1317 
1318             Map<String, String> zoneToCountry = sc.getZoneToCounty();
1319 
1320             Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone();
1321             for (String metazone : supplementalDataInfo.getAllMetazones()) {
1322                 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone);
1323                 String golden = regionToZone.get("001");
1324                 if (golden == null) {
1325                     throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone);
1326                 }
1327                 String region = zoneToCountry.get(golden);
1328                 boolean isSingleCountry = SINGULAR_COUNTRIES.contains(region);
1329                 if (isSingleCountry) {
1330                     continue;
1331                 }
1332 
1333                 // TimeZone goldenZone = TimeZone.getTimeZone(golden);
1334 
1335                 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(golden);
1336                 if (metazoneRanges == null) {
1337                     throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone);
1338                 }
1339                 MetazoneInfo item = new MetazoneInfo(metazone, golden, isSingleCountry, HAS_DAYLIGHT.contains(golden));
1340                 result.add(item);
1341             }
1342             METAZONE_LIST = Collections.unmodifiableList(result);
1343         }
1344 
toString()1345         public String toString() {
1346             return sc.getZoneToCounty().get(golden)
1347                 + "\t" + metazoneId
1348                 + "\t" + golden
1349                 + "\t" + (singleCountry ? "singleCountry" : "")
1350                 + "\t" + (hasDaylight ? "useDaylightTime" : "")
1351             // + ": " + zonesForCountry
1352             // + "\t" + regionToZone;
1353             ;
1354         }
1355     }
1356 
showMetazoneInfo()1357     static void showMetazoneInfo() {
1358         System.out.println("\nZones in multiple metazones\n");
1359 
1360         for (String zone : sc.getCanonicalTimeZones()) {
1361             Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(zone);
1362             if (metazoneRanges == null) {
1363                 System.out.println("Zone doesn't have metazone! " + zone);
1364                 continue;
1365             }
1366             if (metazoneRanges.size() != 1) {
1367                 for (MetaZoneRange range : metazoneRanges) {
1368                     System.out.println(zone + ":\t" + range);
1369                 }
1370                 System.out.println();
1371             }
1372         }
1373 
1374         System.out.println("\nMetazoneInfo\n");
1375 
1376         for (boolean singleCountry : new boolean[] { false }) {
1377             for (boolean hasDaylight : new boolean[] { false, true }) {
1378                 for (MetazoneInfo mzone : MetazoneInfo.METAZONE_LIST) {
1379                     if (mzone.hasDaylight != hasDaylight) continue;
1380                     if (mzone.singleCountry != singleCountry) continue;
1381                     System.out.println(mzone);
1382                 }
1383             }
1384         }
1385     }
1386 
displayWsb(String file, EnglishInfo info)1387     private static void displayWsb(String file, EnglishInfo info) {
1388         try {
1389             String[] parts = file.split("/");
1390             ULocale locale = new ULocale(parts[parts.length - 2]);
1391             FileInputStream fis = new FileInputStream(file);
1392             XMLReader xmlReader = XMLFileReader.createXMLReader(false);
1393             xmlReader.setErrorHandler(new MyErrorHandler());
1394             Map<String, String> data = new TreeMap<String, String>();
1395             xmlReader.setContentHandler(new MyContentHandler(locale, data, info));
1396             InputSource is = new InputSource(fis);
1397             is.setSystemId(file);
1398             xmlReader.parse(is);
1399             fis.close();
1400             for (Entry<String, String> entity : data.entrySet()) {
1401                 String path = entity.getKey();
1402                 String value = entity.getValue();
1403                 PathInfo pathInfo = info.getPathInfo(path);
1404                 System.out.println(value + "\t" + (pathInfo == null ? "?" : pathInfo.englishValue) + "\t" + path);
1405             }
1406         } catch (SAXParseException e) {
1407             System.out.println("\t" + "Can't read " + file);
1408             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
1409         } catch (SAXException e) {
1410             System.out.println("\t" + "Can't read " + file);
1411             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
1412         } catch (IOException e) {
1413             System.out.println("\t" + "Can't read " + file);
1414             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
1415         }
1416     }
1417 
1418     static class MyErrorHandler implements ErrorHandler {
error(SAXParseException exception)1419         public void error(SAXParseException exception) throws SAXException {
1420             System.out.println("\nerror: " + XMLFileReader.showSAX(exception));
1421             throw exception;
1422         }
1423 
fatalError(SAXParseException exception)1424         public void fatalError(SAXParseException exception) throws SAXException {
1425             System.out.println("\nfatalError: " + XMLFileReader.showSAX(exception));
1426             throw exception;
1427         }
1428 
warning(SAXParseException exception)1429         public void warning(SAXParseException exception) throws SAXException {
1430             System.out.println("\nwarning: " + XMLFileReader.showSAX(exception));
1431             throw exception;
1432         }
1433     }
1434 
1435     static class MyContentHandler implements ContentHandler {
1436         private static final boolean SHOW = false;
1437         private Map<String, String> myData;
1438         private EnglishInfo info;
1439         private PathInfo lastPathInfo;
1440         private StringBuilder currentText = new StringBuilder();
1441         private long lastId;
1442         private String lastPluralTag;
1443         private Map<String, String> pluralTags = new LinkedHashMap<String, String>();
1444         private Set<String> pluralKeywords;
1445 
MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info)1446         public MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info) {
1447             myData = data;
1448             this.info = info;
1449             PluralRules rules = PluralRules.forLocale(locale);
1450             pluralKeywords = Builder.with(new HashSet<String>()).addAll(rules.getKeywords()).add("0").add("1").freeze();
1451         }
1452 
1453         @Override
characters(char[] arg0, int arg1, int arg2)1454         public void characters(char[] arg0, int arg1, int arg2) throws SAXException {
1455             String chars = String.valueOf(arg0, arg1, arg2);
1456             // if (SHOW) System.out.println("\t characters\t" + chars);
1457             currentText.append(chars);
1458         }
1459 
1460         @Override
endDocument()1461         public void endDocument() throws SAXException {
1462             if (SHOW) System.out.println("\t endDocument\t");
1463         }
1464 
1465         @Override
endElement(String arg0, String arg1, String qName)1466         public void endElement(String arg0, String arg1, String qName) throws SAXException {
1467             // if (SHOW) System.out.println("\t endElement\t" + arg0 + "\t" + arg1 + "\t" + qName);
1468             if (qName.equals("msg")) {
1469                 String chars = currentText.toString().replace("\n", "").trim();
1470                 if (lastPathInfo == null) {
1471                     System.out.println("***Missing path info for " + lastId + "\t" + chars);
1472                     // myData.put("*** Missing path: " + lastId, chars);
1473                 } else if (pluralTags.size() != 0) {
1474                     for (Entry<String, String> pluralTagEntry : pluralTags.entrySet()) {
1475                         String pluralTag = pluralTagEntry.getKey();
1476                         String pluralTagValue = pluralTagEntry.getValue();
1477                         if (pluralKeywords.contains(pluralTag)) {
1478                             String fixedCount = lastPathInfo.path.replace("other", pluralTag);
1479                             myData.put(fixedCount, pluralTagValue);
1480                         } else {
1481                             System.out.println("***Skipping " + pluralTag + "\t" + pluralTagValue);
1482                         }
1483                     }
1484                     // myData.put(lastPathInfo.path, pluralTags.toString());
1485                     pluralTags.clear();
1486                 } else {
1487                     myData.put(lastPathInfo.path, chars);
1488                 }
1489                 currentText.setLength(0);
1490             }
1491         }
1492 
1493         @Override
endPrefixMapping(String arg0)1494         public void endPrefixMapping(String arg0) throws SAXException {
1495             if (SHOW) System.out.println("\t endPrefixMapping\t" + arg0);
1496         }
1497 
1498         @Override
ignorableWhitespace(char[] arg0, int arg1, int arg2)1499         public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {
1500             if (SHOW) System.out.println("\t ignorableWhitespace\t" + String.valueOf(arg0, arg1, arg2));
1501         }
1502 
1503         @Override
processingInstruction(String arg0, String arg1)1504         public void processingInstruction(String arg0, String arg1) throws SAXException {
1505             if (SHOW) System.out.println("\t processingInstruction\t" + arg0 + "\t" + arg1);
1506         }
1507 
1508         @Override
setDocumentLocator(Locator arg0)1509         public void setDocumentLocator(Locator arg0) {
1510             if (SHOW) System.out.println("\t setDocumentLocator\t" + arg0);
1511         }
1512 
1513         @Override
skippedEntity(String arg0)1514         public void skippedEntity(String arg0) throws SAXException {
1515             if (SHOW) System.out.println("\t skippedEntity\t" + arg0);
1516         }
1517 
1518         @Override
startDocument()1519         public void startDocument() throws SAXException {
1520             if (SHOW) System.out.println("\t startDocument\t");
1521         }
1522 
1523         @Override
startElement(String arg0, String arg1, String qName, Attributes arg3)1524         public void startElement(String arg0, String arg1, String qName, Attributes arg3) throws SAXException {
1525             // if (SHOW) System.out.println("\t startElement\t" + arg0 + "\t" + arg1 + "\t" + qName + "\t" +
1526             // showAttributes(arg3));
1527             if (qName.equals("msg")) {
1528                 lastId = Long.parseLong(arg3.getValue("id"));
1529                 lastPathInfo = info.getPathInfo(lastId);
1530                 currentText.setLength(0);
1531             } else if (qName.equals("ph")) {
1532                 String name = arg3.getValue("name");
1533                 String original = lastPathInfo.getPlaceholderReplacementsToOriginal().get(name);
1534                 if (original != null) {
1535                     currentText.append(original);
1536                 } else if (name.startsWith("[PLURAL_")) {
1537                     pluralTags.clear();
1538                     lastPluralTag = "[START_PLURAL]";
1539                 } else {
1540                     String pluralTag = PLURAL_TAGS.get(name);
1541                     if (pluralTag != null) {
1542                         String chars = currentText.toString().replace("\n", "").trim();
1543                         pluralTags.put(lastPluralTag, chars);
1544                         currentText.setLength(0);
1545                         lastPluralTag = pluralTag;
1546                     } else {
1547                         System.out.println("***Can't find " + name + " in "
1548                             + lastPathInfo.getPlaceholderReplacementsToOriginal());
1549                     }
1550                 }
1551             }
1552         }
1553 
showAttributes(Attributes atts)1554         private String showAttributes(Attributes atts) {
1555             String result = "";
1556             for (int i = 0; i < atts.getLength(); ++i) {
1557                 result += atts.getQName(i) + "=\"" + atts.getValue(i) + "\"\t";
1558             }
1559             return result;
1560         }
1561 
1562         @Override
startPrefixMapping(String arg0, String arg1)1563         public void startPrefixMapping(String arg0, String arg1) throws SAXException {
1564             if (SHOW) System.out.println("\t startPrefixMapping\t" + arg0 + "\t" + arg1);
1565         }
1566     }
1567 
1568     static final Map<String, String> PLURAL_TAGS = Builder.with(new HashMap<String, String>())
1569         .put("[​=0]", "0")
1570         .put("[=1]", "1")
1571         .put("[ZERO]", PluralRules.KEYWORD_ZERO)
1572         .put("[ONE]", PluralRules.KEYWORD_ONE)
1573         .put("[TWO]", PluralRules.KEYWORD_TWO)
1574         .put("[FEW]", PluralRules.KEYWORD_FEW)
1575         .put("[MANY]", PluralRules.KEYWORD_MANY)
1576         .put("[OTHER]", PluralRules.KEYWORD_OTHER)
1577         .put("[END_PLURAL]", "")
1578         .freeze();
1579 
1580     private static String compareDirectory;
1581 }
1582