1 /*
2  **********************************************************************
3  * Copyright (c) 2004-2005, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.tool;
10 
11 import java.io.BufferedReader;
12 import java.io.File;
13 import java.io.IOException;
14 import java.io.PrintWriter;
15 import java.util.ArrayList;
16 import java.util.Arrays;
17 import java.util.Calendar;
18 import java.util.Collection;
19 import java.util.HashMap;
20 import java.util.Iterator;
21 import java.util.List;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Matcher;
27 
28 import org.unicode.cldr.draft.FileUtilities;
29 import org.unicode.cldr.util.CLDRFile;
30 import org.unicode.cldr.util.CLDRPaths;
31 import org.unicode.cldr.util.CldrUtility;
32 import org.unicode.cldr.util.Factory;
33 import org.unicode.cldr.util.LanguageTagParser;
34 import org.unicode.cldr.util.PatternCache;
35 import org.unicode.cldr.util.SimpleXMLSource;
36 import org.unicode.cldr.util.StandardCodes;
37 import org.unicode.cldr.util.TimezoneFormatter;
38 import org.unicode.cldr.util.TransliteratorUtilities;
39 import org.unicode.cldr.util.XPathParts;
40 import org.unicode.cldr.util.ZoneParser;
41 import org.unicode.cldr.util.ZoneParser.RuleLine;
42 import org.unicode.cldr.util.ZoneParser.ZoneLine;
43 
44 import com.ibm.icu.dev.tool.UOption;
45 import com.ibm.icu.impl.Utility;
46 import com.ibm.icu.lang.UCharacter;
47 import com.ibm.icu.text.Collator;
48 import com.ibm.icu.text.DecimalFormat;
49 import com.ibm.icu.text.NumberFormat;
50 import com.ibm.icu.text.RuleBasedCollator;
51 import com.ibm.icu.text.Transliterator;
52 import com.ibm.icu.text.UTF16;
53 import com.ibm.icu.util.TimeZone;
54 import com.ibm.icu.util.ULocale;
55 
56 /**
57  * Grab-bag set of tools that needs to be rationalized.
58  */
59 public class Misc {
60     static Factory cldrFactory;
61     static CLDRFile english;
62     static CLDRFile resolvedRoot;
63     // WARNING: this file needs a serious cleanup
64 
65     private static final int HELP1 = 0,
66         HELP2 = 1,
67         SOURCEDIR = 2,
68         DESTDIR = 3,
69         MATCH = 4,
70         TO_LOCALIZE = 5,
71         CURRENT = 6,
72         WINDOWS = 7,
73         OBSOLETES = 8,
74         ALIASES = 9,
75         INFO = 10,
76         ZONES = 11,
77         LANGUAGE_TAGS = 12,
78         FUNCTION = 13;
79 
80     private static final UOption[] options = {
81         UOption.HELP_H(),
82         UOption.HELP_QUESTION_MARK(),
83         UOption.SOURCEDIR().setDefault(CLDRPaths.COMMON_DIRECTORY),
84         UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "timezones/"),
85         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
86         UOption.create("to_localize", 't', UOption.NO_ARG),
87         UOption.create("current", 'c', UOption.NO_ARG),
88         UOption.create("windows", 'w', UOption.NO_ARG),
89         UOption.create("obsoletes", 'o', UOption.NO_ARG),
90         UOption.create("aliases", 'a', UOption.NO_ARG),
91         UOption.create("info", 'i', UOption.NO_ARG),
92         UOption.create("zones", 'z', UOption.NO_ARG),
93         UOption.create("langauge-tags", 'l', UOption.NO_ARG),
94         UOption.create("function", 'f', UOption.REQUIRES_ARG),
95     };
96 
97     private static final String HELP_TEXT = "Use the following options" + XPathParts.NEWLINE
98         + "-h or -?\tfor this message" + XPathParts.NEWLINE
99         + "-" + options[SOURCEDIR].shortName + "\tsource directory. Default = "
100         + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) + XPathParts.NEWLINE
101         + "-" + options[DESTDIR].shortName + "\tdestination directory. Default = "
102         + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") + XPathParts.NEWLINE
103         + "-m<regex>\tto restrict the locales to what matches <regex>" + XPathParts.NEWLINE
104         + "-t\tgenerates files that contain items missing localizations" + XPathParts.NEWLINE
105         + "-c\tgenerates missing timezone localizations" + XPathParts.NEWLINE
106         + "-w\tgenerates Windows timezone IDs" + XPathParts.NEWLINE
107         + "-o\tlist display codes that are obsolete" + XPathParts.NEWLINE
108         + "-o\tshows timezone aliases"
109         + "-i\tgets element/attribute/value information"
110         + "-z\tcollected timezone localizations";
111 
112     /**
113      * Picks options and executes. Use -h to see options.
114      *
115      * @throws ClassNotFoundException
116      */
main(String[] args)117     public static void main(String[] args) throws Exception {
118         try {
119 
120             showLanguageTagCount();
121 
122             // Locale someLocale = Locale.FRENCH;
123             // Date someDate = new Date();
124             // ULocale uloc;
125             //
126             // SimpleDateFormat dateTimeFormat = (SimpleDateFormat) DateFormat.getTimeInstance(DateFormat.SHORT,
127             // someLocale);
128             // String pattern = dateTimeFormat.toPattern();
129             // // you now have a pattern, which you can copy and modify
130             // System.out.println(dateTimeFormat.format(someDate)); // unmodified
131             // pattern += "'some other stuff'";
132             // dateTimeFormat.applyPattern(pattern);
133             // System.out.println(dateTimeFormat.format(someDate)); // modified
134             //
135             // if (true) return;
136             UOption.parseArgs(args, options);
137             if (options[HELP1].doesOccur || options[HELP1].doesOccur) {
138                 System.out.println(HELP_TEXT);
139                 CldrUtility.showMethods(Misc.class);
140                 return;
141             }
142             cldrFactory = Factory.make(options[SOURCEDIR].value + "/main/", options[MATCH].value);
143             english = cldrFactory.make("en", false);
144             resolvedRoot = cldrFactory.make("root", true);
145             if (options[MATCH].value.equals("group1")) options[MATCH].value = "(en|fr|de|it|es|pt|ja|ko|zh)";
146             Set<String> languages = new TreeSet<String>(cldrFactory.getAvailableLanguages());
147             // new Utility.MatcherFilter(options[MATCH].value).retainAll(languages);
148             // new Utility.MatcherFilter("(sh|zh_Hans|sr_Cyrl)").removeAll(languages);
149 
150             if (options[CURRENT].doesOccur) {
151                 printCurrentTimezoneLocalizations(languages);
152             }
153 
154             if (options[ZONES].doesOccur) {
155                 printAllZoneLocalizations();
156             }
157 
158             if (options[TO_LOCALIZE].doesOccur) {
159                 for (Iterator<String> it = languages.iterator(); it.hasNext();) {
160                     String language = it.next();
161                     printSupplementalData(language);
162                 }
163             }
164 
165             if (options[WINDOWS].doesOccur) {
166                 printWindowsZones();
167             }
168 
169             if (options[INFO].doesOccur) {
170                 PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.TMP_DIRECTORY + "logs/", "attributesAndValues.html");
171                 new GenerateAttributeList(cldrFactory).show(pw);
172                 pw.close();
173             }
174 
175             if (options[OBSOLETES].doesOccur) {
176                 listObsoletes();
177             }
178 
179             if (options[ALIASES].doesOccur) {
180                 printZoneAliases();
181             }
182 
183             // TODO add options for these later
184             // getCities();
185             //
186             if (options[FUNCTION].doesOccur) {
187                 String function = options[FUNCTION].value;
188 
189                 CldrUtility.callMethod(function, Misc.class);
190             }
191 
192             // getZoneData();
193 
194         } finally {
195             System.out.println("DONE");
196         }
197     }
198 
199     // public static void callMethod(String methodName, Class cls) {
200     // try {
201     // Method method;
202     // try {
203     // method = cls.getMethod(methodName, (Class[]) null);
204     // try {
205     // method.invoke(null, (Object[]) null);
206     // } catch (Exception e) {
207     // e.printStackTrace();
208     // }
209     // } catch (Exception e) {
210     // System.out.println("No such method: " + methodName);
211     // showMethods(cls);
212     // }
213     // } catch (ClassNotFoundException e) {
214     // e.printStackTrace();
215     // }
216     // }
217     //
218     // public static void showMethods(Class cls) throws ClassNotFoundException {
219     // System.out.println("Possible methods are: ");
220     // Method[] methods = cls.getMethods();
221     // Set<String> names = new TreeSet<String>();
222     // for (int i = 0; i < methods.length; ++i) {
223     // if (methods[i].getGenericParameterTypes().length != 0) continue;
224     // int mods = methods[i].getModifiers();
225     // if (!Modifier.isStatic(mods)) continue;
226     // String name = methods[i].getName();
227     // names.add(name);
228     // }
229     // for (Iterator it = names.iterator(); it.hasNext();) {
230     // System.out.println("\t-f" + it.next());
231     // }
232     // }
233 
234     /**
235      *
236      */
showLanguageTagCount()237     private static void showLanguageTagCount() {
238         StandardCodes sc = StandardCodes.make();
239         int languageCount = sc.getGoodAvailableCodes("language").size();
240         int scriptCount = sc.getGoodAvailableCodes("script").size();
241         int countryCount = sc.getGoodAvailableCodes("territory").size();
242         System.out.println("language subtags:\t" + languageCount);
243         System.out.println("script subtags:\t" + scriptCount);
244         System.out.println("region subtags:\t" + countryCount);
245 
246         // for (Iterator it = sc.getAvailableCodes("territory").iterator(); it.hasNext();) {
247         // System.out.print("fr-" + it.next() + ", ");
248         // }
249         System.out.println();
250     }
251 
listObsoletes()252     private static void listObsoletes() {
253         //java.util.TimeZone t;
254         StandardCodes sc = StandardCodes.make();
255         for (Iterator<String> typeIt = sc.getAvailableTypes().iterator(); typeIt.hasNext();) {
256             String type = typeIt.next();
257             System.out.println(type);
258             for (Iterator<String> codeIt = sc.getAvailableCodes(type).iterator(); codeIt.hasNext();) {
259                 String code = codeIt.next();
260                 List<String> list = sc.getFullData(type, code);
261                 if (list.size() < 3) continue;
262                 String replacementCode = list.get(2);
263                 if (replacementCode.length() == 0) continue;
264                 System.out.println(code + " => " + replacementCode + "; "
265                     + english.getName(type, replacementCode));
266             }
267         }
268     }
269 
270     // Windows info:
271     // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/e2k3/e2k3/_cdoex_time_zone_to_cdotimezoneid_map.asp
272     // ICU info: http://oss.software.ibm.com/cvs/icu/~checkout~/icu/source/common/putil.c
273     // search for "Mapping between Windows zone IDs"
274 
275     static Set<String> priorities = new TreeSet<String>(Arrays.asList(new String[] { "en", "zh_Hans",
276         "zh_Hant", "da", "nl", "fi", "fr", "de", "it",
277         "ja", "ko", "nb", "pt_BR", "ru", "es", "sv", "ar", "bg", "ca",
278         "hr", "cs", "et", "el", "he", "hi", "hu", "is", "id", "lv", "lt",
279         "pl", "ro", "sr", "sk", "sl", "tl", "th", "tr", "uk", "ur", "vi"
280         // // "en_GB",
281     }));
282 
printAllZoneLocalizations()283     private static void printAllZoneLocalizations() throws IOException {
284         StandardCodes sc = StandardCodes.make();
285         Set<String> zones = sc.getAvailableCodes("tzid");
286         Map<Integer, Map<String, Map<String, String>>> offset_zone_locale_name = new TreeMap<Integer, Map<String, Map<String, String>>>();
287         for (Iterator<String> it2 = priorities.iterator(); it2.hasNext();) {
288             String locale = it2.next();
289             System.out.println(locale);
290             try {
291                 TimezoneFormatter tzf = new TimezoneFormatter(cldrFactory, locale, true);
292                 for (Iterator<String> it = zones.iterator(); it.hasNext();) {
293                     String zone = it.next();
294                     TimeZone tzone = TimeZone.getTimeZone(zone);
295                     int stdOffset = tzone.getRawOffset();
296                     Integer standardOffset = new Integer(-stdOffset);
297                     String name = tzf.getFormattedZone(zone, "vvvv", false, stdOffset, false);
298                     String gmt = tzf.getFormattedZone(zone, "ZZZZ", false, stdOffset, false);
299                     String fullName = "(" + gmt + ") "
300                         + (zone.startsWith("Etc") ? "" : name);
301 
302                     Map<String, Map<String, String>> zone_locale_name = offset_zone_locale_name.get(standardOffset);
303                     if (zone_locale_name == null)
304                         offset_zone_locale_name.put(standardOffset, zone_locale_name = new TreeMap<String, Map<String, String>>());
305 
306                     Map<String, String> locale_name = zone_locale_name.get(zone);
307                     if (locale_name == null) zone_locale_name.put(zone, locale_name = new TreeMap<String, String>());
308 
309                     locale_name.put(locale, fullName);
310                 }
311             } catch (RuntimeException e) {
312                 e.printStackTrace();
313             }
314         }
315         PrintWriter out = FileUtilities.openUTF8Writer("c:/", "zone_localizations.html");
316         out.println("<html><head>");
317         out.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
318         out.println("<title>Zone Localizations</title>");
319         out.println("<style>");
320         out.println("th,td { text-align: left; vertical-align: top }");
321         out.println("th { background-color: gray }");
322         out.println("</style>");
323         out.println("</head>");
324         out.println("<body>");
325         out.println("<table cellspacing='0' cellpadding='2' border='1'>");
326         out.println("<tr><th></th><th>No</th><th>Country</th><th>Offset(s)</th>");
327 
328         // do the header
329         for (Iterator<String> it2 = priorities.iterator(); it2.hasNext();) {
330             String locale = it2.next();
331             String englishLocaleName = english.getName(locale);
332             out.println("<th>" + locale + " (" + englishLocaleName + ")" + "</th>");
333         }
334 
335         // now the rows
336         out.println("</tr>");
337         Map<String, String> zone_country = sc.getZoneToCounty();
338         int count = 0;
339         for (Iterator<Integer> it = offset_zone_locale_name.keySet().iterator(); it.hasNext();) {
340             Integer offset = it.next();
341             // out.println(offset);
342             Map<String, Map<String, String>> zone_locale_name = offset_zone_locale_name.get(offset);
343             for (Iterator<String> it2 = zone_locale_name.keySet().iterator(); it2.hasNext();) {
344                 String zone = it2.next();
345                 out.println("<tr>");
346                 out.println("<th>" + (++count) + "</th>");
347                 out.println("<th>" + zone + "</th>");
348                 String country = zone_country.get(zone);
349                 String countryName = english.getName(CLDRFile.TERRITORY_NAME, country);
350                 out.println("<td>" + country + " (" + countryName + ")" + "</td>");
351                 TimeZone tzone = TimeZone.getTimeZone(zone);
352                 out.println("<td>" + offsetString(tzone) + "</td>");
353                 Map<String, String> locale_name = zone_locale_name.get(zone);
354                 for (Iterator<String> it3 = priorities.iterator(); it3.hasNext();) {
355                     String locale = it3.next();
356                     String name = locale_name.get(locale);
357                     out.println("<td>");
358                     if (name == null) {
359                         out.println("&nbsp;");
360                     } else {
361                         out.println(TransliteratorUtilities.toHTML.transliterate(name));
362                     }
363                     out.println("</td>");
364                 }
365                 out.println("</tr>");
366             }
367         }
368         out.println("</table>");
369         out.println(CldrUtility.ANALYTICS);
370         out.println("</body></html>");
371         out.close();
372     }
373 
374     /**
375      * @param tzone
376      * @return
377      */
offsetString(TimeZone tzone)378     private static String offsetString(TimeZone tzone) {
379         // TODO Auto-generated method stub
380         int janOffset = tzone.getOffset(JAN152006);
381         int juneOffset = tzone.getOffset(JUNE152006);
382         String result = hours.format(janOffset / 3600000.0);
383         if (juneOffset != janOffset) result += " / " + hours.format(juneOffset / 3600000.0);
384         return result;
385     }
386 
387     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date, int hourOfDay, int minute, int second)388     private static long getDateTimeinMillis(int year, int month, int date, int hourOfDay, int minute, int second) {
389         Calendar cal = Calendar.getInstance();
390         cal.set(year, month, date, hourOfDay, minute, second);
391         return cal.getTimeInMillis();
392     }
393 
394     static long JAN152006 = getDateTimeinMillis(2006, 1, 15, 0, 0, 0);
395     static long JUNE152006 = getDateTimeinMillis(2006, 6, 15, 0, 0, 0);
396     static NumberFormat hours = new DecimalFormat("0.##");
397 
398     /**
399      * @param languages
400      * @throws IOException
401      */
printCurrentTimezoneLocalizations(Set<String> languages)402     private static void printCurrentTimezoneLocalizations(Set<String> languages) throws IOException {
403         Set<String> rtlLanguages = new TreeSet<String>();
404         for (Iterator<String> it = languages.iterator(); it.hasNext();) {
405             String language = it.next();
406             CLDRFile desiredLocaleFile = cldrFactory.make(language, true);
407             String orientation = desiredLocaleFile.getStringValue("//ldml/layout/orientation/characterOrder");
408             boolean rtl = orientation == null ? false : orientation.equals("right-to-left");
409             PrintWriter log = FileUtilities.openUTF8Writer(options[DESTDIR].value + "", language + "_timezones.html");
410             log.println("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">");
411             log.println("<style type=\"text/css\"><!--");
412             log.println("td { text-align: center; vertical-align:top }");
413             log.println("th { vertical-align:top }");
414             if (rtl) {
415                 rtlLanguages.add(language);
416                 log.println("body { direction:rtl }");
417                 log.println(".ID {background-color: silver; text-align:right;}");
418                 log.println(".T {text-align:right; color: green}");
419             } else {
420                 log.println(".ID {background-color: silver; text-align:left;}");
421                 log.println(".T {text-align:left; color: green}");
422             }
423             log.println(".I {color: blue}");
424             log.println(".A {color: red}");
425             log.println("--></style>");
426             log.println("<title>Time Zone Localizations for " + language + "</title><head><body>");
427             log.println("<table border=\"1\" cellpadding=\"0\" cellspacing=\"0\" style=\"border-collapse: collapse\">");
428             printCurrentTimezoneLocalizations(log, language);
429             // printSupplementalData(group1[i]);
430             log.println("</table>");
431             log.println(CldrUtility.ANALYTICS);
432             log.println("</body></html>");
433             log.close();
434         }
435         System.out.println("RTL languages: " + rtlLanguages);
436     }
437 
printZoneAliases()438     static void printZoneAliases() {
439         RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
440         col.setNumericCollation(true);
441         StandardCodes sc = StandardCodes.make();
442         Map<String, String> zone_countries = sc.getZoneToCounty();
443         Map<String, String> old_new = sc.getZoneLinkold_new();
444         Map<String, Set<String>> new_old = new TreeMap<String, Set<String>>(col);
445         Map<String, Set<String>> country_zones = new TreeMap<String, Set<String>>(col);
446         for (Iterator<String> it = zone_countries.keySet().iterator(); it.hasNext();) {
447             String zone = it.next();
448             new_old.put(zone, new TreeSet<String>(col));
449             String country = zone_countries.get(zone);
450             String name = english.getName("territory", country) + " (" + country + ")";
451             Set<String> oldSet = country_zones.get(name);
452             if (oldSet == null) country_zones.put(name, oldSet = new TreeSet<String>(col));
453             oldSet.add(zone);
454         }
455         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext();) {
456             String oldOne = it.next();
457             String newOne = old_new.get(oldOne);
458             Set<String> oldSet = new_old.get(newOne);
459             if (false && oldSet == null) {
460                 System.out.println("Warning: missing zone: " + newOne);
461                 new_old.put(newOne, oldSet = new TreeSet(col));
462             }
463             oldSet.add(oldOne);
464         }
465         for (Iterator<String> it3 = country_zones.keySet().iterator(); it3.hasNext();) {
466             String country = it3.next();
467             System.out.println(country);
468             Set<String> zones = country_zones.get(country);
469             for (Iterator<String> it = zones.iterator(); it.hasNext();) {
470                 String newOne = it.next();
471                 System.out.println("    tzid:\t" + newOne);
472                 Set<String> oldSet = new_old.get(newOne);
473                 for (Iterator<String> it2 = oldSet.iterator(); it2.hasNext();) {
474                     String oldOne = it2.next();
475                     System.out.println("        alias:\t" + oldOne);
476                 }
477             }
478         }
479     }
480 
printWindowsZones()481     static void printWindowsZones() {
482         System.out.println("\t<timezoneData>");
483         System.out.println("\t\t<mapTimezones type=\"windows\">");
484         for (int i = 0; i < ZONE_MAP.length; i += 3) {
485             System.out.println("\t\t\t<mapZone other=\"" + ZONE_MAP[i + 1]
486                 + "\" type=\"" + ZONE_MAP[i]
487                 + "\"/> <!-- " + ZONE_MAP[i + 2] + "-->");
488         }
489         System.out.println("\t\t</mapTimezones>");
490         System.out.println("\t</timezoneData>");
491 
492         for (int i = 0; i < ZONE_MAP.length; i += 3) {
493             int p1 = ZONE_MAP[i + 2].indexOf('(');
494             int p2 = ZONE_MAP[i + 2].indexOf(')');
495             System.out.println(
496                 ZONE_MAP[i]
497                     + "\t" + ZONE_MAP[i + 1]
498                     + "\t" + ZONE_MAP[i + 2].substring(0, p1)
499                     + "\t" + ZONE_MAP[i + 2].substring(p1 + 1, p2)
500                     + "\t" + ZONE_MAP[i + 2].substring(p2 + 1));
501         }
502 
503     }
504 
505     static String[] ZONE_MAP = {
506         "Etc/GMT+12", "Dateline", "S (GMT-12:00) International Date Line West",
507 
508         "Pacific/Apia", "Samoa", "S (GMT-11:00) Midway Island, Samoa",
509 
510         "Pacific/Honolulu", "Hawaiian", "S (GMT-10:00) Hawaii",
511 
512         "America/Anchorage", "Alaskan", "D (GMT-09:00) Alaska",
513 
514         "America/Los_Angeles", "Pacific", "D (GMT-08:00) Pacific Time (US & Canada); Tijuana",
515 
516         "America/Phoenix", "US Mountain", "S (GMT-07:00) Arizona",
517         "America/Denver", "Mountain", "D (GMT-07:00) Mountain Time (US & Canada)",
518         "America/Chihuahua", "Mexico Standard Time 2", "D (GMT-07:00) Chihuahua, La Paz, Mazatlan",
519 
520         "America/Managua", "Central America", "S (GMT-06:00) Central America",
521         "America/Regina", "Canada Central", "S (GMT-06:00) Saskatchewan",
522         "America/Mexico_City", "Mexico", "D (GMT-06:00) Guadalajara, Mexico City, Monterrey",
523         "America/Chicago", "Central", "D (GMT-06:00) Central Time (US & Canada)",
524 
525         "America/Indianapolis", "US Eastern", "S (GMT-05:00) Indiana (East)",
526         "America/Bogota", "SA Pacific", "S (GMT-05:00) Bogota, Lima, Quito",
527         "America/New_York", "Eastern", "D (GMT-05:00) Eastern Time (US & Canada)",
528 
529         "America/Caracas", "SA Western", "S (GMT-04:00) Caracas, La Paz",
530         "America/Santiago", "Pacific SA", "D (GMT-04:00) Santiago",
531         "America/Halifax", "Atlantic", "D (GMT-04:00) Atlantic Time (Canada)",
532 
533         "America/St_Johns", "Newfoundland", "D (GMT-03:30) Newfoundland",
534 
535         "America/Buenos_Aires", "SA Eastern", "S (GMT-03:00) Buenos Aires, Georgetown",
536         "America/Godthab", "Greenland", "D (GMT-03:00) Greenland",
537         "America/Sao_Paulo", "E. South America", "D (GMT-03:00) Brasilia",
538 
539         "America/Noronha", "Mid-Atlantic", "D (GMT-02:00) Mid-Atlantic",
540 
541         "Atlantic/Cape_Verde", "Cape Verde", "S (GMT-01:00) Cape Verde Is.",
542         "Atlantic/Azores", "Azores", "D (GMT-01:00) Azores",
543 
544         "Africa/Casablanca", "Greenwich", "S (GMT) Casablanca, Monrovia",
545         "Europe/London", "GMT", "D (GMT) Greenwich Mean Time : Dublin, Edinburgh, Lisbon, London",
546 
547         "Africa/Lagos", "W. Central Africa", "S (GMT+01:00) West Central Africa",
548         "Europe/Berlin", "W. Europe", "D (GMT+01:00) Amsterdam, Berlin, Bern, Rome, Stockholm, Vienna",
549         "Europe/Paris", "Romance", "D (GMT+01:00) Brussels, Copenhagen, Madrid, Paris",
550         "Europe/Sarajevo", "Central European", "D (GMT+01:00) Sarajevo, Skopje, Warsaw, Zagreb",
551         "Europe/Belgrade", "Central Europe", "D (GMT+01:00) Belgrade, Bratislava, Budapest, Ljubljana, Prague",
552 
553         "Africa/Johannesburg", "South Africa", "S (GMT+02:00) Harare, Pretoria",
554         "Asia/Jerusalem", "Israel", "S (GMT+02:00) Jerusalem",
555         "Europe/Istanbul", "GTB", "D (GMT+02:00) Athens, Istanbul, Minsk",
556         "Europe/Helsinki", "FLE", "D (GMT+02:00) Helsinki, Kyiv, Riga, Sofia, Tallinn, Vilnius",
557         "Africa/Cairo", "Egypt", "D (GMT+02:00) Cairo",
558         "Europe/Bucharest", "E. Europe", "D (GMT+02:00) Bucharest",
559 
560         "Africa/Nairobi", "E. Africa", "S (GMT+03:00) Nairobi",
561         "Asia/Riyadh", "Arab", "S (GMT+03:00) Kuwait, Riyadh",
562         "Europe/Moscow", "Russian", "D (GMT+03:00) Moscow, St. Petersburg, Volgograd",
563         "Asia/Baghdad", "Arabic", "D (GMT+03:00) Baghdad",
564 
565         "Asia/Tehran", "Iran", "D (GMT+03:30) Tehran",
566 
567         "Asia/Muscat", "Arabian", "S (GMT+04:00) Abu Dhabi, Muscat",
568         "Asia/Tbilisi", "Caucasus", "D (GMT+04:00) Baku, Tbilisi, Yerevan",
569 
570         "Asia/Kabul", "Afghanistan", "S (GMT+04:30) Kabul",
571 
572         "Asia/Karachi", "West Asia", "S (GMT+05:00) Islamabad, Karachi, Tashkent",
573         "Asia/Yekaterinburg", "Ekaterinburg", "D (GMT+05:00) Ekaterinburg",
574 
575         "Asia/Calcutta", "India", "S (GMT+05:30) Chennai, Kolkata, Mumbai, New Delhi",
576 
577         "Asia/Katmandu", "Nepal", "S (GMT+05:45) Kathmandu",
578 
579         "Asia/Colombo", "Sri Lanka", "S (GMT+06:00) Sri Jayawardenepura",
580         "Asia/Dhaka", "Central Asia", "S (GMT+06:00) Astana, Dhaka",
581         "Asia/Novosibirsk", "N. Central Asia", "D (GMT+06:00) Almaty, Novosibirsk",
582 
583         "Asia/Rangoon", "Myanmar", "S (GMT+06:30) Rangoon",
584 
585         "Asia/Bangkok", "SE Asia", "S (GMT+07:00) Bangkok, Hanoi, Jakarta",
586         "Asia/Krasnoyarsk", "North Asia", "D (GMT+07:00) Krasnoyarsk",
587 
588         "Australia/Perth", "W. Australia", "S (GMT+08:00) Perth",
589         "Asia/Taipei", "Taipei", "S (GMT+08:00) Taipei",
590         "Asia/Singapore", "Singapore", "S (GMT+08:00) Kuala Lumpur, Singapore",
591         "Asia/Hong_Kong", "China", "S (GMT+08:00) Beijing, Chongqing, Hong Kong, Urumqi",
592         "Asia/Irkutsk", "North Asia East", "D (GMT+08:00) Irkutsk, Ulaan Bataar",
593 
594         "Asia/Tokyo", "Tokyo", "S (GMT+09:00) Osaka, Sapporo, Tokyo",
595         "Asia/Seoul", "Korea", "S (GMT+09:00) Seoul",
596         "Asia/Yakutsk", "Yakutsk", "D (GMT+09:00) Yakutsk",
597 
598         "Australia/Darwin", "AUS Central", "S (GMT+09:30) Darwin",
599         "Australia/Adelaide", "Cen. Australia", "D (GMT+09:30) Adelaide",
600 
601         "Pacific/Guam", "West Pacific", "S (GMT+10:00) Guam, Port Moresby",
602         "Australia/Brisbane", "E. Australia", "S (GMT+10:00) Brisbane",
603         "Asia/Vladivostok", "Vladivostok", "D (GMT+10:00) Vladivostok",
604         "Australia/Hobart", "Tasmania", "D (GMT+10:00) Hobart",
605         "Australia/Sydney", "AUS Eastern", "D (GMT+10:00) Canberra, Melbourne, Sydney",
606 
607         "Asia/Magadan", "Central Pacific", "S (GMT+11:00) Magadan, Solomon Is., New Caledonia",
608 
609         "Pacific/Fiji", "Fiji", "S (GMT+12:00) Fiji, Kamchatka, Marshall Is.",
610         "Pacific/Auckland", "New Zealand", "D (GMT+12:00) Auckland, Wellington",
611 
612         "Pacific/Tongatapu", "Tonga", "S (GMT+13:00) Nuku'alofa",
613     };
614 
615     /**
616      * @throws IOException
617      *
618      */
printCurrentTimezoneLocalizations(PrintWriter log, String locale)619     private static void printCurrentTimezoneLocalizations(PrintWriter log, String locale) throws IOException {
620         StandardCodes sc = StandardCodes.make();
621 
622         Map<String, Set<String>> linkNew_Old = sc.getZoneLinkNew_OldSet();
623         TimezoneFormatter tzf = new TimezoneFormatter(cldrFactory, locale, true);
624         /*
625          * <hourFormat>+HHmm;-HHmm</hourFormat>
626          * <hoursFormat>{0}/{1}</hoursFormat>
627          * <gmtFormat>GMT{0}</gmtFormat>
628          * <regionFormat>{0}</regionFormat>
629          * <fallbackFormat>{0} ({1})</fallbackFormat>
630          * <abbreviationFallback type="standard"/>
631          * <preferenceOrdering type="America/Mexico_City America/Chihuahua America/New_York">
632          */
633         RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(new ULocale(locale));
634         col.setNumericCollation(true);
635         Set<String> orderedAliases = new TreeSet<String>(col);
636 
637         Map<String, String> zone_countries = StandardCodes.make().getZoneToCounty();
638         //Map<String, Set<String>> countries_zoneSet = StandardCodes.make().getCountryToZoneSet();
639 
640         Map<String, String> reordered = new TreeMap<String, String>(col);
641         CLDRFile desiredLocaleFile = cldrFactory.make(locale, true);
642 
643         for (Iterator<String> it = zone_countries.keySet().iterator(); it.hasNext();) {
644             String zoneID = it.next();
645             String country = zone_countries.get(zoneID);
646             String countryName = desiredLocaleFile.getName(CLDRFile.TERRITORY_NAME, country);
647             if (countryName == null) countryName = UTF16.valueOf(0x10FFFD) + country;
648             reordered.put(countryName + "0" + zoneID, zoneID);
649         }
650 
651         String[] field = new String[TimezoneFormatter.TYPE_LIMIT];
652         boolean first = true;
653         int count = 0;
654         for (Iterator<String> it = reordered.keySet().iterator(); it.hasNext();) {
655             String key = it.next();
656             String zoneID = reordered.get(key);
657             String country = zone_countries.get(zoneID);
658             String countryName = desiredLocaleFile.getName(CLDRFile.TERRITORY_NAME, country);
659             if (countryName == null) countryName = country;
660             log.println("<tr><th class='ID' colspan=\"4\"><table><tr><th class='I'>"
661                 + (++count) + "</th><th class='T'>" + TransliteratorUtilities.toHTML.transliterate(countryName)
662                 + "</th><th class='I'>\u200E" + TransliteratorUtilities.toHTML.transliterate(zoneID));
663             Set<String> s = linkNew_Old.get(zoneID);
664             if (s != null) {
665                 log.println("\u200E</th><td class='A'>\u200E");
666                 orderedAliases.clear();
667                 orderedAliases.addAll(s);
668                 boolean first2 = true;
669                 for (Iterator<String> it9 = s.iterator(); it9.hasNext();) {
670                     String alias = it9.next();
671                     if (first2)
672                         first2 = false;
673                     else
674                         log.println("; ");
675                     log.print(TransliteratorUtilities.toHTML.transliterate(alias));
676                 }
677             }
678             log.print("\u200E</td></tr></table></th></tr>");
679             if (first) {
680                 first = false;
681                 log.println(
682                     "<tr><th width=\"25%\">&nbsp;</th><th width=\"25%\">generic</th><th width=\"25%\">standard</th><th width=\"25%\">daylight</th></tr>");
683             } else {
684                 log.println("<tr><th>&nbsp;</th><th>generic</th><th>standard</th><th>daylight</th></tr>");
685             }
686             for (int i = 0; i < TimezoneFormatter.LENGTH_LIMIT; ++i) {
687                 log.println("<tr><th>" + TimezoneFormatter.LENGTH.get(i) + "</th>");
688                 for (int j = 0; j < TimezoneFormatter.TYPE_LIMIT; ++j) {
689                     field[j] = TransliteratorUtilities.toHTML.transliterate(tzf
690                         .getFormattedZone(zoneID, i, j, 0, false));
691                 }
692                 if (field[0].equals(field[1]) && field[1].equals(field[2])) {
693                     log.println("<td colspan=\"3\">" + field[0] + "</td>");
694                 } else {
695                     for (int j = 0; j < TimezoneFormatter.TYPE_LIMIT; ++j) {
696                         log.println("<td>" + field[j] + "</td>");
697                     }
698                 }
699                 log.println("</tr>");
700             }
701         }
702     }
703 
showOrderedTimezones()704     void showOrderedTimezones() {
705         StandardCodes.make();
706     }
707 
    // Named sub-patterns from which the language-tag regular expression below is assembled.
    // Each add(name, value) registers a "$name" variable; several values refer to variables
    // registered before them (for example $lang is written in terms of $alpha).
    // NOTE(review): the component shapes and the ABNF-style trailing comments look like the
    // RFC 4646 / BCP 47 "langtag" grammar - confirm which revision is intended.
    static CldrUtility.VariableReplacer langTag = new CldrUtility.VariableReplacer()
        .add("$alpha", "[a-zA-Z]")
        .add("$digit", "[0-9]")
        .add("$alphanum", "[a-zA-Z0-9]")
        .add("$x", "[xX]")
        // Fixed list of whole tags that are accepted as-is.
        .add("$grandfathered", "en-GB-oed" +
            "|i-(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)" +
            "|no-(?:bok|nyn)" +
            "|sgn-(?:BE-(?:fr|nl)|CH-de)" +
            "|zh-(?:gan|min(?:-nan)?|wuu|yue)")
        .add("$lang", "$alpha{2,8}")
        .add("$extlang", "(?:-$alpha{3})") // *3("-" 3ALPHA)
        .add("$script", "(?:-$alpha{4})") // ["-" script], 4ALPHA
        .add("$region", "(?:-$alpha{2}|-$digit{3})") // ["-" region], 2ALPHA / 3DIGIT
        .add("$variant", "(?:-$digit$alphanum{3}|-$alphanum{5,8})") // *("-" variant), 5*8alphanum / (DIGIT 3alphanum)
        // Singleton (any alphanumeric except x/X) followed by one or more 2-8 character subtags.
        .add("$extension", "(?:-[$alphanum&&[^xX]](?:-$alphanum{2,8})+)")
        // "x" followed by one or more 1-8 character subtags.
        .add("$privateuse", "(?:$x(?:-$alphanum{1,8})+)")
        // The private-use form with a leading hyphen, for use at the end of a regular tag.
        .add("$privateuse2", "(?:-$privateuse)");
    // Complete pattern with the variables expanded by langTag.replace(...). It has one capture
    // group per component and three top-level alternatives: a regular tag, a grandfathered tag,
    // or a private-use-only tag. Line separators and tabs are inserted purely for readability.
    static String langTagPattern = langTag.replace(
        "($lang)"
            + CldrUtility.LINE_SEPARATOR + "\t($extlang{0,3})"
            + CldrUtility.LINE_SEPARATOR + "\t($script?)"
            + CldrUtility.LINE_SEPARATOR + "\t($region?)"
            + CldrUtility.LINE_SEPARATOR + "\t($variant*)"
            + CldrUtility.LINE_SEPARATOR + "\t($extension*)"
            + CldrUtility.LINE_SEPARATOR + "\t($privateuse2?)"
            + CldrUtility.LINE_SEPARATOR + "|($grandfathered)"
            + CldrUtility.LINE_SEPARATOR + "|($privateuse)");
    // The same pattern with every whitespace character (CR, tab, LF, other \s) removed, i.e. the
    // form that is actually handed to the regex engine.
    static String cleanedLangTagPattern = langTagPattern.replaceAll("[\\r\\t\\n\\s]", "");
    // Shared matcher over the cleaned pattern, created against empty input.
    // NOTE(review): a single static Matcher is stateful; assumes single-threaded use - confirm.
    static Matcher regexLanguageTagOld = PatternCache.get(cleanedLangTagPattern).matcher("");
738 
getZoneData()739     public static void getZoneData() {
740         StandardCodes sc = StandardCodes.make();
741         System.out.println("Links: Old->New");
742         Map<String, String> m = sc.getZoneLinkold_new();
743         int count = 0;
744         for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
745             String key = it.next();
746             String newOne = m.get(key);
747             System.out.println(++count + "\t" + key + " => " + newOne);
748         }
749         count = 0;
750         System.out.println();
751         System.out.println("Links: Old->New, not final");
752         Set<String> oldIDs = m.keySet();
753         for (Iterator<String> it = oldIDs.iterator(); it.hasNext();) {
754             ++count;
755             String key = it.next();
756             String newOne = m.get(key);
757             String further = m.get(newOne);
758             if (further == null) continue;
759             while (true) {
760                 String temp = m.get(further);
761                 if (temp == null) break;
762                 further = temp;
763             }
764             System.out.println(count + "\t" + key + " => " + newOne + " # NOT FINAL => " + further);
765         }
766 
767         Map<String, List<ZoneLine>> m2 = sc.getZone_rules();
768         System.out.println();
769         System.out.println("Zones with old IDs");
770         for (Iterator<String> it = m2.keySet().iterator(); it.hasNext();) {
771             String key = it.next();
772             if (oldIDs.contains(key)) System.out.println(key);
773         }
774 
775         Set<String> modernIDs = sc.getZoneData().keySet();
776         System.out.println();
777         System.out.println("Zones without countries");
778         TreeSet<String> temp = new TreeSet<String>(m2.keySet());
779         temp.removeAll(modernIDs);
780         System.out.println(temp);
781 
782         Set<String> countries = sc.getAvailableCodes("territory");
783         System.out.println();
784         System.out.println("Countries without zones");
785         temp.clear();
786         temp.addAll(countries);
787         temp.removeAll(sc.getOld3166());
788         for (Iterator<List<String>> it = sc.getZoneData().values().iterator(); it.hasNext();) {
789             List<String> x = it.next();
790             List<String> list = x;
791             temp.remove(list.get(2));
792         }
793         for (Iterator<String> it = temp.iterator(); it.hasNext();) {
794             String item = it.next();
795             if (UCharacter.isDigit(item.charAt(0))) it.remove();
796         }
797         System.out.println(temp);
798 
799         System.out.println();
800         System.out.println("Zone->RulesIDs");
801         m2 = sc.getZone_rules();
802         for (Iterator<String> it = m2.keySet().iterator(); it.hasNext();) {
803             String key = it.next();
804             System.out.println(key + " => " + XPathParts.NEWLINE + "\t"
805                 + getSeparated(m2.get(key), XPathParts.NEWLINE + "\t"));
806         }
807 
808         System.out.println();
809         System.out.println("RulesID->Rules");
810         m2 = sc.getZoneRuleID_rules();
811         for (Iterator<String> it = m2.keySet().iterator(); it.hasNext();) {
812             String key = it.next();
813             System.out.println(key + " => " + XPathParts.NEWLINE + "\t"
814                 + getSeparated(m2.get(key), XPathParts.NEWLINE + "\t"));
815         }
816 
817         System.out.println();
818         System.out.println("ZoneIDs->Abbreviations");
819 
820         // now get all the abbreviations
821         // Map rule_abbreviations = getAbbreviations(m);
822 
823         Map<String, List<RuleLine>> ruleID_Rules = sc.getZoneRuleID_rules();
824         Map<String, Set<String>> abb_zones = new TreeMap<String, Set<String>>();
825         m2 = sc.getZone_rules();
826         for (Iterator<String> it = m2.keySet().iterator(); it.hasNext();) {
827             String key = it.next();
828             Set<String> abbreviations = new TreeSet<String>();
829             // rule_abbreviations.put(key, abbreviations);
830             ZoneLine lastZoneLine = null;
831 
832             for (Iterator<ZoneLine> it2 = m2.get(key).iterator(); it2.hasNext();) {
833                 ZoneLine zoneLine = it2.next();
834                 //int thisYear = zoneLine.untilYear;
835                 String format = zoneLine.format;
836                 if (format.indexOf('/') >= 0) {
837                     List<String> abb = Arrays.asList(format.split("/"));
838                     for (Iterator<String> it3 = abb.iterator(); it3.hasNext();) {
839                         add(abbreviations, format.replaceAll("%s", it3.next()), key, lastZoneLine, zoneLine);
840                     }
841                 } else if (format.indexOf('%') >= 0) {
842                     Set<String> abb = getAbbreviations(ruleID_Rules, lastZoneLine, zoneLine);
843                     if (abb.size() == 0) {
844                         System.out.println("??? Didn't find %s values for " + format + " under " + key
845                             + ";" + CldrUtility.LINE_SEPARATOR + "\tLast:" + lastZoneLine + ";"
846                             + CldrUtility.LINE_SEPARATOR + "\tCurrent: " + zoneLine);
847                         abb = getAbbreviations(ruleID_Rules, lastZoneLine, zoneLine);
848                     }
849 
850                     if (abb == null) {
851                         System.out.println("??? " + zoneLine.rulesSave);
852                         add(abbreviations, format, key, lastZoneLine, zoneLine);
853                     } else {
854                         for (Iterator<String> it3 = abb.iterator(); it3.hasNext();) {
855                             add(abbreviations, format.replaceAll("%s", it3.next()), key, lastZoneLine,
856                                 zoneLine);
857                         }
858                     }
859                 } else {
860                     add(abbreviations, format, key, lastZoneLine, zoneLine);
861                 }
862                 lastZoneLine = zoneLine;
863             }
864             for (Iterator<String> it3 = abbreviations.iterator(); it3.hasNext();) {
865                 String abb = it3.next();
866                 if (abb.equals("")) {
867                     it3.remove();
868                     continue;
869                 }
870                 Set<String> zones = abb_zones.get(abb);
871                 if (zones == null) abb_zones.put(abb, zones = new TreeSet<String>());
872                 zones.add(key);
873             }
874             System.out.println(key + " => " + XPathParts.NEWLINE + "\t"
875                 + getSeparated(abbreviations, XPathParts.NEWLINE + "\t"));
876         }
877 
878         System.out.println();
879         System.out.println("Abbreviations->ZoneIDs");
880         for (Iterator<String> it = abb_zones.keySet().iterator(); it.hasNext();) {
881             String key = it.next();
882             System.out.println(key + " => " + XPathParts.NEWLINE + "\t"
883                 + getSeparated(abb_zones.get(key), XPathParts.NEWLINE + "\t"));
884         }
885 
886         System.out.println("Types: " + ZoneParser.RuleLine.types);
887         System.out.println("Saves: " + ZoneParser.RuleLine.days);
888         System.out.println("untilDays: " + ZoneParser.ZoneLine.untilDays);
889         System.out.println("rulesSaves: " + ZoneParser.ZoneLine.rulesSaves);
890 
891     }
892 
add(Set<String> abbreviations, String format, String zone, ZoneLine lastZoneLine, ZoneLine zoneLine)893     private static void add(Set<String> abbreviations, String format, String zone, ZoneLine lastZoneLine, ZoneLine zoneLine) {
894         if (format.length() < 3) {
895             System.out.println("??? Format too short: '" + format + "' under " + zone
896                 + ";" + CldrUtility.LINE_SEPARATOR + "\tLast:" + lastZoneLine + ";" + CldrUtility.LINE_SEPARATOR
897                 + "\tCurrent: " + zoneLine);
898             return;
899         }
900         abbreviations.add(format);
901     }
902 
getAbbreviations(Map<String, List<RuleLine>> rules, ZoneLine lastZoneLine, ZoneLine zoneLine)903     private static Set<String> getAbbreviations(Map<String, List<RuleLine>> rules, ZoneLine lastZoneLine, ZoneLine zoneLine) {
904         Set<String> result = new TreeSet<String>();
905         List<RuleLine> ruleList = rules.get(zoneLine.rulesSave);
906         for (Iterator<RuleLine> it2 = ruleList.iterator(); it2.hasNext();) {
907             RuleLine ruleLine = it2.next();
908             int from = ruleLine.fromYear;
909             int to = ruleLine.toYear;
910             // they overlap?
911             if (zoneLine.untilYear >= from && (lastZoneLine == null || lastZoneLine.untilYear <= to)) {
912                 result.add(ruleLine.letter == null ? "?!?" : ruleLine.letter);
913             }
914         }
915         return result;
916     }
917 
918     @SuppressWarnings("rawtypes")
getSeparated(Collection c, String separator)919     private static String getSeparated(Collection c, String separator) {
920         StringBuffer result = new StringBuffer();
921         boolean first = true;
922         for (Iterator it = c.iterator(); it.hasNext();) {
923             if (first)
924                 first = false;
925             else
926                 result.append(separator);
927             result.append(it.next());
928         }
929         return result.toString();
930     }
931 
getCities()932     private static void getCities() throws IOException {
933         StandardCodes sc = StandardCodes.make();
934         Set<String> territories = sc.getAvailableCodes("territory");
935         Map<String, List<String>> zoneData = sc.getZoneData();
936 
937         Set<String> s = new TreeSet<String>(sc.getTZIDComparator());
938         s.addAll(sc.getZoneData().keySet());
939         int counter = 0;
940         for (Iterator<String> it = s.iterator(); it.hasNext();) {
941             String key = it.next();
942             System.out.println(++counter + "\t" + key + "\t" + zoneData.get(key));
943         }
944         Set<String> missing2 = new TreeSet<String>(sc.getZoneData().keySet());
945         missing2.removeAll(sc.getZoneToCounty().keySet());
946         System.out.println(missing2);
947         missing2.clear();
948         missing2.addAll(sc.getZoneToCounty().keySet());
949         missing2.removeAll(sc.getZoneData().keySet());
950         System.out.println(missing2);
951         if (true) return;
952 
953         Map<String, Map<String, String>> country_city_data = new TreeMap<String, Map<String, String>>();
954         Map<String, String> territoryName_code = new HashMap<String, String>();
955         Map<String, String> zone_to_country = sc.getZoneToCounty();
956         for (Iterator<String> it = territories.iterator(); it.hasNext();) {
957             String code = it.next();
958             territoryName_code.put(sc.getData("territory", code), code);
959         }
960         Transliterator t = Transliterator.getInstance(
961             "hex-any/html; [\\u0022] remove");
962         Transliterator t2 = Transliterator.getInstance(
963             "NFD; [:m:]Remove; NFC");
964         BufferedReader br = FileUtilities.openUTF8Reader("c:/data/", "cities.txt");
965         counter = 0;
966         Set<String> missing = new TreeSet<String>();
967         while (true) {
968             String line = br.readLine();
969             if (line == null) break;
970             if (line.startsWith("place name")) continue;
971             List<String> list = CldrUtility.splitList(line, '\t', true);
972             String place = list.get(0);
973             place = t.transliterate(place);
974             String place2 = t2.transliterate(place);
975             String country = list.get(1);
976             String population = list.get(2);
977             String latitude = list.get(3);
978             String longitude = list.get(4);
979             String code = territoryName_code.get(country);
980             if (code == null) missing.add(country);
981             Map<String, String> city_data = country_city_data.get(code);
982             if (city_data == null) {
983                 city_data = new TreeMap<String, String>();
984                 country_city_data.put(code, city_data);
985             }
986             city_data.put(place2,
987                 place + "_" + population + "_" + latitude + "_" + longitude);
988         }
989         if (false) for (Iterator<String> it = missing.iterator(); it.hasNext();) {
990             System.out.println("\"" + it.next() + "\", \"XXX\",");
991         }
992 
993         for (Iterator<String> it = country_city_data.keySet().iterator(); it.hasNext();) {
994             String key = it.next();
995             Map<String, String> city_data = country_city_data.get(key);
996             for (Iterator<String> it2 = city_data.keySet().iterator(); it2.hasNext();) {
997                 String key2 = it2.next();
998                 String value = city_data.get(key2);
999                 System.out.println(++counter + "\t" + key + "\t"
1000                     + key2 + "\t" + value);
1001             }
1002         }
1003         for (Iterator<String> it = zone_to_country.keySet().iterator(); it.hasNext();) {
1004             String zone = it.next();
1005             if (zone.startsWith("Etc")) continue;
1006             String country = zone_to_country.get(zone);
1007             Map<String, String> city_data = country_city_data.get(country);
1008             if (city_data == null) {
1009                 System.out.println("Missing country: " + zone + "\t" + country);
1010                 continue;
1011             }
1012 
1013             List<String> pieces = CldrUtility.splitList(zone, '/', true);
1014             String city = pieces.get(pieces.size() - 1);
1015             city = city.replace('_', ' ');
1016             String data = city_data.get(city);
1017             if (data != null) continue;
1018             System.out.println();
1019             System.out.println("\"" + city + "\", \"XXX\" // "
1020                 + zone + ",\t" + sc.getData("territory", country));
1021             System.out.println(city_data);
1022         }
1023     }
1024 
1025     // static PrintWriter log;
1026 
printSupplementalData(String locale)1027     private static void printSupplementalData(String locale) throws IOException {
1028 
1029         PrintWriter log = null; // FileUtilities.openUTF8Writer(options[DESTDIR].value + "", locale +
1030         // "_timezonelist.xml");
1031         CLDRFile desiredLocaleFile = cldrFactory.make(locale, true).cloneAsThawed();
1032         desiredLocaleFile.removeDuplicates(resolvedRoot, false, null, null);
1033 
1034         CLDRFile english = cldrFactory.make("en", true);
1035         Collator col = Collator.getInstance(new ULocale(locale));
1036         CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
1037         XPathParts parts = new XPathParts(null, null);
1038         for (Iterator<String> it = supp.iterator(); it.hasNext();) {
1039             String path = it.next();
1040             parts.set(supp.getFullXPath(path));
1041             Map<String, String> m = parts.findAttributes("language");
1042             if (m == null) continue;
1043             if (false) System.out.println("Type: " + m.get("type")
1044                 + "\tscripts: " + m.get("scripts")
1045                 + "\tterritories: " + m.get("territories"));
1046         }
1047 
1048         // territories
1049         Map<String, Collection<String>> groups = new TreeMap<String, Collection<String>>();
1050         for (Iterator<String> it = supp.iterator(); it.hasNext();) {
1051             String path = it.next();
1052             parts.set(supp.getFullXPath(path));
1053             Map<String, String> m = parts.findAttributes("territoryContainment");
1054             if (m == null) continue;
1055             Map<String, String> attributes = parts.getAttributes(2);
1056             String type = attributes.get("type");
1057             Collection<String> contents = CldrUtility
1058                 .splitList(attributes.get("contains"), ' ', true, new ArrayList<String>());
1059             groups.put(type, contents);
1060             if (false) {
1061                 System.out.print("\t\t<group type=\"" + fixNumericKey(type)
1062                     + "\" contains=\"");
1063                 boolean first = true;
1064                 for (Iterator<String> it2 = contents.iterator(); it2.hasNext();) {
1065                     if (first)
1066                         first = false;
1067                     else
1068                         System.out.print(" ");
1069                     System.out.print(fixNumericKey(it2.next()));
1070                 }
1071                 System.out.println("\"> <!--" + desiredLocaleFile.getName(CLDRFile.TERRITORY_NAME, type) + " -->");
1072             }
1073         }
1074         Set<String> seen = new TreeSet<String>();
1075         printTimezonesToLocalize(log, desiredLocaleFile, groups, seen, col, false, english);
1076         StandardCodes sc = StandardCodes.make();
1077         Set<String> codes = sc.getAvailableCodes("territory");
1078         Set<String> missing = new TreeSet<String>(codes);
1079         missing.removeAll(seen);
1080         if (false) {
1081             if (missing.size() != 0) System.out.println("Missing: ");
1082             for (Iterator<String> it = missing.iterator(); it.hasNext();) {
1083                 String key = it.next();
1084                 // String name = english.getName(CLDRFile.TERRITORY_NAME, key, false);
1085                 System.out.println("\t" + key + "\t" + sc.getFullData("territory", key));
1086             }
1087         }
1088         if (log != null) log.close();
1089     }
1090 
1091     // <ldml><localeDisplayNames><territories>
1092     // <territory type="001" draft="true">World</territory>
1093     // <ldml><dates><timeZoneNames>
1094     // <zone type="America/Anchorage" draft="true"><exemplarCity draft="true">Anchorage</exemplarCity></zone>
1095 
printTimezonesToLocalize(PrintWriter log, CLDRFile localization, Map<String, Collection<String>> groups, Set<String> seen, Collator col, boolean showCode, CLDRFile english)1096     private static void printTimezonesToLocalize(PrintWriter log, CLDRFile localization, Map<String, Collection<String>> groups, Set<String> seen,
1097         Collator col, boolean showCode,
1098         CLDRFile english) throws IOException {
1099         @SuppressWarnings("unchecked")
1100         Set<String>[] missing = new Set[2];
1101         missing[0] = new TreeSet<String>();
1102         missing[1] = new TreeSet<String>(StandardCodes.make().getTZIDComparator());
1103         printWorldTimezoneCategorization(log, localization, groups, "001", 0, seen, col, showCode, zones_countrySet(),
1104             missing);
1105         if (missing[0].size() == 0 && missing[1].size() == 0) return;
1106         PrintWriter log2 = FileUtilities.openUTF8Writer(options[DESTDIR].value + "", localization.getLocaleID() + "_to_localize.xml");
1107         log2.println("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
1108         log2.println("<!DOCTYPE ldml SYSTEM \"../../common/dtd/ldml.dtd\">");
1109         log2.println("<ldml><identity><version number=\"" + CLDRFile.GEN_VERSION
1110             + "\"/><generation date=\"2005-01-01\"/><language type=\""
1111             + TransliteratorUtilities.toXML.transliterate(localization.getLocaleID()) + "\"/></identity>");
1112         log2.println("<!-- The following are strings that are not found in the locale (currently), " +
1113             "but need valid translations for localizing timezones. -->");
1114         if (missing[0].size() != 0) {
1115             log2.println("<localeDisplayNames><territories>");
1116             for (Iterator<String> it = missing[0].iterator(); it.hasNext();) {
1117                 String key = it.next();
1118                 log2.println("\t<territory type=\""
1119                     + key
1120                     + "\" draft=\"unconfirmed\">"
1121                     +
1122                     TransliteratorUtilities.toXML.transliterate("TODO " + english.getName(CLDRFile.TERRITORY_NAME, key))
1123                     + "</territory>");
1124             }
1125             log2.println("</territories></localeDisplayNames>");
1126         }
1127         if (true) {
1128             String lastCountry = "";
1129             log2.println("<dates><timeZoneNames>");
1130             log2.println("\t<hourFormat>TODO +HHmm;-HHmm</hourFormat>");
1131             log2.println("\t<hoursFormat>TODO {0}/{1}</hoursFormat>");
1132             log2.println("\t<gmtFormat>TODO GMT{0}</gmtFormat>");
1133             log2.println("\t<regionFormat>TODO {0}</regionFormat>");
1134             log2.println("\t<fallbackFormat>TODO {0} ({1})</fallbackFormat>");
1135             for (Iterator<String> it = missing[1].iterator(); it.hasNext();) {
1136                 String key = it.next();
1137                 List<String> data = StandardCodes.make().getZoneData().get(key);
1138                 String countryCode = data.get(2);
1139                 String country = english.getName(CLDRFile.TERRITORY_NAME, countryCode);
1140                 if (!country.equals(lastCountry)) {
1141                     lastCountry = country;
1142                     log2.println("\t<!-- " + country + "-->");
1143                 }
1144                 log2.println("\t<zone type=\"" + key + "\"><exemplarCity draft=\"unconfirmed\">"
1145                     + TransliteratorUtilities.toXML.transliterate("TODO " + getName(english, key, null))
1146                     + "</exemplarCity></zone>");
1147             }
1148             log2.println("</timeZoneNames></dates>");
1149         }
1150         log2.println("</ldml>");
1151         log2.close();
1152     }
1153 
    // Element names used by printWorldTimezoneCategorization, indexed by nesting depth
    // (0 = "world" ... 4 = "subzone"). A depth beyond 4 has no entry here.
    static String[] levelNames = { "world", "continent", "subcontinent", "country", "subzone" };
1155 
printWorldTimezoneCategorization(PrintWriter log, CLDRFile localization, Map<String, Collection<String>> groups, String key, int indent, Set<String> seen, Collator col, boolean showCode, Map<String, Set<String>> zone_countrySet, Set<String>[] missing)1156     private static void printWorldTimezoneCategorization(PrintWriter log, CLDRFile localization,
1157         Map<String, Collection<String>> groups, String key, int indent, Set<String> seen, Collator col, boolean showCode,
1158         Map<String, Set<String>> zone_countrySet, Set<String>[] missing) {
1159         // String fixedKey = fixNumericKey(key);
1160         seen.add(key);
1161         String name = getName(localization, key, missing);
1162         Collection<String> s = groups.get(key);
1163         String element = levelNames[indent];
1164 
1165         if (log != null)
1166             log.print(Utility.repeat("\t", indent) + "<" + element + " n=\"" + name
1167                 + (showCode ? " (" + key + ")" : "") + "\"");
1168         if (s == null) {
1169             s = zone_countrySet.get(key);
1170             if (s == null || s.size() == 1)
1171                 s = null; // skip singletons
1172         }
1173         if (s == null) {
1174             if (log != null) log.println("/>");
1175             return;
1176         }
1177 
1178         if (log != null) log.println(">");
1179         Map<String, String> reorder = new TreeMap<String, String>(col);
1180         for (Iterator<String> it = s.iterator(); it.hasNext();) {
1181             key = it.next();
1182             String value = getName(localization, key, missing);
1183             if (value == null) {
1184                 System.out.println("Missing value for: " + key);
1185                 value = key;
1186             }
1187             reorder.put(value, key);
1188         }
1189         for (Iterator<String> it = reorder.keySet().iterator(); it.hasNext();) {
1190             key = it.next();
1191             String value = reorder.get(key);
1192             printWorldTimezoneCategorization(log, localization, groups, value, indent + 1, seen, col, showCode,
1193                 zone_countrySet, missing);
1194         }
1195         if (log != null) log.println(Utility.repeat("\t", indent) + "</" + element + ">");
1196     }
1197 
1198     /**
1199      * @param localization
1200      * @param key
1201      * @param missing
1202      *            TODO
1203      * @return
1204      */
getName(CLDRFile localization, String key, Set<String>[] missing)1205     private static String getName(CLDRFile localization, String key, Set<String>[] missing) {
1206         String name;
1207         int pos = key.lastIndexOf('/');
1208         if (pos >= 0) {
1209             String v = localization.getStringValue("//ldml/dates/timeZoneNames/zone[@type=\"" + key
1210                 + "\"]/exemplarCity");
1211             if (v != null)
1212                 name = v;
1213             else {
1214 
1215                 // <ldml><dates><timezoneNames>
1216                 // <zone type="America/Anchorage">
1217                 // <exemplarCity draft="true">Anchorage</exemplarCity>
1218                 if (missing != null) missing[1].add(key);
1219                 name = key.substring(pos + 1);
1220                 name = name.replace('_', ' ');
1221             }
1222         } else {
1223             name = localization.getName(CLDRFile.TERRITORY_NAME, key);
1224             if (name == null) {
1225                 if (missing != null) missing[0].add(key);
1226                 name = key;
1227             }
1228         }
1229         return name;
1230     }
1231 
zones_countrySet()1232     static Map<String, Set<String>> zones_countrySet() {
1233         Map<String, List<String>> m = StandardCodes.make().getZoneData();
1234         Map<String, Set<String>> result = new TreeMap<String, Set<String>>();
1235         for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
1236             String tzid = it.next();
1237             List<String> list = m.get(tzid);
1238             String country = list.get(2);
1239             Set<String> zones = result.get(country);
1240             if (zones == null) {
1241                 zones = new TreeSet<String>();
1242                 result.put(country, zones);
1243             }
1244             zones.add(tzid);
1245         }
1246         return result;
1247     }
1248 
1249     /**
1250      * @param key
1251      * @return
1252      */
fixNumericKey(String key)1253     private static String fixNumericKey(String key) {
1254         // String key = (String) it.next();
1255         char c = key.charAt(0);
1256         if (c > '9') return key;
1257         String fixedKey = key.length() == 3 ? key : key.length() == 2 ? "0" + key : "00" + key;
1258         return fixedKey;
1259     }
1260 
compareLists()1261     private static void compareLists() throws IOException {
1262         BufferedReader in = FileUtilities.openUTF8Reader("", "language_list.txt");
1263         Factory cldrFactory = Factory.make(options[SOURCEDIR].value + "main\\", ".*");
1264         // CLDRKey.main(new String[]{"-mde.*"});
1265         Set<String> locales = cldrFactory.getAvailable();
1266         Set<String> cldr = new TreeSet<String>();
1267         LanguageTagParser parser = new LanguageTagParser();
1268         for (Iterator<String> it = locales.iterator(); it.hasNext();) {
1269             // if doesn't have exactly one _, skip
1270             String locale = it.next();
1271             parser.set(locale);
1272             if (parser.getScript().length() == 0 && parser.getRegion().length() == 0) continue;
1273             if (parser.getVariants().size() > 0) continue;
1274             cldr.add(locale.replace('_', '-'));
1275         }
1276 
1277         Set<String> tex = new TreeSet<String>();
1278         while (true) {
1279             String line = in.readLine();
1280             if (line == null) break;
1281             line = line.trim();
1282             if (line.length() == 0) continue;
1283             int p = line.indexOf(' ');
1284             tex.add(line.substring(0, p));
1285         }
1286         Set<String> inCldrButNotTex = new TreeSet<String>(cldr);
1287         inCldrButNotTex.removeAll(tex);
1288         System.out.println(" inCldrButNotTex " + inCldrButNotTex);
1289         Set<String> inTexButNotCLDR = new TreeSet<String>(tex);
1290         inTexButNotCLDR.removeAll(cldr);
1291         System.out.println(" inTexButNotCLDR " + inTexButNotCLDR);
1292     }
1293 
generateTransliterators()1294     void generateTransliterators() throws IOException {
1295         File translitSource = new File("C:\\ICU\\icu\\source\\data\\translit");
1296         Matcher m = PatternCache.get(".*Hebrew.*").matcher("");
1297         File[] list = translitSource.listFiles();
1298         for (int i = 0; i < list.length; ++i) {
1299             File file = list[i];
1300             String name = file.getName();
1301             if (!m.reset(name).matches()) continue;
1302             if (!name.endsWith(".txt")) continue;
1303             String fixedName = name.substring(name.length() - 4);
1304             BufferedReader input = FileUtilities.openUTF8Reader(file.getParent() + File.pathSeparator, name);
1305             SimpleXMLSource source = new SimpleXMLSource(null);
1306             CLDRFile outFile = new CLDRFile(source);
1307             int count = 0;
1308             while (true) {
1309                 String line = input.readLine();
1310                 //String contents = line;
1311                 if (line == null) break;
1312                 if (line.length() == 0) continue;
1313                 count++;
1314                 outFile.add("//supplementalData/transforms/transform/line[@_q=\"" + count + "\"]", line);
1315             }
1316             PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY + "/translit/", fixedName + ".xml");
1317             outFile.write(pw);
1318             pw.close();
1319         }
1320     }
1321 }
1322