1 package org.unicode.cldr.util;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.Comparator;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Locale;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 
20 import com.ibm.icu.util.ICUUncheckedIOException;
21 
22 public class ZoneParser {
    /** Debug switch; nothing in the reviewed portion of this file reads it. */
    static final boolean DEBUG = false;

    /** First line of {@code tzdb-version.txt}, assigned by {@code makeZoneData()}. */
    private String version;

    /** Zone ID to country code; null until {@code make_zone_to_country()} has run. */
    private Map<String, String> zone_to_country;

    /** Country code to the set of its zone IDs; null until {@code make_zone_to_country()} has run. */
    private Map<String, Set<String>> country_to_zoneSet;
30 
31     /**
32      * @return mapping from zone id to country. If a zone has no country, then XX
33      *         is used.
34      */
getZoneToCounty()35     public Map<String, String> getZoneToCounty() {
36         if (zone_to_country == null)
37             make_zone_to_country();
38         return zone_to_country;
39     }
40 
41     /**
42      * @return mapping from country to zoneid. If a zone has no country, then XX
43      *         is used.
44      */
getCountryToZoneSet()45     public Map<String, Set<String>> getCountryToZoneSet() {
46         if (country_to_zoneSet == null)
47             make_zone_to_country();
48         return country_to_zoneSet;
49     }
50 
51     /**
52      * @return map from tzids to a list: latitude, longitude, country, comment?. + =
53      *         N or E
54      */
getZoneData()55     public Map<String, List<String>> getZoneData() {
56         if (zoneData == null)
57             makeZoneData();
58         return zoneData;
59     }
60 
getDeprecatedZoneIDs()61     public List<String> getDeprecatedZoneIDs() {
62         return Arrays.asList(FIX_DEPRECATED_ZONE_DATA);
63     }
64 
65     /**
66      *
67      */
make_zone_to_country()68     private void make_zone_to_country() {
69         zone_to_country = new TreeMap<>(TZIDComparator);
70         country_to_zoneSet = new TreeMap<>();
71         // Map aliasMap = getAliasMap();
72         Map<String, List<String>> zoneData = getZoneData();
73         for (String zone : zoneData.keySet()) {
74             String country = zoneData.get(zone).get(2);
75             zone_to_country.put(zone, country);
76             Set<String> s = country_to_zoneSet.get(country);
77             if (s == null)
78                 country_to_zoneSet.put(country, s = new TreeSet<>());
79             s.add(zone);
80         }
81         /*
82          * Set territories = getAvailableCodes("territory"); for (Iterator it =
83          * territories.iterator(); it.hasNext();) { String code = (String)
84          * it.next(); String[] zones = TimeZone.getAvailableIDs(code); for (int i =
85          * 0; i < zones.length; ++i) { if (aliasMap.get(zones[i]) != null) continue;
86          * zone_to_country.put(zones[i], code); } } String[] zones =
87          * TimeZone.getAvailableIDs(); for (int i = 0; i < zones.length; ++i) { if
88          * (aliasMap.get(zones[i]) != null) continue; if
89          * (zone_to_country.get(zones[i]) == null) { zone_to_country.put(zones[i],
90          * NO_COUNTRY); } } for (Iterator it = zone_to_country.keySet().iterator();
91          * it.hasNext();) { String tzid = (String) it.next(); String country =
92          * (String) zone_to_country.get(tzid); Set s = (Set)
93          * country_to_zoneSet.get(country); if (s == null)
94          * country_to_zoneSet.put(country, s = new TreeSet()); s.add(tzid); }
95          */
96         // protect
97         zone_to_country = Collections.unmodifiableMap(zone_to_country);
98         country_to_zoneSet = CldrUtility.protectCollection(country_to_zoneSet);
99     }
100 
101     /**
102      *
103      *
104      * private Map bogusZones = null;
105      *
106      * private Map getAliasMap() { if (bogusZones == null) { try { bogusZones =
107      * new TreeMap(); BufferedReader in =
108      * Utility.getUTF8Data"TimeZoneAliases.txt"); while (true) { String line =
109      * in.readLine(); if (line == null) break; line = line.trim(); int pos =
110      * line.indexOf('#'); if (pos >= 0) { skippedAliases.add(line); line =
111      * line.substring(0,pos).trim(); } if (line.length() == 0) continue; List
112      * pieces = Utility.splitList(line,';', true); bogusZones.put(pieces.get(0),
113      * pieces.get(1)); } in.close(); } catch (IOException e) { throw new
114      * IllegalArgumentException("Can't find timezone aliases"); } } return
115      * bogusZones; }
116      */
117 
    /** tzid to [latitude, longitude, country, comment?]; null until {@code makeZoneData()} has run. */
    Map<String, List<String>> zoneData;

    /** Lines of {@code zone.tab} that contained a '#', recorded in full by {@code makeZoneData()}. */
    Set<String> skippedAliases = new TreeSet<>();
121 
    /*
     * Format of zone.tab, which makeZoneData() reads. From that file's header:
     *
     * # This file contains a table with the following columns:
     * # 1. ISO 3166 2-character country code. See the file `iso3166.tab'.
     * # 2. Latitude and longitude of the zone's principal location
     * #    in ISO 6709 sign-degrees-minutes-seconds format,
     * #    either +-DDMM+-DDDMM or +-DDMMSS+-DDDMMSS,
     * #    first latitude (+ is north), then longitude (+ is east).
     * # 3. Zone name used in value of TZ environment variable.
     * # 4. Comments; present if and only if the country has multiple rows.
     * #
     * # Columns are separated by a single tab.
     */
parseYear(String year, int defaultValue)132     static int parseYear(String year, int defaultValue) {
133         if ("only".startsWith(year))
134             return defaultValue;
135         if ("minimum".startsWith(year))
136             return Integer.MIN_VALUE;
137         if ("maximum".startsWith(year))
138             return Integer.MAX_VALUE;
139         return Integer.parseInt(year);
140     }
141 
142     public static class Time {
143         public int seconds;
144         public byte type;
145         static final byte WALL = 0, STANDARD = 1, UNIVERSAL = 2;
146 
Time(String in)147         Time(String in) {
148             if (in.equals("-")) return; // zero/WALL is the default
149             char suffix = in.charAt(in.length() - 1);
150             switch (suffix) {
151             case 'w':
152                 in = in.substring(0, in.length() - 1);
153                 break;
154             case 's':
155                 in = in.substring(0, in.length() - 1);
156                 type = STANDARD;
157                 break;
158             case 'u':
159             case 'g':
160             case 'z':
161                 in = in.substring(0, in.length() - 1);
162                 type = UNIVERSAL;
163                 break;
164             }
165             seconds = parseSeconds(in, false);
166         }
167 
parseSeconds(String in, boolean allowNegative)168         public static int parseSeconds(String in, boolean allowNegative) {
169             boolean negative = false;
170             if (in.startsWith("-")) {
171                 assert (allowNegative);
172                 negative = true;
173                 in = in.substring(1);
174             }
175             String[] pieces = in.split(":");
176             int multiplier = 3600;
177             int result = 0;
178             for (int i = 0; i < pieces.length; ++i) {
179                 result += multiplier * Integer.parseInt(pieces[i]);
180                 multiplier /= 60;
181                 assert (multiplier >= 0);
182             }
183             if (negative) result = -result;
184             return result;
185         }
186 
187         @Override
toString()188         public String toString() {
189             return BoilerplateUtilities.toStringHelper(this);
190         }
191     }
192 
    /** Lowercase English month names, January first; searched by prefix with {@code findStartsWith}. */
    static final String[] months = { "january", "february", "march", "april", "may", "june", "july", "august",
        "september", "october", "november", "december" };
    /** Lowercase English weekday names, Sunday first; searched by prefix with {@code findStartsWith}. */
    static final String[] weekdays = { "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday" };
196 
findStartsWith(String value, String[] array, boolean exact)197     static int findStartsWith(String value, String[] array, boolean exact) {
198         value = value.toLowerCase(Locale.ENGLISH);
199         for (int i = 0; i < array.length; ++i) {
200             if (array[i].startsWith(value)) return i;
201         }
202         throw new IllegalArgumentException("Can't find " + value + " in " + Arrays.asList(months));
203     }
204 
    /**
     * Matches a lowercased day field: a plain number (group 1), "last" + weekday
     * (groups 2-3), or weekday + relation + number (groups 4-6).
     */
    static Pattern dayPattern = PatternCache.get("([0-9]+)|(last)([a-z]+)|([a-z]+)([<=>]+)([0-9]+)");
    /** Relation operators in the order {@code findStartsWith} reports them: index 0 is "<=", index 1 is ">=". */
    static final String[] relations = { "<=", ">=" };
207 
208     public static class Day implements Comparable<Object> {
209         public int number;
210         public byte relation;
211         public int weekDay;
212         static final byte NONE = 0, LEQ = 2, GEQ = 4;
213 
Day(String value)214         Day(String value) {
215             value = value.toLowerCase();
216             Matcher matcher = dayPattern.matcher(value);
217             if (!matcher.matches()) {
218                 throw new IllegalArgumentException();
219             }
220             if (matcher.group(1) != null) {
221                 number = Integer.parseInt(matcher.group(1));
222                 return;
223             }
224             if (matcher.group(2) != null) {
225                 weekDay = findStartsWith(matcher.group(3), weekdays, false);
226                 number = 31;
227                 relation = LEQ;
228                 return;
229             }
230             if (matcher.group(4) != null) {
231                 weekDay = findStartsWith(matcher.group(4), weekdays, false);
232                 relation = (byte) findStartsWith(matcher.group(5), relations, false);
233                 number = Integer.parseInt(matcher.group(6));
234                 return;
235             }
236             throw new IllegalArgumentException();
237         }
238 
239         @Override
toString()240         public String toString() {
241             return BoilerplateUtilities.toStringHelper(this);
242         }
243 
244         @Override
compareTo(Object other)245         public int compareTo(Object other) {
246             return toString().compareTo(other.toString());
247         }
248     }
249 
250     /**
251      *
     * A rule line has the form
253      *
254      * Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
255      *
256      * For example:
257      *
258      * Rule US 1967 1973 - Apr lastSun 2:00 1:00 D
259      *
260      * The fields that make up a rule line are:
261      *
262      * NAME Gives the (arbitrary) name of the set of rules this
263      * rule is part of.
264      *
265      * FROM Gives the first year in which the rule applies. Any
266      * integer year can be supplied; the Gregorian calendar
267      * is assumed. The word minimum (or an abbreviation)
268      * means the minimum year representable as an integer.
269      * The word maximum (or an abbreviation) means the
270      * maximum year representable as an integer. Rules can
271      * describe times that are not representable as time
272      * values, with the unrepresentable times ignored; this
273      * allows rules to be portable among hosts with
274      * differing time value types.
275      *
276      * TO Gives the final year in which the rule applies. In
277      * addition to minimum and maximum (as above), the word
278      * only (or an abbreviation) may be used to repeat the
279      * value of the FROM field.
280      *
281      * TYPE Gives the type of year in which the rule applies.
282      * If TYPE is - then the rule applies in all years
283      * between FROM and TO inclusive. If TYPE is something
284      * else, then zic executes the command
285      * yearistype year type
286      * to check the type of a year: an exit status of zero
287      * is taken to mean that the year is of the given type;
288      * an exit status of one is taken to mean that the year
289      * is not of the given type.
290      *
291      * IN Names the month in which the rule takes effect.
292      * Month names may be abbreviated.
293      *
294      * ON Gives the day on which the rule takes effect.
295      * Recognized forms include:
296      *
297      * 5 the fifth of the month
298      * lastSun the last Sunday in the month
299      * lastMon the last Monday in the month
300      * Sun>=8 first Sunday on or after the eighth
301      * Sun<=25 last Sunday on or before the 25th
302      *
303      * Names of days of the week may be abbreviated or
304      * spelled out in full. Note that there must be no
305      * spaces within the ON field.
306      *
307      * AT Gives the time of day at which the rule takes
308      * effect. Recognized forms include:
309      *
310      * 2 time in hours
311      * 2:00 time in hours and minutes
312      * 15:00 24-hour format time (for times after noon)
313      * 1:28:14 time in hours, minutes, and seconds
314      * - equivalent to 0
315      *
316      * where hour 0 is midnight at the start of the day,
317      * and hour 24 is midnight at the end of the day. Any
318      * of these forms may be followed by the letter w if
319      * the given time is local "wall clock" time, s if the
320      * given time is local "standard" time, or u (or g or
321      * z) if the given time is universal time; in the
322      * absence of an indicator, wall clock time is assumed.
323      *** cannot be negative
324      *
325      * SAVE Gives the amount of time to be added to local
326      * standard time when the rule is in effect. This
327      * field has the same format as the AT field (although,
328      * of course, the w and s suffixes are not used).
329      *** can be positive or negative
330      *
331      * LETTER/S
332      * Gives the "variable part" (for example, the "S" or
333      * "D" in "EST" or "EDT") of time zone abbreviations to
334      * be used when this rule is in effect. If this field
335      * is -, the variable part is null.
336      *
337      *
338      *
339      */
340 
341     public static class RuleLine {
342         public static Set<String> types = new TreeSet<>();
343         public static Set<Day> days = new TreeSet<>();
344         static Set<Integer> saves = new TreeSet<>();
345 
RuleLine(List<String> l)346         RuleLine(List<String> l) {
347             fromYear = parseYear(l.get(0), 0);
348             toYear = parseYear(l.get(1), fromYear);
349             type = l.get(2);
350             if (type.equals("-")) type = null;
351             month = 1 + findStartsWith(l.get(3), months, false);
352             day = new Day(l.get(4));
353             time = new Time(l.get(5));
354             save = Time.parseSeconds(l.get(6), true);
355             letter = l.get(7);
356             if (letter.equals("-")) letter = null;
357             if (type != null) types.add(type);
358             days.add(day);
359         }
360 
361         @Override
toString()362         public String toString() {
363             return BoilerplateUtilities.toStringHelper(this);
364         }
365 
366         public int fromYear;
367 
368         public int toYear;
369 
370         public String type;
371 
372         public int month;
373 
374         public Day day;
375 
376         public Time time;
377 
378         public int save;
379 
380         public String letter;
381 
382         public static final int FIELD_COUNT = 8; // excluding Rule, Name
383     }
384 
385     /**
386      * A zone line has the form
387      *
388      * Zone NAME GMTOFF RULES/SAVE FORMAT [UNTIL]
389      *
390      * For example:
391      *
392      * Zone Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00
393      *
394      * The fields that make up a zone line are:
395      *
396      * NAME The name of the time zone. This is the name used in
397      * creating the time conversion information file for the
398      * zone.
399      *
400      * GMTOFF
401      * The amount of time to add to UTC to get standard time
402      * in this zone. This field has the same format as the
403      * AT and SAVE fields of rule lines; begin the field with
404      * a minus sign if time must be subtracted from UTC.
405      *
406      * RULES/SAVE
407      * The name of the rule(s) that apply in the time zone
408      * or, alternately, an amount of time to add to local
409      * standard time. If this field is - then standard time
410      * always applies in the time zone.
411      *
412      * FORMAT
413      * The format for time zone abbreviations in this time
414      * zone. The pair of characters %s is used to show where
415      * the "variable part" of the time zone abbreviation
416      * goes. Alternately, a slash (/) separates standard and
417      * daylight abbreviations.
418      *
419      * UNTIL The time at which the UTC offset or the rule(s) change
420      * for a location. It is specified as a year, a month, a
421      * day, and a time of day. If this is specified, the
422      * time zone information is generated from the given UTC
423      * offset and rule change until the time specified. The
424      * month, day, and time of day have the same format as
425      * the IN, ON, and AT columns of a rule; trailing columns
426      * can be omitted, and default to the earliest possible
427      * value for the missing columns.
428      *
429      * The next line must be a "continuation" line; this has
430      * the same form as a zone line except that the string
431      * "Zone" and the name are omitted, as the continuation
432      * line will place information starting at the time
433      * specified as the UNTIL field in the previous line in
434      * the file used by the previous line. Continuation
435      * lines may contain an UNTIL field, just as zone lines
436      * do, indicating that the next line is a further
437      * continuation.
438      */
439     public static class ZoneLine {
440         public static Set<Day> untilDays = new TreeSet<>();
441         public static Set<String> rulesSaves = new TreeSet<>();
442 
ZoneLine(List<String> l)443         ZoneLine(List<String> l) {
444             gmtOff = Time.parseSeconds(l.get(0), true);
445             rulesSave = l.get(1);
446             if (rulesSave.equals("-"))
447                 rulesSave = "0";
448             else if (rulesSave.charAt(0) < 'A') rulesSave = "" + Time.parseSeconds(rulesSave, false);
449 
450             format = l.get(2);
451             switch (l.size()) {
452             case 7:
453                 untilTime = new Time(l.get(6)); // fall through
454             case 6:
455                 untilDay = new Day(l.get(5)); // fall through
456                 untilDays.add(untilDay);
457             case 5:
458                 untilMonth = 1 + findStartsWith(l.get(4), months, false); // fall through
459             case 4:
460                 untilYear = parseYear(l.get(3), Integer.MAX_VALUE); // fall through
461             case 3:
462                 break; // ok
463             default:
464                 throw new IllegalArgumentException("Wrong field count: " + l);
465             }
466             rulesSaves.add(rulesSave);
467         }
468 
469         @Override
toString()470         public String toString() {
471             return BoilerplateUtilities.toStringHelper(this);
472         }
473 
474         public int gmtOff;
475 
476         public String rulesSave;
477 
478         public String format;
479 
480         public int untilYear = Integer.MAX_VALUE; // indicating continuation
481 
482         public int untilMonth;
483 
484         public Day untilDay;
485 
486         public Time untilTime;
487 
488         public String comment;
489 
490         public static final int FIELD_COUNT = 3; // excluding Zone, Name
491 
492         public static final int FIELD_COUNT_UNTIL = 7; // excluding Zone, Name
493     }
494 
    /** Rule lines grouped by rule name. NOTE(review): the code that fills it is outside the reviewed range. */
    Map<String, List<RuleLine>> ruleID_rules = new TreeMap<>();

    /** Zone lines (including continuation lines) per zone ID, filled by {@code makeZoneData()}. */
    Map<String, List<ZoneLine>> zone_rules = new TreeMap<>();

    /** Presumably old link name to new name. TODO confirm against the Link parsing code. */
    Map<String, String> linkold_new = new TreeMap<>();

    /** Presumably new name to the set of old link names. TODO confirm against the Link parsing code. */
    Map<String, Set<String>> linkNew_oldSet = new TreeMap<>();
502 
    /**
     * Plain holder for one transition. Nothing in the reviewed portion of this
     * file creates or fills it, so the units of the fields are not established here.
     */
    public class Transition {
        public long date;
        public long offset;
        public String abbreviation;
    }
508 
509     public class TransitionList {
510 
addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear)511         void addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear) {
512             // add everything between the zonelines
513             if (lastZoneLine == null) {
514                 return;
515             }
516             startYear = Math.max(startYear, lastZoneLine.untilYear);
517             endYear = Math.min(endYear, zoneLine.untilYear);
518             int gmtOffset = lastZoneLine.gmtOff;
519             for (int year = startYear; year <= endYear; ++year) {
520                 resolveTime(gmtOffset, lastZoneLine.untilYear, lastZoneLine.untilMonth,
521                     lastZoneLine.untilDay, lastZoneLine.untilTime);
522             }
523         }
524 
resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime)525         private long resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime) {
526             return 0;
527         }
528     }
529 
getTransitions(String zoneID, int startYear, int endYear)530     public TransitionList getTransitions(String zoneID, int startYear, int endYear) {
531         TransitionList results = new TransitionList();
532         List<ZoneLine> rules = zone_rules.get(zoneID);
533         ZoneLine lastZoneLine = null;
534         for (ZoneLine zoneLine : rules) {
535             results.addTransitions(lastZoneLine, zoneLine, startYear, endYear);
536             lastZoneLine = zoneLine;
537         }
538         return results;
539     }
540 
getTZIDComparator()541     public Comparator<String> getTZIDComparator() {
542         return TZIDComparator;
543     }
544 
545     private static List<String> errorData = Arrays.asList(new String[] {
546         new Double(Double.MIN_VALUE).toString(), new Double(Double.MIN_VALUE).toString(), "" });
547 
548     private Comparator<String> TZIDComparator = new Comparator<String>() {
549         Map<String, List<String>> data = getZoneData();
550 
551         @Override
552         public int compare(String s1, String s2) {
553             List<String> data1 = data.get(s1);
554             if (data1 == null)
555                 data1 = errorData;
556             List<String> data2 = data.get(s2);
557             if (data2 == null)
558                 data2 = errorData;
559 
560             int result;
561             // country
562             String country1 = data1.get(2);
563             String country2 = data2.get(2);
564 
565             if ((result = country1.compareTo(country2)) != 0)
566                 return result;
567             // longitude
568             Double d1 = Double.valueOf(data1.get(1));
569             Double d2 = Double.valueOf(data2.get(1));
570             if ((result = d1.compareTo(d2)) != 0)
571                 return result;
572             // latitude
573             d1 = Double.valueOf(data1.get(0));
574             d2 = Double.valueOf(data2.get(0));
575             if ((result = d1.compareTo(d2)) != 0)
576                 return result;
577             // name
578             return s1.compareTo(s2);
579         }
580     };
581 
582     public static MapComparator<String> regionalCompare = new MapComparator<>();
583     static {
584         regionalCompare.add("America");
585         regionalCompare.add("Atlantic");
586         regionalCompare.add("Europe");
587         regionalCompare.add("Africa");
588         regionalCompare.add("Asia");
589         regionalCompare.add("Indian");
590         regionalCompare.add("Australia");
591         regionalCompare.add("Pacific");
592         regionalCompare.add("Arctic");
593         regionalCompare.add("Antarctica");
594         regionalCompare.add("Etc");
595     }
596 
597     private static String[] TZFiles = { "africa", "antarctica", "asia",
598         "australasia", "backward", "etcetera", "europe", "northamerica",
599         "southamerica" };
600 
601     private static Map<String, String> FIX_UNSTABLE_TZIDS;
602 
603     private static Set<String> SKIP_LINKS = new HashSet<>(Arrays.asList(
604         new String[] {
605             "America/Montreal", "America/Toronto",
606             "America/Santa_Isabel", "America/Tijuana" }));
607 
608     private static Set<String> PREFERRED_BASES = new HashSet<>(Arrays.asList(new String[] { "Europe/London" }));
609 
610     private static String[][] ADD_ZONE_ALIASES_DATA = {
611         { "Etc/UCT", "Etc/UTC" },
612 
613         { "EST", "Etc/GMT+5" },
614         { "MST", "Etc/GMT+7" },
615         { "HST", "Etc/GMT+10" },
616 
617         { "SystemV/AST4", "Etc/GMT+4" },
618         { "SystemV/EST5", "Etc/GMT+5" },
619         { "SystemV/CST6", "Etc/GMT+6" },
620         { "SystemV/MST7", "Etc/GMT+7" },
621         { "SystemV/PST8", "Etc/GMT+8" },
622         { "SystemV/YST9", "Etc/GMT+9" },
623         { "SystemV/HST10", "Etc/GMT+10" },
624     };
625 
626     static String[] FIX_DEPRECATED_ZONE_DATA = {
627         "Africa/Timbuktu",
628         "America/Argentina/ComodRivadavia",
629         "America/Santa_Isabel",
630         "Europe/Belfast",
631         "Pacific/Yap",
632         "Antarctica/South_Pole",
633         "America/Shiprock",
634         "America/Montreal",
635         "Asia/Chongqing",
636         "Asia/Harbin",
637         "Asia/Kashgar"
638     };
639     static {
640         // The format is <new name>, <old name>
641         String[][] FIX_UNSTABLE_TZID_DATA = new String[][] {
642             { "America/Atikokan", "America/Coral_Harbour" },
643             { "America/Argentina/Buenos_Aires", "America/Buenos_Aires" },
644             { "America/Argentina/Catamarca", "America/Catamarca" },
645             { "America/Argentina/Cordoba", "America/Cordoba" },
646             { "America/Argentina/Jujuy", "America/Jujuy" },
647             { "America/Argentina/Mendoza", "America/Mendoza" },
648             { "America/Nuuk", "America/Godthab" },
649             { "America/Kentucky/Louisville", "America/Louisville" },
650             { "America/Indiana/Indianapolis", "America/Indianapolis" },
651             { "Africa/Asmara", "Africa/Asmera" },
652             { "Atlantic/Faroe", "Atlantic/Faeroe" },
653             { "Asia/Kolkata", "Asia/Calcutta" },
654             { "Asia/Ho_Chi_Minh", "Asia/Saigon" },
655             { "Asia/Yangon", "Asia/Rangoon" },
656             { "Asia/Kathmandu", "Asia/Katmandu" },
657             { "Pacific/Pohnpei", "Pacific/Ponape" },
658             { "Pacific/Chuuk", "Pacific/Truk" },
659             { "Pacific/Honolulu", "Pacific/Johnston" }
660         };
661         FIX_UNSTABLE_TZIDS = CldrUtility.asMap(FIX_UNSTABLE_TZID_DATA);
662     }
663 
664     /**
665      *
666      */
makeZoneData()667     private void makeZoneData() {
668         try {
669             // get version
670             BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb-version.txt");
671             version = versionIn.readLine();
672             if (!version.matches("[0-9]{4}[a-z]")) {
673                 throw new IllegalArgumentException(String.format("Bad Version number: %s, should be of the form 2007x",
674                     version));
675             }
676             versionIn.close();
677 
678             // String deg = "([+-][0-9]+)";//
679             String deg = "([+-])([0-9][0-9][0-9]?)([0-9][0-9])([0-9][0-9])?";//
680             Matcher m = PatternCache.get(deg + deg).matcher("");
681             zoneData = new TreeMap<>();
682             BufferedReader in = CldrUtility.getUTF8Data("zone.tab");
683             while (true) {
684                 String line = in.readLine();
685                 if (line == null)
686                     break;
687                 line = line.trim();
688                 int pos = line.indexOf('#');
689                 if (pos >= 0) {
690                     skippedAliases.add(line);
691                     line = line.substring(0, pos).trim();
692                 }
693                 if (line.length() == 0)
694                     continue;
695                 List<String> pieces = CldrUtility.splitList(line, '\t', true);
696                 String country = pieces.get(0);
697                 String latLong = pieces.get(1);
698                 String tzid = pieces.get(2);
699                 String ntzid = FIX_UNSTABLE_TZIDS.get(tzid);
700                 if (ntzid != null)
701                     tzid = ntzid;
702                 String comment = pieces.size() < 4 ? null : (String) pieces.get(3);
703                 pieces.clear();
704                 if (!m.reset(latLong).matches())
705                     throw new IllegalArgumentException("Bad zone.tab, lat/long format: "
706                         + line);
707 
708                 pieces.add(getDegrees(m, true).toString());
709                 pieces.add(getDegrees(m, false).toString());
710                 pieces.add(country);
711                 if (comment != null)
712                     pieces.add(comment);
713                 if (zoneData.containsKey(tzid))
714                     throw new IllegalArgumentException("Bad zone.tab, duplicate entry: "
715                         + line);
716                 zoneData.put(tzid, pieces);
717             }
718             in.close();
719             // add Etcs
720             for (int i = -14; i <= 12; ++i) {
721                 List<String> pieces = new ArrayList<>();
722                 int latitude = 0;
723                 int longitude = i * 15;
724                 if (longitude <= -180) {
725                     longitude += 360;
726                 }
727                 pieces.add(new Double(latitude).toString()); // lat
728                 // remember that the sign of the TZIDs is wrong
729                 pieces.add(new Double(-longitude).toString()); // long
730                 pieces.add(StandardCodes.NO_COUNTRY); // country
731 
732                 zoneData.put("Etc/GMT" + (i == 0 ? "" : i < 0 ? "" + i : "+" + i),
733                     pieces);
734             }
735             // add Unknown / UTC
736             List<String> pieces = new ArrayList<>();
737             pieces.add(new Double(0).toString()); // lat
738             pieces.add(new Double(0).toString()); // long
739             pieces.add(StandardCodes.NO_COUNTRY); // country
740             zoneData.put("Etc/Unknown", pieces);
741             zoneData.put("Etc/UTC", pieces);
742 
743             zoneData = CldrUtility.protectCollection(zoneData); // protect for later
744 
745             // now get links
746             Pattern whitespace = PatternCache.get("\\s+");
747             XEquivalenceClass<String, String> linkedItems = new XEquivalenceClass<>("None");
748             for (int i = 0; i < TZFiles.length; ++i) {
749                 in = CldrUtility.getUTF8Data(TZFiles[i]);
750                 String zoneID = null;
751                 while (true) {
752                     String line = in.readLine();
753                     if (line == null)
754                         break;
755                     String originalLine = line;
756                     int commentPos = line.indexOf("#");
757                     String comment = null;
758                     if (commentPos >= 0) {
759                         comment = line.substring(commentPos + 1).trim();
760                         line = line.substring(0, commentPos);
761                     }
762                     line = line.trim();
763                     if (line.length() == 0)
764                         continue;
765                     String[] items = whitespace.split(line);
766                     if (zoneID != null || items[0].equals("Zone")) {
767                         List<String> l = new ArrayList<>();
768                         l.addAll(Arrays.asList(items));
769 
770                         // Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
771                         // 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
772                         if (zoneID == null) {
773                             l.remove(0); // "Zone"
774                             zoneID = l.get(0);
775                             String ntzid = FIX_UNSTABLE_TZIDS.get(zoneID);
776                             if (ntzid != null)
777                                 zoneID = ntzid;
778                             l.remove(0);
779                         }
780                         List<ZoneLine> zoneRules = zone_rules.get(zoneID);
781                         if (zoneRules == null) {
782                             zoneRules = new ArrayList<>();
783                             zone_rules.put(zoneID, zoneRules);
784                         }
785 
786                         if (l.size() < ZoneLine.FIELD_COUNT
787                             || l.size() > ZoneLine.FIELD_COUNT_UNTIL) {
788                             System.out.println("***Zone incorrect field count:");
789                             System.out.println(l);
790                             System.out.println(originalLine);
791                         }
792 
793                         ZoneLine zoneLine = new ZoneLine(l);
794                         zoneLine.comment = comment;
795                         zoneRules.add(zoneLine);
796                         if (l.size() == ZoneLine.FIELD_COUNT) {
797                             zoneID = null; // no continuation line
798                         }
799                     } else if (items[0].equals("Rule")) {
800                         // # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
801                         // Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
802 
803                         String ruleID = items[1];
804                         List<RuleLine> ruleList = ruleID_rules.get(ruleID);
805                         if (ruleList == null) {
806                             ruleList = new ArrayList<>();
807                             ruleID_rules.put(ruleID, ruleList);
808                         }
809                         List<String> l = new ArrayList<>();
810                         l.addAll(Arrays.asList(items));
811                         l.remove(0);
812                         l.remove(0);
813                         if (l.size() != RuleLine.FIELD_COUNT) {
814                             System.out.println("***Rule incorrect field count:");
815                             System.out.println(l);
816                         }
817                         if (comment != null)
818                             l.add(comment);
819                         RuleLine ruleLine = new RuleLine(l);
820                         ruleList.add(ruleLine);
821 
822                     } else if (items[0].equals("Link")) {
823                         String old = items[2];
824                         String newOne = items[1];
825                         if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) {
826                             //System.out.println("Original " + old + "\t=>\t" + newOne);
827                             linkedItems.add(old, newOne);
828                         }
829                         /*
830                          * String conflict = (String) linkold_new.get(old); if (conflict !=
831                          * null) { System.out.println("Conflict with old: " + old + " => " +
832                          * conflict + ", " + newOne); } System.out.println(old + "\t=>\t" +
833                          * newOne); linkold_new.put(old, newOne);
834                          */
835                     } else {
836                         if (DEBUG)
837                             System.out.println("Unknown zone line: " + line);
838                     }
839                 }
840                 in.close();
841             }
842             // add in stuff that should be links
843             for (int i = 0; i < ADD_ZONE_ALIASES_DATA.length; ++i) {
844                 linkedItems.add(ADD_ZONE_ALIASES_DATA[i][0],
845                     ADD_ZONE_ALIASES_DATA[i][1]);
846             }
847 
848             Set<String> isCanonical = zoneData.keySet();
849 
850             // walk through the sets, and
851             // if any set contains two canonical items, split it.
852             // if any contains one, make it the primary
853             // if any contains zero, problem!
854             for (Set<String> equivalents : linkedItems.getEquivalenceSets()) {
855                 Set<String> canonicals = new TreeSet<>(equivalents);
856                 canonicals.retainAll(isCanonical);
857                 if (canonicals.size() == 0)
858                     throw new IllegalArgumentException("No canonicals in: " + equivalents);
859                 if (canonicals.size() > 1) {
860                     if (DEBUG) {
861                         System.out.println("Too many canonicals in: " + equivalents);
862                         System.out
863                             .println("\t*Don't* put these into the same equivalence class: "
864                                 + canonicals);
865                     }
866                     Set<String> remainder = new TreeSet<>(equivalents);
867                     remainder.removeAll(isCanonical);
868                     if (remainder.size() != 0) {
869                         if (DEBUG) {
870                             System.out
871                                 .println("\tThe following should be equivalent to others: "
872                                     + remainder);
873                         }
874                     }
875                 }
876                 {
877                     String newOne;
878                     // get the item that we want to hang all the aliases off of.
879                     // normally this is the first (alphabetically) one, but
880                     // it may be overridden with PREFERRED_BASES
881                     Set<String> preferredItems = new HashSet<>(PREFERRED_BASES);
882                     preferredItems.retainAll(canonicals);
883                     if (preferredItems.size() > 0) {
884                         newOne = preferredItems.iterator().next();
885                     } else {
886                         newOne = canonicals.iterator().next();
887                     }
888                     for (String oldOne : equivalents) {
889                         if (canonicals.contains(oldOne))
890                             continue;
891                         // System.out.println("Mapping " + oldOne + "\t=>\t" + newOne);
892                         linkold_new.put(oldOne, newOne);
893                     }
894                 }
895             }
896 
897             /*
898              * // fix the links from old to new, to remove chains for (Iterator it =
899              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
900              * it.next(); Object newItem = linkold_new.get(oldItem); while (true) {
901              * Object linkItem = linkold_new.get(newItem); if (linkItem == null)
902              * break; if (true) System.out.println("Connecting link chain: " + oldItem +
903              * "\t=> " + newItem + "\t=> " + linkItem); newItem = linkItem;
904              * linkold_new.put(oldItem, newItem); } }
905              * // reverse the links *from* canonical names for (Iterator it =
906              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
907              * it.next(); if (!isCanonical.contains(oldItem)) continue; Object newItem =
908              * linkold_new.get(oldItem); }
909              *
910              * // fix unstable TZIDs Set itemsToRemove = new HashSet(); Map
911              * itemsToAdd = new HashMap(); for (Iterator it =
912              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
913              * it.next(); Object newItem = linkold_new.get(oldItem); Object modOldItem =
914              * RESTORE_UNSTABLE_TZIDS.get(oldItem); Object modNewItem =
915              * FIX_UNSTABLE_TZIDS.get(newItem); if (modOldItem == null && modNewItem ==
916              * null) continue; if (modOldItem == null) { // just fix old entry
917              * itemsToAdd.put(oldItem, modNewItem); continue; } // otherwise have to
918              * nuke and redo itemsToRemove.add(oldItem); if (modNewItem == null)
919              * modNewItem = newItem; itemsToAdd.put(modOldItem, modNewItem); } // now
920              * make fixes (we couldn't earlier because we were iterating
921              * Utility.removeAll(linkold_new, itemsToRemove);
922              * linkold_new.putAll(itemsToAdd);
923              * // now remove all links that are from canonical zones
924              * Utility.removeAll(linkold_new, zoneData.keySet());
925              */
926 
927             // generate list of new to old
928             for (Iterator<String> it = linkold_new.keySet().iterator(); it.hasNext();) {
929                 String oldZone = it.next();
930                 String newZone = linkold_new.get(oldZone);
931                 Set<String> s = linkNew_oldSet.get(newZone);
932                 if (s == null)
933                     linkNew_oldSet.put(newZone, s = new HashSet<>());
934                 s.add(oldZone);
935             }
936 
937             // PROTECT EVERYTHING
938             linkNew_oldSet = CldrUtility.protectCollection(linkNew_oldSet);
939             linkold_new = CldrUtility.protectCollection(linkold_new);
940             ruleID_rules = CldrUtility.protectCollection(ruleID_rules);
941             zone_rules = CldrUtility.protectCollection(zone_rules);
942             // TODO protect zone info later
943         } catch (IOException e) {
944             throw new ICUUncheckedIOException(
945                 "Can't find timezone aliases: " + e.toString(), e);
946         }
947     }
948 
949     /**
950      * @param m
951      */
952     private Double getDegrees(Matcher m, boolean lat) {
953         int startIndex = lat ? 1 : 5;
954         double amount = Integer.parseInt(m.group(startIndex + 1))
955             + Integer.parseInt(m.group(startIndex + 2)) / 60.0;
956         if (m.group(startIndex + 3) != null)
957             amount += Integer.parseInt(m.group(startIndex + 3)) / 3600.0;
958         if (m.group(startIndex).equals("-"))
959             amount = -amount;
960         return new Double(amount);
961     }
962 
963     /**
964      * @return Returns the linkold_new.
965      */
966     public Map<String, String> getZoneLinkold_new() {
967         getZoneData();
968         return linkold_new;
969     }
970 
971     /**
972      * @return Returns the linkold_new.
973      */
974     public Map<String, Set<String>> getZoneLinkNew_OldSet() {
975         getZoneData();
976         return linkNew_oldSet;
977     }
978 
979     /**
980      * @return Returns the ruleID_rules.
981      */
982     public Map<String, List<RuleLine>> getZoneRuleID_rules() {
983         getZoneData();
984         return ruleID_rules;
985     }
986 
987     /**
988      * @return Returns the zone_rules.
989      */
990     public Map<String, List<ZoneLine>> getZone_rules() {
991         getZoneData();
992         return zone_rules;
993     }
994 
995     public String getVersion() {
996         return version;
997     }
998 
999 }