1 package org.unicode.cldr.util;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.Comparator;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Locale;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 
20 import com.ibm.icu.util.ICUUncheckedIOException;
21 
22 public class ZoneParser {
    // Debug switch; no use of it is visible in this part of the file.
    static final boolean DEBUG = false;

    // Version line read from "tzdb-version.txt" by makeZoneData() (form checked there: four digits plus a letter).
    private String version;

    // Zone id -> country code; built on demand by make_zone_to_country().
    private Map<String, String> zone_to_country;

    // Country code -> set of zone ids; built on demand by make_zone_to_country().
    private Map<String, Set<String>> country_to_zoneSet;
30 
31     /**
32      * @return mapping from zone id to country. If a zone has no country, then XX
33      *         is used.
34      */
getZoneToCounty()35     public Map<String, String> getZoneToCounty() {
36         if (zone_to_country == null)
37             make_zone_to_country();
38         return zone_to_country;
39     }
40 
41     /**
42      * @return mapping from country to zoneid. If a zone has no country, then XX
43      *         is used.
44      */
getCountryToZoneSet()45     public Map<String, Set<String>> getCountryToZoneSet() {
46         if (country_to_zoneSet == null)
47             make_zone_to_country();
48         return country_to_zoneSet;
49     }
50 
51     /**
52      * @return map from tzids to a list: latitude, longitude, country, comment?. + =
53      *         N or E
54      */
getZoneData()55     public Map<String, List<String>> getZoneData() {
56         if (zoneData == null)
57             makeZoneData();
58         return zoneData;
59     }
60 
getDeprecatedZoneIDs()61     public List<String> getDeprecatedZoneIDs() {
62         return Arrays.asList(FIX_DEPRECATED_ZONE_DATA);
63     }
64 
65     /**
66      *
67      */
make_zone_to_country()68     private void make_zone_to_country() {
69         zone_to_country = new TreeMap<String, String>(TZIDComparator);
70         country_to_zoneSet = new TreeMap<String, Set<String>>();
71         // Map aliasMap = getAliasMap();
72         Map<String, List<String>> zoneData = getZoneData();
73         for (String zone : zoneData.keySet()) {
74             String country = (String) zoneData.get(zone).get(2);
75             zone_to_country.put(zone, country);
76             Set<String> s = country_to_zoneSet.get(country);
77             if (s == null)
78                 country_to_zoneSet.put(country, s = new TreeSet<String>());
79             s.add(zone);
80         }
81         /*
82          * Set territories = getAvailableCodes("territory"); for (Iterator it =
83          * territories.iterator(); it.hasNext();) { String code = (String)
84          * it.next(); String[] zones = TimeZone.getAvailableIDs(code); for (int i =
85          * 0; i < zones.length; ++i) { if (aliasMap.get(zones[i]) != null) continue;
86          * zone_to_country.put(zones[i], code); } } String[] zones =
87          * TimeZone.getAvailableIDs(); for (int i = 0; i < zones.length; ++i) { if
88          * (aliasMap.get(zones[i]) != null) continue; if
89          * (zone_to_country.get(zones[i]) == null) { zone_to_country.put(zones[i],
90          * NO_COUNTRY); } } for (Iterator it = zone_to_country.keySet().iterator();
91          * it.hasNext();) { String tzid = (String) it.next(); String country =
92          * (String) zone_to_country.get(tzid); Set s = (Set)
93          * country_to_zoneSet.get(country); if (s == null)
94          * country_to_zoneSet.put(country, s = new TreeSet()); s.add(tzid); }
95          */
96         // protect
97         zone_to_country = Collections.unmodifiableMap(zone_to_country);
98         country_to_zoneSet = CldrUtility.protectCollection(country_to_zoneSet);
99     }
100 
101     /**
102      *
103      *
104      * private Map bogusZones = null;
105      *
106      * private Map getAliasMap() { if (bogusZones == null) { try { bogusZones =
107      * new TreeMap(); BufferedReader in =
108      * Utility.getUTF8Data"TimeZoneAliases.txt"); while (true) { String line =
109      * in.readLine(); if (line == null) break; line = line.trim(); int pos =
110      * line.indexOf('#'); if (pos >= 0) { skippedAliases.add(line); line =
111      * line.substring(0,pos).trim(); } if (line.length() == 0) continue; List
112      * pieces = Utility.splitList(line,';', true); bogusZones.put(pieces.get(0),
113      * pieces.get(1)); } in.close(); } catch (IOException e) { throw new
114      * IllegalArgumentException("Can't find timezone aliases"); } } return
115      * bogusZones; }
116      */
117 
    // tzid -> [latitude, longitude, country, optional comment]; filled by makeZoneData().
    Map<String, List<String>> zoneData;

    // zone.tab lines that contained a '#', stored whole before makeZoneData() cuts the comment part off.
    Set<String> skippedAliases = new TreeSet<String>();
121 
    /*
     * Column layout of the tab-separated zone table parsed by makeZoneData()
     * (this text appears to be quoted from the header of zone.tab):
     *
     * # This file contains a table with the following columns:
     * # 1. ISO 3166 2-character country code. See the file `iso3166.tab'.
     * # 2. Latitude and longitude of the zone's principal location
     * #    in ISO 6709 sign-degrees-minutes-seconds format,
     * #    either +-DDMM+-DDDMM or +-DDMMSS+-DDDMMSS,
     * #    first latitude (+ is north), then longitude (+ is east).
     * # 3. Zone name used in value of TZ environment variable.
     * # 4. Comments; present if and only if the country has multiple rows.
     * #
     * # Columns are separated by a single tab.
     */
parseYear(String year, int defaultValue)132     static int parseYear(String year, int defaultValue) {
133         if ("only".startsWith(year))
134             return defaultValue;
135         if ("minimum".startsWith(year))
136             return Integer.MIN_VALUE;
137         if ("maximum".startsWith(year))
138             return Integer.MAX_VALUE;
139         return Integer.parseInt(year);
140     }
141 
142     public static class Time {
143         public int seconds;
144         public byte type;
145         static final byte WALL = 0, STANDARD = 1, UNIVERSAL = 2;
146 
Time(String in)147         Time(String in) {
148             if (in.equals("-")) return; // zero/WALL is the default
149             char suffix = in.charAt(in.length() - 1);
150             switch (suffix) {
151             case 'w':
152                 in = in.substring(0, in.length() - 1);
153                 break;
154             case 's':
155                 in = in.substring(0, in.length() - 1);
156                 type = STANDARD;
157                 break;
158             case 'u':
159             case 'g':
160             case 'z':
161                 in = in.substring(0, in.length() - 1);
162                 type = UNIVERSAL;
163                 break;
164             }
165             seconds = parseSeconds(in, false);
166         }
167 
parseSeconds(String in, boolean allowNegative)168         public static int parseSeconds(String in, boolean allowNegative) {
169             boolean negative = false;
170             if (in.startsWith("-")) {
171                 assert (allowNegative);
172                 negative = true;
173                 in = in.substring(1);
174             }
175             String[] pieces = in.split(":");
176             int multiplier = 3600;
177             int result = 0;
178             for (int i = 0; i < pieces.length; ++i) {
179                 result += multiplier * Integer.parseInt(pieces[i]);
180                 multiplier /= 60;
181                 assert (multiplier >= 0);
182             }
183             if (negative) result = -result;
184             return result;
185         }
186 
toString()187         public String toString() {
188             return BoilerplateUtilities.toStringHelper(this);
189         }
190     }
191 
    // Lower-case English month names, January at index 0; RuleLine and ZoneLine add 1
    // to the index found in this array to obtain a month number.
    static final String[] months = { "january", "february", "march", "april", "may", "june", "july", "august",
        "september", "october", "november", "december" };
    // Lower-case English weekday names, Sunday at index 0; Day stores the index found here.
    static final String[] weekdays = { "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday" };
195 
findStartsWith(String value, String[] array, boolean exact)196     static int findStartsWith(String value, String[] array, boolean exact) {
197         value = value.toLowerCase(Locale.ENGLISH);
198         for (int i = 0; i < array.length; ++i) {
199             if (array[i].startsWith(value)) return i;
200         }
201         throw new IllegalArgumentException("Can't find " + value + " in " + Arrays.asList(months));
202     }
203 
    // Matches the three shapes Day accepts (after lower-casing): a plain number (group 1),
    // "last" + weekday (groups 2-3), or weekday + relation + number (groups 4-6).
    // PatternCache is a project helper; presumably it returns a compiled, cached Pattern.
    static Pattern dayPattern = PatternCache.get("([0-9]+)|(last)([a-z]+)|([a-z]+)([<=>]+)([0-9]+)");
    // Relation operators; Day looks the matched operator text up in this array with findStartsWith.
    static final String[] relations = { "<=", ">=" };
206 
207     public static class Day implements Comparable<Object> {
208         public int number;
209         public byte relation;
210         public int weekDay;
211         static final byte NONE = 0, LEQ = 2, GEQ = 4;
212 
Day(String value)213         Day(String value) {
214             value = value.toLowerCase();
215             Matcher matcher = dayPattern.matcher(value);
216             if (!matcher.matches()) {
217                 throw new IllegalArgumentException();
218             }
219             if (matcher.group(1) != null) {
220                 number = Integer.parseInt(matcher.group(1));
221                 return;
222             }
223             if (matcher.group(2) != null) {
224                 weekDay = findStartsWith(matcher.group(3), weekdays, false);
225                 number = 31;
226                 relation = LEQ;
227                 return;
228             }
229             if (matcher.group(4) != null) {
230                 weekDay = findStartsWith(matcher.group(4), weekdays, false);
231                 relation = (byte) findStartsWith(matcher.group(5), relations, false);
232                 number = Integer.parseInt(matcher.group(6));
233                 return;
234             }
235             throw new IllegalArgumentException();
236         }
237 
toString()238         public String toString() {
239             return BoilerplateUtilities.toStringHelper(this);
240         }
241 
compareTo(Object other)242         public int compareTo(Object other) {
243             return toString().compareTo(other.toString());
244         }
245     }
246 
247     /**
248      *
249      A rule line has the form
250      *
251      * Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
252      *
253      * For example:
254      *
255      * Rule US 1967 1973 - Apr lastSun 2:00 1:00 D
256      *
257      * The fields that make up a rule line are:
258      *
259      * NAME Gives the (arbitrary) name of the set of rules this
260      * rule is part of.
261      *
262      * FROM Gives the first year in which the rule applies. Any
263      * integer year can be supplied; the Gregorian calendar
264      * is assumed. The word minimum (or an abbreviation)
265      * means the minimum year representable as an integer.
266      * The word maximum (or an abbreviation) means the
267      * maximum year representable as an integer. Rules can
268      * describe times that are not representable as time
269      * values, with the unrepresentable times ignored; this
270      * allows rules to be portable among hosts with
271      * differing time value types.
272      *
273      * TO Gives the final year in which the rule applies. In
274      * addition to minimum and maximum (as above), the word
275      * only (or an abbreviation) may be used to repeat the
276      * value of the FROM field.
277      *
278      * TYPE Gives the type of year in which the rule applies.
279      * If TYPE is - then the rule applies in all years
280      * between FROM and TO inclusive. If TYPE is something
281      * else, then zic executes the command
282      * yearistype year type
283      * to check the type of a year: an exit status of zero
284      * is taken to mean that the year is of the given type;
285      * an exit status of one is taken to mean that the year
286      * is not of the given type.
287      *
288      * IN Names the month in which the rule takes effect.
289      * Month names may be abbreviated.
290      *
291      * ON Gives the day on which the rule takes effect.
292      * Recognized forms include:
293      *
294      * 5 the fifth of the month
295      * lastSun the last Sunday in the month
296      * lastMon the last Monday in the month
297      * Sun>=8 first Sunday on or after the eighth
298      * Sun<=25 last Sunday on or before the 25th
299      *
300      * Names of days of the week may be abbreviated or
301      * spelled out in full. Note that there must be no
302      * spaces within the ON field.
303      *
304      * AT Gives the time of day at which the rule takes
305      * effect. Recognized forms include:
306      *
307      * 2 time in hours
308      * 2:00 time in hours and minutes
309      * 15:00 24-hour format time (for times after noon)
310      * 1:28:14 time in hours, minutes, and seconds
311      * - equivalent to 0
312      *
313      * where hour 0 is midnight at the start of the day,
314      * and hour 24 is midnight at the end of the day. Any
315      * of these forms may be followed by the letter w if
316      * the given time is local "wall clock" time, s if the
317      * given time is local "standard" time, or u (or g or
318      * z) if the given time is universal time; in the
319      * absence of an indicator, wall clock time is assumed.
320      *** cannot be negative
321      *
322      * SAVE Gives the amount of time to be added to local
323      * standard time when the rule is in effect. This
324      * field has the same format as the AT field (although,
325      * of course, the w and s suffixes are not used).
326      *** can be positive or negative
327      *
328      * LETTER/S
329      * Gives the "variable part" (for example, the "S" or
330      * "D" in "EST" or "EDT") of time zone abbreviations to
331      * be used when this rule is in effect. If this field
332      * is -, the variable part is null.
333      *
334      *
335      *
336      */
337 
338     public static class RuleLine {
339         public static Set<String> types = new TreeSet<String>();
340         public static Set<Day> days = new TreeSet<Day>();
341         static Set<Integer> saves = new TreeSet<Integer>();
342 
RuleLine(List<String> l)343         RuleLine(List<String> l) {
344             fromYear = parseYear(l.get(0), 0);
345             toYear = parseYear(l.get(1), fromYear);
346             type = l.get(2);
347             if (type.equals("-")) type = null;
348             month = 1 + findStartsWith((String) l.get(3), months, false);
349             day = new Day(l.get(4));
350             time = new Time(l.get(5));
351             save = Time.parseSeconds(l.get(6), true);
352             letter = l.get(7);
353             if (letter.equals("-")) letter = null;
354             if (type != null) types.add(type);
355             days.add(day);
356         }
357 
toString()358         public String toString() {
359             return BoilerplateUtilities.toStringHelper(this);
360         }
361 
362         public int fromYear;
363 
364         public int toYear;
365 
366         public String type;
367 
368         public int month;
369 
370         public Day day;
371 
372         public Time time;
373 
374         public int save;
375 
376         public String letter;
377 
378         public static final int FIELD_COUNT = 8; // excluding Rule, Name
379     }
380 
381     /**
382      * A zone line has the form
383      *
384      * Zone NAME GMTOFF RULES/SAVE FORMAT [UNTIL]
385      *
386      * For example:
387      *
388      * Zone Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00
389      *
390      * The fields that make up a zone line are:
391      *
392      * NAME The name of the time zone. This is the name used in
393      * creating the time conversion information file for the
394      * zone.
395      *
396      * GMTOFF
397      * The amount of time to add to UTC to get standard time
398      * in this zone. This field has the same format as the
399      * AT and SAVE fields of rule lines; begin the field with
400      * a minus sign if time must be subtracted from UTC.
401      *
402      * RULES/SAVE
403      * The name of the rule(s) that apply in the time zone
404      * or, alternately, an amount of time to add to local
405      * standard time. If this field is - then standard time
406      * always applies in the time zone.
407      *
408      * FORMAT
409      * The format for time zone abbreviations in this time
410      * zone. The pair of characters %s is used to show where
411      * the "variable part" of the time zone abbreviation
412      * goes. Alternately, a slash (/) separates standard and
413      * daylight abbreviations.
414      *
415      * UNTIL The time at which the UTC offset or the rule(s) change
416      * for a location. It is specified as a year, a month, a
417      * day, and a time of day. If this is specified, the
418      * time zone information is generated from the given UTC
419      * offset and rule change until the time specified. The
420      * month, day, and time of day have the same format as
421      * the IN, ON, and AT columns of a rule; trailing columns
422      * can be omitted, and default to the earliest possible
423      * value for the missing columns.
424      *
425      * The next line must be a "continuation" line; this has
426      * the same form as a zone line except that the string
427      * "Zone" and the name are omitted, as the continuation
428      * line will place information starting at the time
429      * specified as the UNTIL field in the previous line in
430      * the file used by the previous line. Continuation
431      * lines may contain an UNTIL field, just as zone lines
432      * do, indicating that the next line is a further
433      * continuation.
434      */
435     public static class ZoneLine {
436         public static Set<Day> untilDays = new TreeSet<Day>();
437         public static Set<String> rulesSaves = new TreeSet<String>();
438 
ZoneLine(List<String> l)439         ZoneLine(List<String> l) {
440             gmtOff = Time.parseSeconds(l.get(0), true);
441             rulesSave = (String) l.get(1);
442             if (rulesSave.equals("-"))
443                 rulesSave = "0";
444             else if (rulesSave.charAt(0) < 'A') rulesSave = "" + Time.parseSeconds(rulesSave, false);
445 
446             format = (String) l.get(2);
447             switch (l.size()) {
448             case 7:
449                 untilTime = new Time(l.get(6)); // fall through
450             case 6:
451                 untilDay = new Day(l.get(5)); // fall through
452                 untilDays.add(untilDay);
453             case 5:
454                 untilMonth = 1 + findStartsWith((String) l.get(4), months, false); // fall through
455             case 4:
456                 untilYear = parseYear(l.get(3), Integer.MAX_VALUE); // fall through
457             case 3:
458                 break; // ok
459             default:
460                 throw new IllegalArgumentException("Wrong field count: " + l);
461             }
462             rulesSaves.add(rulesSave);
463         }
464 
toString()465         public String toString() {
466             return BoilerplateUtilities.toStringHelper(this);
467         }
468 
469         public int gmtOff;
470 
471         public String rulesSave;
472 
473         public String format;
474 
475         public int untilYear = Integer.MAX_VALUE; // indicating continuation
476 
477         public int untilMonth;
478 
479         public Day untilDay;
480 
481         public Time untilTime;
482 
483         public String comment;
484 
485         public static final int FIELD_COUNT = 3; // excluding Zone, Name
486 
487         public static final int FIELD_COUNT_UNTIL = 7; // excluding Zone, Name
488     }
489 
    // Presumably rule name -> its Rule lines; the code that fills it is not in the visible
    // part of makeZoneData() — confirm.
    Map<String, List<RuleLine>> ruleID_rules = new TreeMap<String, List<RuleLine>>();

    // Zone id -> its Zone line followed by its continuation lines, in file order;
    // filled by makeZoneData().
    Map<String, List<ZoneLine>> zone_rules = new TreeMap<String, List<ZoneLine>>();

    // Presumably old (link) id -> new (target) id; not populated in the visible code — confirm.
    Map<String, String> linkold_new = new TreeMap<String, String>();

    // Presumably new (target) id -> set of old ids linking to it; not populated in the
    // visible code — confirm.
    Map<String, Set<String>> linkNew_oldSet = new TreeMap<String, Set<String>>();
497 
    // Plain holder for one transition. Nothing in the visible code creates or fills it,
    // so the units of date and offset are not established here.
    public class Transition {
        public long date;
        public long offset;
        public String abbreviation;
    }
503 
504     public class TransitionList {
505 
addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear)506         void addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear) {
507             // add everything between the zonelines
508             if (lastZoneLine == null) {
509                 return;
510             }
511             startYear = Math.max(startYear, lastZoneLine.untilYear);
512             endYear = Math.min(endYear, zoneLine.untilYear);
513             int gmtOffset = lastZoneLine.gmtOff;
514             for (int year = startYear; year <= endYear; ++year) {
515                 resolveTime(gmtOffset, lastZoneLine.untilYear, lastZoneLine.untilMonth,
516                     lastZoneLine.untilDay, lastZoneLine.untilTime);
517             }
518         }
519 
resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime)520         private long resolveTime(int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime) {
521             return 0;
522         }
523     }
524 
getTransitions(String zoneID, int startYear, int endYear)525     public TransitionList getTransitions(String zoneID, int startYear, int endYear) {
526         TransitionList results = new TransitionList();
527         List<ZoneLine> rules = zone_rules.get(zoneID);
528         ZoneLine lastZoneLine = null;
529         for (ZoneLine zoneLine : rules) {
530             results.addTransitions(lastZoneLine, zoneLine, startYear, endYear);
531             lastZoneLine = zoneLine;
532         }
533         return results;
534     }
535 
    /**
     * @return the comparator this parser uses to order zone ids: by country,
     *         then longitude, then latitude, then the id itself
     */
    public Comparator<String> getTZIDComparator() {
        return TZIDComparator;
    }
539 
    // Stand-in data (latitude, longitude, country) that TZIDComparator uses for an id with
    // no entry in the zone data: both coordinates are Double.MIN_VALUE as text, and the
    // country is the empty string.
    // NOTE(review): Double.MIN_VALUE is the smallest positive double, not the most negative
    // value — confirm that this is the intended sentinel.
    private static List<String> errorData = Arrays.asList(new String[] {
        new Double(Double.MIN_VALUE).toString(), new Double(Double.MIN_VALUE).toString(), "" });
542 
543     private Comparator<String> TZIDComparator = new Comparator<String>() {
544         Map<String, List<String>> data = getZoneData();
545 
546         public int compare(String s1, String s2) {
547             List<String> data1 = data.get(s1);
548             if (data1 == null)
549                 data1 = errorData;
550             List<String> data2 = data.get(s2);
551             if (data2 == null)
552                 data2 = errorData;
553 
554             int result;
555             // country
556             String country1 = (String) data1.get(2);
557             String country2 = (String) data2.get(2);
558 
559             if ((result = country1.compareTo(country2)) != 0)
560                 return result;
561             // longitude
562             Double d1 = Double.valueOf(data1.get(1));
563             Double d2 = Double.valueOf(data2.get(1));
564             if ((result = d1.compareTo(d2)) != 0)
565                 return result;
566             // latitude
567             d1 = Double.valueOf(data1.get(0));
568             d2 = Double.valueOf(data2.get(0));
569             if ((result = d1.compareTo(d2)) != 0)
570                 return result;
571             // name
572             return s1.compareTo(s2);
573         }
574     };
575 
576     public static MapComparator<String> regionalCompare = new MapComparator<String>();
577     static {
578         regionalCompare.add("America");
579         regionalCompare.add("Atlantic");
580         regionalCompare.add("Europe");
581         regionalCompare.add("Africa");
582         regionalCompare.add("Asia");
583         regionalCompare.add("Indian");
584         regionalCompare.add("Australia");
585         regionalCompare.add("Pacific");
586         regionalCompare.add("Arctic");
587         regionalCompare.add("Antarctica");
588         regionalCompare.add("Etc");
589     }
590 
    // Names of the tz database source files that makeZoneData() reads, one after another,
    // through CldrUtility.getUTF8Data.
    private static String[] TZFiles = { "africa", "antarctica", "asia",
        "australasia", "backward", "etcetera", "europe", "northamerica",
        "pacificnew", "southamerica", "systemv" };

    // Zone-id replacement table: makeZoneData() looks each id it reads up here and, when an
    // entry exists, uses the mapped id instead. Filled by a static initializer in this class.
    private static Map<String, String> FIX_UNSTABLE_TZIDS;

    // NOTE(review): no use of this set is visible in this part of the file; judging by the
    // name, links involving these ids are skipped — confirm.
    private static Set<String> SKIP_LINKS = new HashSet<String>(Arrays.asList(
        new String[] {
            "America/Montreal", "America/Toronto",
            "America/Santa_Isabel", "America/Tijuana" }));

    // NOTE(review): no use of this set is visible in this part of the file — confirm its role.
    private static Set<String> PREFERRED_BASES = new HashSet<String>(Arrays.asList(new String[] { "Europe/London" }));

    // Pairs of zone ids. NOTE(review): no use is visible in this part of the file; the name
    // suggests extra aliases to add, but which element is the alias is not shown — confirm.
    private static String[][] ADD_ZONE_ALIASES_DATA = {
        { "Etc/UCT", "Etc/UTC" },

        { "EST", "Etc/GMT+5" },
        { "MST", "Etc/GMT+7" },
        { "HST", "Etc/GMT+10" },

        { "SystemV/AST4", "Etc/GMT+4" },
        { "SystemV/EST5", "Etc/GMT+5" },
        { "SystemV/CST6", "Etc/GMT+6" },
        { "SystemV/MST7", "Etc/GMT+7" },
        { "SystemV/PST8", "Etc/GMT+8" },
        { "SystemV/YST9", "Etc/GMT+9" },
        { "SystemV/HST10", "Etc/GMT+10" },
    };

    // Zone ids returned (as a list) by getDeprecatedZoneIDs().
    static String[] FIX_DEPRECATED_ZONE_DATA = {
        "Africa/Timbuktu",
        "America/Argentina/ComodRivadavia",
        "America/Santa_Isabel",
        "Europe/Belfast",
        "Pacific/Yap",
        "Antarctica/South_Pole",
        "America/Shiprock",
        "America/Montreal",
        "Asia/Chongqing",
        "Asia/Harbin",
        "Asia/Kashgar"
    };
633     static {
634         // The format is <new name>, <old name>
635         String[][] FIX_UNSTABLE_TZID_DATA = new String[][] {
636             { "America/Atikokan", "America/Coral_Harbour" },
637             { "America/Argentina/Buenos_Aires", "America/Buenos_Aires" },
638             { "America/Argentina/Catamarca", "America/Catamarca" },
639             { "America/Argentina/Cordoba", "America/Cordoba" },
640             { "America/Argentina/Jujuy", "America/Jujuy" },
641             { "America/Argentina/Mendoza", "America/Mendoza" },
642             { "America/Kentucky/Louisville", "America/Louisville" },
643             { "America/Indiana/Indianapolis", "America/Indianapolis" },
644             { "Africa/Asmara", "Africa/Asmera" },
645             { "Atlantic/Faroe", "Atlantic/Faeroe" },
646             { "Asia/Kolkata", "Asia/Calcutta" },
647             { "Asia/Ho_Chi_Minh", "Asia/Saigon" },
648             { "Asia/Yangon", "Asia/Rangoon" },
649             { "Asia/Kathmandu", "Asia/Katmandu" },
650             { "Pacific/Pohnpei", "Pacific/Ponape" },
651             { "Pacific/Chuuk", "Pacific/Truk" },
652             { "Pacific/Honolulu", "Pacific/Johnston" }
653         };
654         FIX_UNSTABLE_TZIDS = CldrUtility.asMap(FIX_UNSTABLE_TZID_DATA);
655     }
656 
657     /**
658      *
659      */
makeZoneData()660     private void makeZoneData() {
661         try {
662             // get version
663             BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb-version.txt");
664             version = versionIn.readLine();
665             if (!version.matches("[0-9]{4}[a-z]")) {
666                 throw new IllegalArgumentException(String.format("Bad Version number: %s, should be of the form 2007x",
667                     version));
668             }
669             versionIn.close();
670 
671             // String deg = "([+-][0-9]+)";//
672             String deg = "([+-])([0-9][0-9][0-9]?)([0-9][0-9])([0-9][0-9])?";//
673             Matcher m = PatternCache.get(deg + deg).matcher("");
674             zoneData = new TreeMap<String, List<String>>();
675             BufferedReader in = CldrUtility.getUTF8Data("zone.tab");
676             while (true) {
677                 String line = in.readLine();
678                 if (line == null)
679                     break;
680                 line = line.trim();
681                 int pos = line.indexOf('#');
682                 if (pos >= 0) {
683                     skippedAliases.add(line);
684                     line = line.substring(0, pos).trim();
685                 }
686                 if (line.length() == 0)
687                     continue;
688                 List<String> pieces = CldrUtility.splitList(line, '\t', true);
689                 String country = pieces.get(0);
690                 String latLong = pieces.get(1);
691                 String tzid = pieces.get(2);
692                 String ntzid = FIX_UNSTABLE_TZIDS.get(tzid);
693                 if (ntzid != null)
694                     tzid = ntzid;
695                 String comment = pieces.size() < 4 ? null : (String) pieces.get(3);
696                 pieces.clear();
697                 if (!m.reset(latLong).matches())
698                     throw new IllegalArgumentException("Bad zone.tab, lat/long format: "
699                         + line);
700 
701                 pieces.add(getDegrees(m, true).toString());
702                 pieces.add(getDegrees(m, false).toString());
703                 pieces.add(country);
704                 if (comment != null)
705                     pieces.add(comment);
706                 if (zoneData.containsKey(tzid))
707                     throw new IllegalArgumentException("Bad zone.tab, duplicate entry: "
708                         + line);
709                 zoneData.put(tzid, pieces);
710             }
711             in.close();
712             // add Etcs
713             for (int i = -14; i <= 12; ++i) {
714                 List<String> pieces = new ArrayList<String>();
715                 int latitude = 0;
716                 int longitude = i * 15;
717                 if (longitude <= -180) {
718                     longitude += 360;
719                 }
720                 pieces.add(new Double(latitude).toString()); // lat
721                 // remember that the sign of the TZIDs is wrong
722                 pieces.add(new Double(-longitude).toString()); // long
723                 pieces.add(StandardCodes.NO_COUNTRY); // country
724 
725                 zoneData.put("Etc/GMT" + (i == 0 ? "" : i < 0 ? "" + i : "+" + i),
726                     pieces);
727             }
728             // add Unknown / UTC
729             List<String> pieces = new ArrayList<String>();
730             pieces.add(new Double(0).toString()); // lat
731             pieces.add(new Double(0).toString()); // long
732             pieces.add(StandardCodes.NO_COUNTRY); // country
733             zoneData.put("Etc/Unknown", pieces);
734             zoneData.put("Etc/UTC", pieces);
735 
736             zoneData = CldrUtility.protectCollection(zoneData); // protect for later
737 
738             // now get links
739             Pattern whitespace = PatternCache.get("\\s+");
740             XEquivalenceClass<String, String> linkedItems = new XEquivalenceClass<String, String>("None");
741             for (int i = 0; i < TZFiles.length; ++i) {
742                 in = CldrUtility.getUTF8Data(TZFiles[i]);
743                 String zoneID = null;
744                 while (true) {
745                     String line = in.readLine();
746                     if (line == null)
747                         break;
748                     String originalLine = line;
749                     int commentPos = line.indexOf("#");
750                     String comment = null;
751                     if (commentPos >= 0) {
752                         comment = line.substring(commentPos + 1).trim();
753                         line = line.substring(0, commentPos);
754                     }
755                     line = line.trim();
756                     if (line.length() == 0)
757                         continue;
758                     String[] items = whitespace.split(line);
759                     if (zoneID != null || items[0].equals("Zone")) {
760                         List<String> l = new ArrayList<String>();
761                         l.addAll(Arrays.asList(items));
762 
763                         // Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
764                         // 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
765                         if (zoneID == null) {
766                             l.remove(0); // "Zone"
767                             zoneID = (String) l.get(0);
768                             String ntzid = (String) FIX_UNSTABLE_TZIDS.get(zoneID);
769                             if (ntzid != null)
770                                 zoneID = ntzid;
771                             l.remove(0);
772                         }
773                         List<ZoneLine> zoneRules = zone_rules.get(zoneID);
774                         if (zoneRules == null) {
775                             zoneRules = new ArrayList<ZoneLine>();
776                             zone_rules.put(zoneID, zoneRules);
777                         }
778 
779                         if (l.size() < ZoneLine.FIELD_COUNT
780                             || l.size() > ZoneLine.FIELD_COUNT_UNTIL) {
781                             System.out.println("***Zone incorrect field count:");
782                             System.out.println(l);
783                             System.out.println(originalLine);
784                         }
785 
786                         ZoneLine zoneLine = new ZoneLine(l);
787                         zoneLine.comment = comment;
788                         zoneRules.add(zoneLine);
789                         if (l.size() == ZoneLine.FIELD_COUNT) {
790                             zoneID = null; // no continuation line
791                         }
792                     } else if (items[0].equals("Rule")) {
793                         // # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
794                         // Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
795 
796                         String ruleID = items[1];
797                         List<RuleLine> ruleList = ruleID_rules.get(ruleID);
798                         if (ruleList == null) {
799                             ruleList = new ArrayList<RuleLine>();
800                             ruleID_rules.put(ruleID, ruleList);
801                         }
802                         List<String> l = new ArrayList<String>();
803                         l.addAll(Arrays.asList(items));
804                         l.remove(0);
805                         l.remove(0);
806                         if (l.size() != RuleLine.FIELD_COUNT) {
807                             System.out.println("***Rule incorrect field count:");
808                             System.out.println(l);
809                         }
810                         if (comment != null)
811                             l.add(comment);
812                         RuleLine ruleLine = new RuleLine(l);
813                         ruleList.add(ruleLine);
814 
815                     } else if (items[0].equals("Link")) {
816                         String old = items[2];
817                         String newOne = items[1];
818                         if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) {
819                             //System.out.println("Original " + old + "\t=>\t" + newOne);
820                             linkedItems.add(old, newOne);
821                         }
822                         /*
823                          * String conflict = (String) linkold_new.get(old); if (conflict !=
824                          * null) { System.out.println("Conflict with old: " + old + " => " +
825                          * conflict + ", " + newOne); } System.out.println(old + "\t=>\t" +
826                          * newOne); linkold_new.put(old, newOne);
827                          */
828                     } else {
829                         if (DEBUG)
830                             System.out.println("Unknown zone line: " + line);
831                     }
832                 }
833                 in.close();
834             }
835             // add in stuff that should be links
836             for (int i = 0; i < ADD_ZONE_ALIASES_DATA.length; ++i) {
837                 linkedItems.add(ADD_ZONE_ALIASES_DATA[i][0],
838                     ADD_ZONE_ALIASES_DATA[i][1]);
839             }
840 
841             Set<String> isCanonical = zoneData.keySet();
842 
843             // walk through the sets, and
844             // if any set contains two canonical items, split it.
845             // if any contains one, make it the primary
846             // if any contains zero, problem!
847             for (Set<String> equivalents : linkedItems.getEquivalenceSets()) {
848                 Set<String> canonicals = new TreeSet<String>(equivalents);
849                 canonicals.retainAll(isCanonical);
850                 if (canonicals.size() == 0)
851                     throw new IllegalArgumentException("No canonicals in: " + equivalents);
852                 if (canonicals.size() > 1) {
853                     if (DEBUG) {
854                         System.out.println("Too many canonicals in: " + equivalents);
855                         System.out
856                             .println("\t*Don't* put these into the same equivalence class: "
857                                 + canonicals);
858                     }
859                     Set<String> remainder = new TreeSet<String>(equivalents);
860                     remainder.removeAll(isCanonical);
861                     if (remainder.size() != 0) {
862                         if (DEBUG) {
863                             System.out
864                                 .println("\tThe following should be equivalent to others: "
865                                     + remainder);
866                         }
867                     }
868                 }
869                 {
870                     String newOne;
871                     // get the item that we want to hang all the aliases off of.
872                     // normally this is the first (alphabetically) one, but
873                     // it may be overridden with PREFERRED_BASES
874                     Set<String> preferredItems = new HashSet<String>(PREFERRED_BASES);
875                     preferredItems.retainAll(canonicals);
876                     if (preferredItems.size() > 0) {
877                         newOne = preferredItems.iterator().next();
878                     } else {
879                         newOne = canonicals.iterator().next();
880                     }
881                     for (String oldOne : equivalents) {
882                         if (canonicals.contains(oldOne))
883                             continue;
884                         // System.out.println("Mapping " + oldOne + "\t=>\t" + newOne);
885                         linkold_new.put(oldOne, newOne);
886                     }
887                 }
888             }
889 
890             /*
891              * // fix the links from old to new, to remove chains for (Iterator it =
892              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
893              * it.next(); Object newItem = linkold_new.get(oldItem); while (true) {
894              * Object linkItem = linkold_new.get(newItem); if (linkItem == null)
895              * break; if (true) System.out.println("Connecting link chain: " + oldItem +
896              * "\t=> " + newItem + "\t=> " + linkItem); newItem = linkItem;
897              * linkold_new.put(oldItem, newItem); } }
898              * // reverse the links *from* canonical names for (Iterator it =
899              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
900              * it.next(); if (!isCanonical.contains(oldItem)) continue; Object newItem =
901              * linkold_new.get(oldItem); }
902              *
903              * // fix unstable TZIDs Set itemsToRemove = new HashSet(); Map
904              * itemsToAdd = new HashMap(); for (Iterator it =
905              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
906              * it.next(); Object newItem = linkold_new.get(oldItem); Object modOldItem =
907              * RESTORE_UNSTABLE_TZIDS.get(oldItem); Object modNewItem =
908              * FIX_UNSTABLE_TZIDS.get(newItem); if (modOldItem == null && modNewItem ==
909              * null) continue; if (modOldItem == null) { // just fix old entry
910              * itemsToAdd.put(oldItem, modNewItem); continue; } // otherwise have to
911              * nuke and redo itemsToRemove.add(oldItem); if (modNewItem == null)
912              * modNewItem = newItem; itemsToAdd.put(modOldItem, modNewItem); } // now
913              * make fixes (we couldn't earlier because we were iterating
914              * Utility.removeAll(linkold_new, itemsToRemove);
915              * linkold_new.putAll(itemsToAdd);
916              * // now remove all links that are from canonical zones
917              * Utility.removeAll(linkold_new, zoneData.keySet());
918              */
919 
920             // generate list of new to old
921             for (Iterator<String> it = linkold_new.keySet().iterator(); it.hasNext();) {
922                 String oldZone = it.next();
923                 String newZone = linkold_new.get(oldZone);
924                 Set<String> s = linkNew_oldSet.get(newZone);
925                 if (s == null)
926                     linkNew_oldSet.put(newZone, s = new HashSet<String>());
927                 s.add(oldZone);
928             }
929 
930             // PROTECT EVERYTHING
931             linkNew_oldSet = CldrUtility.protectCollection(linkNew_oldSet);
932             linkold_new = CldrUtility.protectCollection(linkold_new);
933             ruleID_rules = CldrUtility.protectCollection(ruleID_rules);
934             zone_rules = CldrUtility.protectCollection(zone_rules);
935             // TODO protect zone info later
936         } catch (IOException e) {
937             throw new ICUUncheckedIOException(
938                 "Can't find timezone aliases: " + e.toString(), e);
939         }
940     }
941 
942     /**
943      * @param m
944      */
945     private Double getDegrees(Matcher m, boolean lat) {
946         int startIndex = lat ? 1 : 5;
947         double amount = Integer.parseInt(m.group(startIndex + 1))
948             + Integer.parseInt(m.group(startIndex + 2)) / 60.0;
949         if (m.group(startIndex + 3) != null)
950             amount += Integer.parseInt(m.group(startIndex + 3)) / 3600.0;
951         if (m.group(startIndex).equals("-"))
952             amount = -amount;
953         return new Double(amount);
954     }
955 
956     /**
957      * @return Returns the linkold_new.
958      */
959     public Map<String, String> getZoneLinkold_new() {
960         getZoneData();
961         return linkold_new;
962     }
963 
964     /**
965      * @return Returns the linkold_new.
966      */
967     public Map<String, Set<String>> getZoneLinkNew_OldSet() {
968         getZoneData();
969         return linkNew_oldSet;
970     }
971 
972     /**
973      * @return Returns the ruleID_rules.
974      */
975     public Map<String, List<RuleLine>> getZoneRuleID_rules() {
976         getZoneData();
977         return ruleID_rules;
978     }
979 
980     /**
981      * @return Returns the zone_rules.
982      */
983     public Map<String, List<ZoneLine>> getZone_rules() {
984         getZoneData();
985         return zone_rules;
986     }
987 
988     public String getVersion() {
989         return version;
990     }
991 
992 }