1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.Arrays;
6 import java.util.HashMap;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Map.Entry;
10 import java.util.Set;
11 import java.util.TreeSet;
12 
13 import org.unicode.cldr.draft.FileUtilities;
14 import org.unicode.cldr.tool.SubdivisionNode.SubDivisionExtractor;
15 import org.unicode.cldr.tool.SubdivisionNode.SubdivisionSet;
16 import org.unicode.cldr.util.CLDRPaths;
17 import org.unicode.cldr.util.StandardCodes.LstrType;
18 import org.unicode.cldr.util.SupplementalDataInfo;
19 import org.unicode.cldr.util.Validity;
20 import org.unicode.cldr.util.Validity.Status;
21 
22 import com.google.common.collect.HashMultimap;
23 import com.google.common.collect.Multimap;
24 import com.ibm.icu.impl.Relation;
25 import com.ibm.icu.impl.Row.R2;
26 
27 public class GenerateSubdivisions {
28     private static final String ISO_COUNTRY_CODES = CLDRPaths.CLDR_PRIVATE_DIRECTORY + "iso_country_codes/";
29     static final String ISO_SUBDIVISION_CODES = ISO_COUNTRY_CODES + "iso_country_codes.xml";
30 
31 
32     // TODO: consider whether to use the last archive directory to generate
33     // There are pros and cons.
34     // Pros are that we don't introduce "fake" deprecated elements that are introduced and deprecated during the 6 month CLDR cycle
35     // Cons are that we may have to repeat work
36 
37 
38     static final class SubdivisionInfo {
39         static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/supplemental/");
40 
41         static final Map<String, R2<List<String>, String>> SUBDIVISION_ALIASES_FORMER = SDI.getLocaleAliasInfo().get("subdivision");
42 
43         static final SubdivisionNames SUBDIVISION_NAMES_ENGLISH_FORMER = new SubdivisionNames("en");
44 
45         static final Validity VALIDITY_FORMER = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");
46 
47         static final Relation<String, String> formerRegionToSubdivisions = Relation.of(new HashMap<String, Set<String>>(), TreeSet.class, SubdivisionNode.ROOT_COL);
48         static {
49             Map<Status, Set<String>> oldSubdivisionData = VALIDITY_FORMER.getStatusToCodes(LstrType.subdivision);
50             for (Entry<Status, Set<String>> e : oldSubdivisionData.entrySet()) {
51                 final Status status = e.getKey();
52                 if (status != Status.unknown) { // special is a hack
53                     for (String sdCode : e.getValue()) {
54                         final String region = SubdivisionNames.getRegionFromSubdivision(sdCode);
formerRegionToSubdivisions.put(region, sdCode)55                         formerRegionToSubdivisions.put(region, sdCode);
56                     }
57                 }
58             }
formerRegionToSubdivisions.freeze()59             formerRegionToSubdivisions.freeze();
60         }
61 
62         static final Multimap<String, String> subdivisionIdToOld = HashMultimap.create();
63         static {
64             for (Entry<String, R2<List<String>, String>> entry : SUBDIVISION_ALIASES_FORMER.entrySet()) {
65                 String oldId = entry.getKey();
66                 for (String newId : entry.getValue().get0()) {
subdivisionIdToOld.put(newId, oldId)67                     subdivisionIdToOld.put(newId, oldId);
68                 }
69             }
70         }
71     }
72 
main(String[] args)73     public static void main(String[] args) throws IOException {
74         // TODO Restructure so that this call is done first to process the iso data
75         // then the extraction uses that data.
76         // also restructure the SubdivisionInfo to not be static
77         boolean preprocess = args.length > 0;
78         if (preprocess) {
79             for (String source : Arrays.asList(
80                 "2015-05-04_iso_country_code_ALL_xml",
81                 "2016-01-13_iso_country_code_ALL_xml",
82                 "2016-12-09_iso_country_code_ALL_xml",
83                 "2017-02-12_iso_country_code_ALL_xml",
84                 "2017-09-15_iso_country_code_ALL_xml",
85                 "2018-02-20_iso_country_code_ALL_xml",
86                 "2018-09-02_iso_country_code_ALL_xml")) {
87                 SubdivisionSet sdset1 = new SubdivisionSet(CLDRPaths.CLDR_PRIVATE_DIRECTORY + source + "/iso_country_codes.xml");
88                 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/" + source + ".txt")) {
89                     sdset1.print(pw);
90                 }
91             }
92             return;
93         }
94 
95         SubdivisionSet sdset1 = new SubdivisionSet(GenerateSubdivisions.ISO_SUBDIVISION_CODES);
96         SubDivisionExtractor sdset = new SubDivisionExtractor(sdset1,
97             SubdivisionInfo.VALIDITY_FORMER,
98             SubdivisionInfo.SUBDIVISION_ALIASES_FORMER,
99             SubdivisionInfo.formerRegionToSubdivisions);
100 
101         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisions.xml")) {
102             sdset.printXml(pw);
103         }
104         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisionAliases.txt")) {
105             sdset.printAliases(pw);
106         }
107         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.xml")) {
108             sdset.printEnglish(pw);
109         }
110         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/categories.txt")) {
111             sdset.printSamples(pw);
112         }
113         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.txt")) {
114             sdset.printEnglishComp(pw);
115         }
116         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en-full.txt")) {
117             sdset.printEnglishCompFull(pw);
118         }
119         try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/missing-mid.txt")) {
120             sdset.printMissingMIDs(pw);
121         }
122     }
123 
124 }
125