1 package org.unicode.cldr.test;
2 
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;

import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.impl.Relation;
31 
32 public class TestSupplementalData {
33     static CLDRFile english;
34     private static SupplementalDataInfo supplementalData;
35     private static StandardCodes sc;
36 
main(String[] args)37     public static void main(String[] args) throws IOException {
38         // genData();
39         // if (true) return;
40         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
41         english = cldrFactory.make("en", true);
42         root = cldrFactory.make("root", true);
43         supplementalData = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
44         sc = StandardCodes.make();
45 
46         showMultiZones();
47         checkPlurals();
48 
49         System.out.println("Skipped Elements: " + supplementalData.getSkippedElements());
50         checkAgainstLanguageScript();
51         checkTerritoryMapping();
52 
53         checkTelephoneCodeData();
54     }
55 
showMultiZones()56     private static void showMultiZones() {
57         // reverse the list
58         Relation<String, String> territoryToZones = Relation.<String, String> of(new TreeMap<String, Set<String>>(), TreeSet.class);
59         for (String zone : supplementalData.getCanonicalZones()) {
60             territoryToZones.put(supplementalData.getZone_territory(zone), zone);
61         }
62         // gather the data
63         // this could be slightly simpler using supplementalData.get
64         Set<String> singulars = new TreeSet<>();
65         for (String region : territoryToZones.keySet()) {
66             final Set<String> zones = territoryToZones.getAll(region);
67             if (zones.size() == 1 || region.equals("001")) {
68                 singulars.addAll(zones);
69                 continue;
70             }
71             System.out.println(region + "\t" + english.getName("territory", region));
72             System.out.println("\t" + zones);
73         }
74         XPathParts xpp = XPathParts.getFrozenInstance(root.getFullXPath("//ldml/dates/timeZoneNames/singleCountries"));
75         List<String> singleCountries = Arrays.asList(xpp.getAttributeValue(-1, "list").split("\\s+"));
76         singulars.addAll(singleCountries);
77         singulars.remove("Etc/Unknown"); // remove special case
78         System.out.println("Excluded Zones (not necessary in Survey tool): " + singulars);
79         Set<String> otherExclusions = root.getExcludedZones();
80         if (!otherExclusions.equals(singulars)) {
81             throw new IllegalArgumentException("problem with excluded zones");
82         }
83         for (Iterator<String> it = english.iterator("//ldml/dates/timeZoneNames/zone"); it.hasNext();) {
84             String distinguishedPath = it.next();
85             if (root.isPathExcludedForSurvey(distinguishedPath)) {
86                 System.out.println("EX\t" + distinguishedPath);
87             } else {
88                 System.out.println("\t" + distinguishedPath);
89             }
90         }
91     }
92 
checkPlurals()93     private static void checkPlurals() {
94         Relation<PluralInfo, String> pluralsToLocale = Relation.<PluralInfo, String> of(new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
95         for (String locale : new TreeSet<>(supplementalData.getPluralLocales())) {
96             PluralInfo pluralInfo = supplementalData.getPlurals(locale);
97             System.out.println(locale + ":\t" + pluralInfo);
98             pluralsToLocale.put(pluralInfo, locale);
99         }
100         String locale = "en_US";
101         PluralInfo pluralInfo = supplementalData.getPlurals(locale);
102         System.out.println(locale + ":\t" + pluralInfo);
103 
104         for (PluralInfo pluralInfo2 : pluralsToLocale.keySet()) {
105             System.out.println("Locales: \t" + pluralsToLocale.getAll(pluralInfo2));
106             final Map<Count, String> typeToExamples = pluralInfo2.getCountToStringExamplesMap();
107             for (Count type : typeToExamples.keySet()) {
108                 System.out.println("\tPlural Code: \t" + type + " \t=>\t" + typeToExamples.get(type));
109             }
110             System.out.println();
111         }
112 
113     }
114 
checkTelephoneCodeData()115     private static void checkTelephoneCodeData() {
116         System.out.println("==== territories for telephoneCodeData ====");
117         System.out.println(supplementalData.getTerritoriesForTelephoneCodeInfo());
118         System.out.println("==== telephone code data for 001 ====");
119         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("001"));
120         System.out.println("==== telephone code data for US ====");
121         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("US"));
122         System.out.println("==== all telephoneCodeData ====");
123         System.out.println(supplementalData.getTerritoryToTelephoneCodeInfo());
124     }
125 
126     static Matcher numericTerritory = PatternCache.get("[0-9]{3}").matcher("");
127     private static CLDRFile root;
128 
checkTerritoryMapping()129     private static void checkTerritoryMapping() {
130         Relation<String, String> alpha3 = supplementalData.getAlpha3TerritoryMapping();
131         Set<String> temp = new TreeSet<>(sc.getAvailableCodes("territory"));
132         for (Iterator<String> it = temp.iterator(); it.hasNext();) {
133             String code = it.next();
134             if (numericTerritory.reset(code).matches()) {
135                 it.remove();
136                 continue;
137             }
138             // if (sc.getFullData("territory", code).get(0).equals("PRIVATE USE")) {
139             // it.remove();
140             // continue;
141             // }
142         }
143         showAnyDifferences("alpha3", alpha3.keySet(), "sc", temp);
144     }
145 
showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2)146     private static void showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2) {
147         if (!set.equals(set2)) {
148             showFirstMinusSecond("Failure " + title + "-" + title2 + ": ", set, set2);
149             showFirstMinusSecond("Failure " + title2 + "-" + title + ": ", set2, set);
150         }
151     }
152 
showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes)153     private static void showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes) {
154         Set<String> temp = getFirstMinusSecond(name, availableCodes);
155         if (!temp.isEmpty()) {
156             System.out.println(title + getFirstMinusSecond(name, availableCodes));
157         }
158     }
159 
getFirstMinusSecond(Set<String> name, Set<String> availableCodes)160     private static Set<String> getFirstMinusSecond(Set<String> name, Set<String> availableCodes) {
161         Set<String> temp = new TreeSet<>(name);
162         temp.removeAll(availableCodes);
163         return temp;
164     }
165 
checkAgainstLanguageScript()166     static void checkAgainstLanguageScript() {
167         Relation<String, String> otherTerritoryToLanguages = Relation.<String, String> of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
168         // get other language data
169         for (String language : sc.getGoodAvailableCodes("language")) {
170             Set<BasicLanguageData> newLanguageData = supplementalData.getBasicLanguageData(language);
171             if (newLanguageData != null) {
172                 for (BasicLanguageData languageData : newLanguageData) {
173                     Set<String> territories = new TreeSet<>(languageData.getTerritories());
174                     territories.addAll(languageData.getTerritories());
175                     if (territories != null) {
176                         Set<String> scripts = new TreeSet<>(languageData.getScripts());
177                         scripts.addAll(languageData.getScripts());
178                         if (scripts == null || scripts.size() < 2) {
179                             otherTerritoryToLanguages.putAll(territories, language);
180                         } else {
181                             for (String script : scripts) {
182                                 otherTerritoryToLanguages.putAll(territories, language + "_" + script);
183                             }
184                         }
185                     }
186                 }
187             }
188         }
189         // compare them, listing differences
190         for (String territory : sc.getGoodAvailableCodes("territory")) {
191             Set<String> languages = supplementalData.getTerritoryToLanguages(territory);
192             Set<String> otherLanguages = otherTerritoryToLanguages.getAll(territory);
193             if (otherLanguages == null) otherLanguages = Collections.emptySet();
194             if (!Objects.equals(languages, otherLanguages)) {
195                 Set<String> languagesLeftover = new TreeSet<>(languages);
196                 languagesLeftover.removeAll(otherLanguages);
197                 Set<String> otherLanguagesLeftover = new TreeSet<>(otherLanguages);
198                 otherLanguagesLeftover.removeAll(languages);
199                 String territoryString = english.getName(CLDRFile.TERRITORY_NAME, territory);
200                 if (otherLanguagesLeftover.size() != 0) {
201                     for (String other : otherLanguagesLeftover) {
202                         String name = english.getName(other);
203                         System.out.println(territoryString + "\t" + territory + "\t" + name + "\t" + other);
204                     }
205                 }
206             }
207         }
208     }
209 
210     /**
211      * Temporary function to transform data
212      *
213      * @throws IOException
214      */
genData()215     public static void genData() throws IOException {
216         BufferedReader codes = CldrUtility.getUTF8Data("territory_codes.txt");
217         Set<Pair> sorted = new TreeSet<>();
218         while (true) {
219             String line = codes.readLine();
220             if (line == null)
221                 break;
222             line = line.split("#")[0].trim();
223             if (line.length() == 0)
224                 continue;
225             String[] sourceValues = line.split("\\s+");
226             String[] values = new String[5];
227             for (int i = 0; i < values.length; ++i) {
228                 if (i >= sourceValues.length || sourceValues[i].equals("-"))
229                     values[i] = null;
230                 else
231                     values[i] = sourceValues[i];
232             }
233             String alpha2 = values[0];
234             String numeric = values[1];
235             String alpha3 = values[2];
236             String internet = values[3];
237             if (internet != null) {
238                 internet = internet.replace("/", " ");
239             }
240             if (internet != null)
241                 internet = internet.toUpperCase();
242             String fips10 = values[4];
243             Pair item = new Pair(alpha2, new Pair(numeric, new Pair(alpha3, new Pair(fips10, internet))));
244             sorted.add(item);
245         }
246         for (Pair item : sorted) {
247             // <territoryCodes type="CM" numeric="120" alpha3="CMR"/>
248             System.out.print("<territoryCodes");
249             Comparable first = item.getFirst();
250             showNonNull("type", first, null);
251             item = (Pair) item.getSecond();
252             showNonNull("numeric", item.getFirst(), null);
253             item = (Pair) item.getSecond();
254             showNonNull("alpha3", item.getFirst(), null);
255             item = (Pair) item.getSecond();
256             showNonNull("fips10", item.getFirst(), first);
257             showNonNull("internet", item.getSecond(), first);
258             System.out.println("/>");
259         }
260         codes.close();
261     }
262 
showNonNull(String title, Object first, Object noDup)263     private static void showNonNull(String title, Object first, Object noDup) {
264         if (first != null && !first.equals(noDup)) {
265             System.out.print(" " + title + "=\"" + first + "\"");
266         }
267     }
268 }