1 package org.unicode.cldr.test;
2 
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
16 
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.CldrUtility;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.Pair;
22 import org.unicode.cldr.util.PatternCache;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.SupplementalDataInfo;
25 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
26 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
27 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
28 import org.unicode.cldr.util.XPathParts;
29 
30 import com.ibm.icu.impl.Relation;
31 
32 public class TestSupplementalData {
33     static CLDRFile english;
34     private static SupplementalDataInfo supplementalData;
35     private static StandardCodes sc;
36 
main(String[] args)37     public static void main(String[] args) throws IOException {
38         // genData();
39         // if (true) return;
40         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
41         english = cldrFactory.make("en", true);
42         root = cldrFactory.make("root", true);
43         supplementalData = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
44         sc = StandardCodes.make();
45 
46         showMultiZones();
47         checkPlurals();
48 
49         System.out.println("Skipped Elements: " + supplementalData.getSkippedElements());
50         checkAgainstLanguageScript();
51         checkTerritoryMapping();
52 
53         checkTelephoneCodeData();
54     }
55 
showMultiZones()56     private static void showMultiZones() {
57         // reverse the list
58         Relation<String, String> territoryToZones = Relation.<String, String> of(new TreeMap<String, Set<String>>(), TreeSet.class);
59         for (String zone : supplementalData.getCanonicalZones()) {
60             territoryToZones.put(supplementalData.getZone_territory(zone), zone);
61         }
62         // gather the data
63         // this could be slightly simpler using supplementalData.get
64         Set<String> singulars = new TreeSet<String>();
65         for (String region : territoryToZones.keySet()) {
66             final Set<String> zones = territoryToZones.getAll(region);
67             if (zones.size() == 1 || region.equals("001")) {
68                 singulars.addAll(zones);
69                 continue;
70             }
71             System.out.println(region + "\t" + english.getName("territory", region));
72             System.out.println("\t" + zones);
73         }
74         List<String> singleCountries = Arrays.asList(
75             new XPathParts()
76                 .set(root.getFullXPath("//ldml/dates/timeZoneNames/singleCountries"))
77                 .getAttributeValue(-1, "list")
78                 .split("\\s+"));
79         singulars.addAll(singleCountries);
80         singulars.remove("Etc/Unknown"); // remove special case
81         System.out.println("Excluded Zones (not necessary in Survey tool): " + singulars);
82         Set<String> otherExclusions = root.getExcludedZones();
83         if (!otherExclusions.equals(singulars)) {
84             throw new IllegalArgumentException("problem with excluded zones");
85         }
86         for (Iterator<String> it = english.iterator("//ldml/dates/timeZoneNames/zone"); it.hasNext();) {
87             String distinguishedPath = it.next();
88             if (root.isPathExcludedForSurvey(distinguishedPath)) {
89                 System.out.println("EX\t" + distinguishedPath);
90             } else {
91                 System.out.println("\t" + distinguishedPath);
92             }
93         }
94     }
95 
checkPlurals()96     private static void checkPlurals() {
97         Relation<PluralInfo, String> pluralsToLocale = Relation.<PluralInfo, String> of(new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
98         for (String locale : new TreeSet<String>(supplementalData.getPluralLocales())) {
99             PluralInfo pluralInfo = supplementalData.getPlurals(locale);
100             System.out.println(locale + ":\t" + pluralInfo);
101             pluralsToLocale.put(pluralInfo, locale);
102         }
103         String locale = "en_US";
104         PluralInfo pluralInfo = supplementalData.getPlurals(locale);
105         System.out.println(locale + ":\t" + pluralInfo);
106 
107         for (PluralInfo pluralInfo2 : pluralsToLocale.keySet()) {
108             System.out.println("Locales: \t" + pluralsToLocale.getAll(pluralInfo2));
109             final Map<Count, String> typeToExamples = pluralInfo2.getCountToStringExamplesMap();
110             for (Count type : typeToExamples.keySet()) {
111                 System.out.println("\tPlural Code: \t" + type + " \t=>\t" + typeToExamples.get(type));
112             }
113             System.out.println();
114         }
115 
116     }
117 
checkTelephoneCodeData()118     private static void checkTelephoneCodeData() {
119         System.out.println("==== territories for telephoneCodeData ====");
120         System.out.println(supplementalData.getTerritoriesForTelephoneCodeInfo());
121         System.out.println("==== telephone code data for 001 ====");
122         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("001"));
123         System.out.println("==== telephone code data for US ====");
124         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("US"));
125         System.out.println("==== all telephoneCodeData ====");
126         System.out.println(supplementalData.getTerritoryToTelephoneCodeInfo());
127     }
128 
129     static Matcher numericTerritory = PatternCache.get("[0-9]{3}").matcher("");
130     private static CLDRFile root;
131 
checkTerritoryMapping()132     private static void checkTerritoryMapping() {
133         Relation<String, String> alpha3 = supplementalData.getAlpha3TerritoryMapping();
134         Set<String> temp = new TreeSet<String>(sc.getAvailableCodes("territory"));
135         for (Iterator<String> it = temp.iterator(); it.hasNext();) {
136             String code = it.next();
137             if (numericTerritory.reset(code).matches()) {
138                 it.remove();
139                 continue;
140             }
141             // if (sc.getFullData("territory", code).get(0).equals("PRIVATE USE")) {
142             // it.remove();
143             // continue;
144             // }
145         }
146         showAnyDifferences("alpha3", alpha3.keySet(), "sc", temp);
147     }
148 
showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2)149     private static void showAnyDifferences(String title, Set<String> set, String title2, Set<String> set2) {
150         if (!set.equals(set2)) {
151             showFirstMinusSecond("Failure " + title + "-" + title2 + ": ", set, set2);
152             showFirstMinusSecond("Failure " + title2 + "-" + title + ": ", set2, set);
153         }
154     }
155 
showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes)156     private static void showFirstMinusSecond(String title, Set<String> name, Set<String> availableCodes) {
157         Set<String> temp = getFirstMinusSecond(name, availableCodes);
158         if (!temp.isEmpty()) {
159             System.out.println(title + getFirstMinusSecond(name, availableCodes));
160         }
161     }
162 
getFirstMinusSecond(Set<String> name, Set<String> availableCodes)163     private static Set<String> getFirstMinusSecond(Set<String> name, Set<String> availableCodes) {
164         Set<String> temp = new TreeSet<String>(name);
165         temp.removeAll(availableCodes);
166         return temp;
167     }
168 
checkAgainstLanguageScript()169     static void checkAgainstLanguageScript() {
170         Relation<String, String> otherTerritoryToLanguages = Relation.<String, String> of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
171         // get other language data
172         for (String language : sc.getGoodAvailableCodes("language")) {
173             Set<BasicLanguageData> newLanguageData = supplementalData.getBasicLanguageData(language);
174             if (newLanguageData != null) {
175                 for (BasicLanguageData languageData : newLanguageData) {
176                     Set<String> territories = new TreeSet<String>(languageData.getTerritories());
177                     territories.addAll(languageData.getTerritories());
178                     if (territories != null) {
179                         Set<String> scripts = new TreeSet<String>(languageData.getScripts());
180                         scripts.addAll(languageData.getScripts());
181                         if (scripts == null || scripts.size() < 2) {
182                             otherTerritoryToLanguages.putAll(territories, language);
183                         } else {
184                             for (String script : scripts) {
185                                 otherTerritoryToLanguages.putAll(territories, language + "_" + script);
186                             }
187                         }
188                     }
189                 }
190             }
191         }
192         // compare them, listing differences
193         for (String territory : sc.getGoodAvailableCodes("territory")) {
194             Set<String> languages = supplementalData.getTerritoryToLanguages(territory);
195             Set<String> otherLanguages = otherTerritoryToLanguages.getAll(territory);
196             if (otherLanguages == null) otherLanguages = Collections.emptySet();
197             if (!Objects.equals(languages, otherLanguages)) {
198                 Set<String> languagesLeftover = new TreeSet<String>(languages);
199                 languagesLeftover.removeAll(otherLanguages);
200                 Set<String> otherLanguagesLeftover = new TreeSet<String>(otherLanguages);
201                 otherLanguagesLeftover.removeAll(languages);
202                 String territoryString = english.getName(CLDRFile.TERRITORY_NAME, territory);
203                 if (otherLanguagesLeftover.size() != 0) {
204                     for (String other : otherLanguagesLeftover) {
205                         String name = english.getName(other);
206                         System.out.println(territoryString + "\t" + territory + "\t" + name + "\t" + other);
207                     }
208                 }
209             }
210         }
211     }
212 
213     /**
214      * Temporary function to transform data
215      *
216      * @throws IOException
217      */
genData()218     public static void genData() throws IOException {
219         BufferedReader codes = CldrUtility.getUTF8Data("territory_codes.txt");
220         Set<Pair> sorted = new TreeSet<Pair>();
221         while (true) {
222             String line = codes.readLine();
223             if (line == null)
224                 break;
225             line = line.split("#")[0].trim();
226             if (line.length() == 0)
227                 continue;
228             String[] sourceValues = line.split("\\s+");
229             String[] values = new String[5];
230             for (int i = 0; i < values.length; ++i) {
231                 if (i >= sourceValues.length || sourceValues[i].equals("-"))
232                     values[i] = null;
233                 else
234                     values[i] = sourceValues[i];
235             }
236             String alpha2 = values[0];
237             String numeric = values[1];
238             String alpha3 = values[2];
239             String internet = values[3];
240             if (internet != null) {
241                 internet = internet.replace("/", " ");
242             }
243             if (internet != null)
244                 internet = internet.toUpperCase();
245             String fips10 = values[4];
246             Pair item = new Pair(alpha2, new Pair(numeric, new Pair(alpha3, new Pair(fips10, internet))));
247             sorted.add(item);
248         }
249         for (Pair item : sorted) {
250             // <territoryCodes type="CM" numeric="120" alpha3="CMR"/>
251             System.out.print("<territoryCodes");
252             Comparable first = item.getFirst();
253             showNonNull("type", first, null);
254             item = (Pair) item.getSecond();
255             showNonNull("numeric", item.getFirst(), null);
256             item = (Pair) item.getSecond();
257             showNonNull("alpha3", item.getFirst(), null);
258             item = (Pair) item.getSecond();
259             showNonNull("fips10", item.getFirst(), first);
260             showNonNull("internet", item.getSecond(), first);
261             System.out.println("/>");
262         }
263         codes.close();
264     }
265 
showNonNull(String title, Object first, Object noDup)266     private static void showNonNull(String title, Object first, Object noDup) {
267         if (first != null && !first.equals(noDup)) {
268             System.out.print(" " + title + "=\"" + first + "\"");
269         }
270     }
271 }