1 package org.unicode.cldr.tool;
2 
3 import java.io.PrintWriter;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Comparator;
7 import java.util.HashSet;
8 import java.util.Iterator;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Random;
12 import java.util.Set;
13 import java.util.TreeMap;
14 import java.util.TreeSet;
15 
16 import org.unicode.cldr.draft.FileUtilities;
17 import org.unicode.cldr.util.ArrayComparator;
18 import org.unicode.cldr.util.CLDRFile;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.Level;
22 import org.unicode.cldr.util.Organization;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.SupplementalDataInfo;
25 import org.unicode.cldr.util.XPathParts;
26 
27 import com.ibm.icu.text.BreakIterator;
28 import com.ibm.icu.text.Collator;
29 import com.ibm.icu.text.NumberFormat;
30 import com.ibm.icu.text.RuleBasedCollator;
31 import com.ibm.icu.text.UTF16;
32 import com.ibm.icu.text.UnicodeSet;
33 import com.ibm.icu.util.ULocale;
34 
35 public class GenerateG2xG2 {
36     static CLDRFile english;
37     static CLDRFile root;
38 
main(String[] args)39     public static void main(String[] args) throws Exception {
40         if (showLocales(-1)) return;
41         // showCollator();
42 
43         String sourceLanguage = "G5";
44         String targetLanguage = "G5";
45         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
46         english = cldrFactory.make("en", true);
47         root = cldrFactory.make("root", true);
48         StandardCodes sc = StandardCodes.make();
49         Map<Organization, Map<String, Level>> type_code_value = sc.getLocaleTypes();
50         Set<String> sourceSet = new TreeSet<>();
51         Set<String> targetLanguageSet = new TreeSet<>();
52         targetLanguageSet.add("no");
53         addPriority("G2", "nn");
54         addPriority("G2", "no");
55         targetLanguageSet.add("nn");
56         Set<String> targetScriptSet = new TreeSet<>();
57         Set<String> targetRegionSet = new TreeSet<>();
58         Set<String> targetTZSet = new TreeSet<>();
59         Set<String> targetCurrencySet = new TreeSet<>();
60         for (Organization type : type_code_value.keySet()) {
61             Map<String, Level> code_value = type_code_value.get(type);
62             if (!type.equals(Organization.ibm)) continue;
63             for (String locale : code_value.keySet()) {
64                 if (locale.equals("no")) continue;
65                 String priority = code_value.get(locale).toString();
66                 ULocale ulocale = new ULocale(locale);
67                 String language = ulocale.getLanguage();
68                 String script = ulocale.getScript();
69                 String territory = ulocale.getCountry();
70                 if (sourceLanguage.compareTo(priority) >= 0) {
71                     if (language.equals("no")) language = "nn";
72                     locale = new ULocale(language, script).toString();
73                     sourceSet.add(locale);
74                     addPriority(priority, locale);
75                 }
76                 if (targetLanguage.compareTo(priority) >= 0) {
77                     targetLanguageSet.add(language);
78                     targetScriptSet.add(script);
79                     targetRegionSet.add(territory);
80                     addPriority(priority, language);
81                     addPriority(priority, script);
82                     addPriority("G4", territory); // will normally be overridden
83                 }
84             }
85         }
86         // set the priorities for territories
87         Map<String, List<String>> worldBankInfo = sc.getWorldBankInfo();
88         Set<String> euCodes = new HashSet<>(Arrays.asList(new String[] { "AT", "BE", "CY", "CZ", "DK", "EE",
89             "FI", "FR", "DE", "GR", "HU", "IT", "LV", "LT", "LU", "MT", "NL", "PL", "PT", "SI", "ES", "SE", "GB" }));
90         for (String countryCode : worldBankInfo.keySet()) {
91             if (priorityMap.get(countryCode) == null) continue; // only use ones we already have: defaults G4
92             List<String> values = worldBankInfo.get(countryCode);
93             double gdp = Double.parseDouble(values.get(1));
94             if (gdp >= 1E+13)
95                 addPriority("G0", countryCode);
96             else if (gdp >= 1E+12)
97                 addPriority("G1", countryCode);
98             else if (gdp >= 1E+11)
99                 addPriority("G2", countryCode);
100             else if (euCodes.contains(countryCode)) addPriority("G3", countryCode);
101             // else if (gdp >= 1E+10) addPriority("G4", countryCode);
102         }
103         // fill in the currencies, and TZs for the countries that have multiple zones
104         Map<String, Set<String>> c2z = sc.getCountryToZoneSet();
105         SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
106         Set<String> mainTimeZones = supplementalDataInfo.getCanonicalTimeZones();
107         for (Iterator<String> it = targetRegionSet.iterator(); it.hasNext();) {
108             String country = it.next();
109             String priority = priorityMap.get(country);
110             for (Iterator<String> it2 = getCurrency(country).iterator(); it2.hasNext();) {
111                 String currency = it2.next();
112                 targetCurrencySet.add(currency);
113                 addPriority(priority, currency);
114             }
115             Set<String> s = c2z.get(country);
116             if (s.size() == 1) continue;
117             for (Iterator<String> it2 = s.iterator(); it2.hasNext();) {
118                 String tzid = it2.next();
119                 if (!mainTimeZones.contains(tzid)) continue;
120                 targetTZSet.add(tzid);
121                 addPriority(priority, tzid);
122             }
123         }
124         // print out missing translations.
125         PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "G2xG2.txt");
126         // show priorities
127         Comparator<String> comp = new UTF16.StringComparator();
128         @SuppressWarnings("unchecked")
129         Set<String[]> priority_set = new TreeSet<String[]>(new ArrayComparator(new Comparator[] { comp, comp, comp }));
130         for (Iterator<String> it = priorityMap.keySet().iterator(); it.hasNext();) {
131             String code = it.next();
132             String priority = priorityMap.get(code);
133             if (priority == null) continue;
134             int type = getType(code);
135             // if (type != CLDRFile.TERRITORY_NAME) continue;
136             priority_set.add(new String[] { priority, type + "", code });
137         }
138         String lastPriority = "";
139         //String lastType = "";
140         for (Iterator<String[]> it = priority_set.iterator(); it.hasNext();) {
141             String[] items = it.next();
142             if (!lastPriority.equals(items[0])) {
143                 lastPriority = items[0];
144                 pw.println();
145                 // pw.println(lastPriority);
146             }
147             String typeName = getTypeName(items[2]);
148             pw.println(lastPriority + "\t" + typeName + "\t" + items[2] + "\t(" + getItemName(english, items[2]) + ")");
149         }
150         pw.flush();
151         // print out missing translations.
152         for (Iterator<String> it = sourceSet.iterator(); it.hasNext();) {
153             String sourceLocale = it.next();
154             System.out.print(sourceLocale + ", ");
155             CLDRFile sourceData = cldrFactory.make(sourceLocale, true);
156             pw.println();
157             String title = sourceLocale;
158             checkItems(pw, title, sourceData, CLDRFile.LANGUAGE_NAME, targetLanguageSet);
159             checkItems(pw, title, sourceData, CLDRFile.SCRIPT_NAME, targetScriptSet);
160             checkItems(pw, title, sourceData, CLDRFile.TERRITORY_NAME, targetRegionSet);
161             checkItems(pw, title, sourceData, CLDRFile.CURRENCY_NAME, targetCurrencySet);
162             // only check timezones if exemplar characters don't include a-z
163             String v = sourceData.getStringValue("//ldml/characters/exemplarCharacters");
164             UnicodeSet exemplars = new UnicodeSet(v);
165             if (exemplars.contains('a', 'z')) continue;
166             checkItems(pw, title, sourceData, CLDRFile.TZ_EXEMPLAR, targetTZSet);
167         }
168         pw.println();
169         pw.println("Sizes - incremental");
170         pw.println();
171         int runningTotalCount = 0;
172         int runningMissingCount = 0;
173         NumberFormat percent = NumberFormat.getPercentInstance();
174         percent.setMinimumFractionDigits(1);
175         NumberFormat nf = NumberFormat.getInstance();
176         nf.setGroupingUsed(true);
177         nf.setMinimumFractionDigits(0);
178         for (Iterator<String> it = totalMap.keySet().iterator(); it.hasNext();) {
179             String key = it.next();
180             Totals t = totalMap.get(key);
181             runningTotalCount = t.totalCount;
182             runningMissingCount = t.missingCount;
183             pw.println(key.substring(0, 2) + "\t" + key.substring(2) + "\t" + runningMissingCount
184                 + "\t" + runningTotalCount
185                 + "\t" + percent.format(runningMissingCount / (0.0 + runningTotalCount)));
186         }
187         pw.close();
188         System.out.println();
189         System.out.println("Done");
190     }
191 
showLocales(int choice)192     private static boolean showLocales(int choice) throws Exception {
193         ULocale desiredDisplayLocale = ULocale.ENGLISH;
194         Set<String> testSet = new TreeSet<>();
195         StandardCodes sc = StandardCodes.make();
196         {
197             Set<String> countries = sc.getGoodAvailableCodes("territory");
198             Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
199             english = cldrFactory.make("en", true);
200             for (Iterator<String> it = countries.iterator(); it.hasNext();) {
201                 String territory = it.next();
202                 if (territory.charAt(0) < 'A') continue;
203                 String locale = "haw-" + territory;
204                 System.out.print(locale + ": " + english.getName(locale) + ", ");
205             }
206             if (true) return true;
207         }
208 
209         if (choice == -1) {
210 
211             testSet.addAll(sc.getGoodAvailableCodes("currency"));
212             Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
213             english = cldrFactory.make("en", false);
214             for (Iterator it = testSet.iterator(); it.hasNext();) {
215                 String country = (String) it.next();
216                 System.out.println(country + "\t" + english.getName(CLDRFile.CURRENCY_NAME, country));
217             }
218             return true;
219         } else if (choice == 0) { // get available
220             ULocale[] list = BreakIterator.getAvailableULocales();
221             for (int i = 0; i < list.length; ++i) {
222                 testSet.add(list[i].toString());
223             }
224         } else {
225             boolean USE_3066bis = choice == 2;
226             // produce random list of RFC3066 language tags
227             Set<String> legacy = sc.getAvailableCodes("legacy");
228             List<String> language_subtags = new ArrayList<>(sc.getGoodAvailableCodes("language"));
229             List<String> script_subtags = new ArrayList<>(sc.getGoodAvailableCodes("script"));
230             List<String> region_subtags = new ArrayList<>(sc.getGoodAvailableCodes("territory"));
231             for (String possibility : legacy) {
232                 System.out.println(possibility);
233                 if (new ULocale(possibility).getScript().length() != 0) {
234                     System.out.println("\tAdding");
235                     testSet.add(possibility);
236                 }
237             }
238             if (!USE_3066bis) for (Iterator it = region_subtags.iterator(); it.hasNext();) {
239                 String possibility = (String) it.next();
240                 if (possibility.compareTo("A") < 0) it.remove();
241             }
242             Random rand = new Random();
243             for (int i = 0; i < 200; ++i) {
244                 int r = rand.nextInt(language_subtags.size());
245                 String result = language_subtags.get(rand.nextInt(language_subtags.size()));
246                 if (USE_3066bis && rand.nextDouble() > 0.5) {
247                     result += "-" + script_subtags.get(rand.nextInt(script_subtags.size()));
248                 }
249                 if (rand.nextDouble() > 0.1) {
250                     result += "-" + region_subtags.get(rand.nextInt(region_subtags.size()));
251                 }
252                 testSet.add(result);
253             }
254         }
255         for (Iterator<String> it = testSet.iterator(); it.hasNext();) {
256             ULocale language = new ULocale(it.next());
257             System.out.println(language + " \t" + language.getDisplayName(desiredDisplayLocale));
258         }
259         return true;
260     }
261 
showCollator()262     private static void showCollator() throws Exception {
263         RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(new ULocale("zh"));
264         showExample(col);
265         String rules = col.getRules(false);
266         // System.out.println(com.ibm.icu.impl.Utility.escape(rules));
267         rules += "& \u93CA < A <<< a & \u7C3F < B <<< b";
268         RuleBasedCollator col2 = new RuleBasedCollator(rules);
269         showExample(col2);
270     }
271 
showExample(RuleBasedCollator col)272     private static void showExample(RuleBasedCollator col) {
273         String samples = "a A b B \u5416 \u93CA \u516b \u7C3F";
274         Set<String> s = new TreeSet<>(col);
275         s.addAll(Arrays.asList(samples.split(" ")));
276         System.out.println(com.ibm.icu.impl.Utility.escape(s.toString()));
277     }
278 
279     static Map<String, String> priorityMap = new TreeMap<>();
280 
addPriority(String priority, String code)281     static void addPriority(String priority, String code) {
282         if (code.length() == 0) return;
283         String oldPriority = priorityMap.get(code);
284         if (oldPriority == null || priority.compareTo(oldPriority) < 0) priorityMap.put(code, priority);
285         System.out.println(code + ": " + priority);
286     }
287 
288     static class Totals {
289         int totalCount;
290         int missingCount;
291     }
292 
293     static Map<String, Totals> totalMap = new TreeMap<>();
294 
checkItems(PrintWriter pw, String sourceLocale, CLDRFile sourceData, int type, Set<String> targetItemSet)295     static void checkItems(PrintWriter pw, String sourceLocale, CLDRFile sourceData, int type, Set<String> targetItemSet) {
296         for (Iterator<String> it2 = targetItemSet.iterator(); it2.hasNext();) {
297             String item = it2.next();
298             if (item.length() == 0) continue;
299             String key = priorityMap.get(sourceLocale) + "" + priorityMap.get(item);
300             Totals t = totalMap.get(key);
301             if (t == null) totalMap.put(key, t = new Totals());
302             t.totalCount++;
303             String translation = getItemName(sourceData, type, item);
304             String rootName = getItemName(root, type, item);
305             if (rootName.equals(translation)) {
306                 t.missingCount++;
307                 pw.println(priorityMap.get(sourceLocale)
308                     + "\t" + sourceLocale +
309                     "\t(" + english.getName(sourceLocale) + ": "
310                     + sourceData.getName(sourceLocale) + ")"
311                     + "\t" + priorityMap.get(item)
312                     + "\t" + item
313                     + "\t(" + getItemName(english, type, item) + ")");
314             }
315         }
316     }
317 
getItemName(CLDRFile data, String item)318     private static String getItemName(CLDRFile data, String item) {
319         return getItemName(data, getType(item), item);
320     }
321 
getType(String item)322     private static int getType(String item) {
323         int type = CLDRFile.LANGUAGE_NAME;
324         if (item.indexOf('/') >= 0)
325             type = CLDRFile.TZ_EXEMPLAR; // America/Los_Angeles
326         else if (item.length() == 4)
327             type = CLDRFile.SCRIPT_NAME; // Hant
328         else if (item.charAt(0) <= '9')
329             type = CLDRFile.TERRITORY_NAME; // 001
330         else if (item.charAt(0) < 'a') {
331             if (item.length() == 3)
332                 type = CLDRFile.CURRENCY_NAME;
333             else
334                 type = CLDRFile.TERRITORY_NAME; // US or USD
335         }
336         return type;
337     }
338 
getTypeName(String item)339     private static String getTypeName(String item) {
340         switch (getType(item)) {
341         case CLDRFile.LANGUAGE_NAME:
342             return "Lang";
343         case CLDRFile.TZ_EXEMPLAR:
344             return "Zone";
345         case CLDRFile.SCRIPT_NAME:
346             return "Script";
347         case CLDRFile.TERRITORY_NAME:
348             return "Region";
349         case CLDRFile.CURRENCY_NAME:
350             return "Curr.";
351         }
352         return "?";
353     }
354 
getItemName(CLDRFile data, int type, String item)355     private static String getItemName(CLDRFile data, int type, String item) {
356         String result;
357         if (type == CLDRFile.LANGUAGE_NAME) {
358             result = data.getName(item);
359         } else if (type != CLDRFile.TZ_EXEMPLAR) {
360             result = data.getName(type, item);
361         } else {
362             String prefix = "//ldml/dates/timeZoneNames/zone[@type=\"" + item + "\"]/exemplarCity";
363             result = data.getStringValue(prefix);
364         }
365         return result == null ? item : result;
366     }
367 
368     static Map<String, List<String>> territory_currency = null;
369 
getCurrency(String territory)370     private static List<String> getCurrency(String territory) {
371         if (territory_currency == null) {
372             territory_currency = new TreeMap<>();
373             Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
374             CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
375             for (String path : supp) {
376                 if (path.indexOf("/currencyData") >= 0) {
377                     // <region iso3166="AR">
378                     // <currency iso4217="ARS" from="1992-01-01"/>
379                     if (path.indexOf("/region") >= 0) {
380                         XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path));
381                         Map<String, String> attributes = parts.getAttributes(parts.size() - 2);
382                         String iso3166 = attributes.get("iso3166");
383                         attributes = parts.getAttributes(parts.size() - 1);
384                         String iso4217 = attributes.get("iso4217");
385                         String to = attributes.get("to");
386                         if (to != null) {
387                             continue;
388                         }
389                         List<String> info = territory_currency.get(iso3166);
390                         if (info == null) {
391                             territory_currency.put(iso3166, info = new ArrayList<>());
392                         }
393                         info.add(iso4217);
394                     }
395                 }
396             }
397         }
398         return territory_currency.get(territory);
399     }
400 }