1 package org.unicode.cldr.tool;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.text.ParseException;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Comparator;
10 import java.util.Date;
11 import java.util.HashMap;
12 import java.util.Iterator;
13 import java.util.List;
14 import java.util.Locale;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 
20 import org.unicode.cldr.util.CLDRFile;
21 import org.unicode.cldr.util.CLDRPaths;
22 import org.unicode.cldr.util.CldrUtility;
23 import org.unicode.cldr.util.Factory;
24 import org.unicode.cldr.util.Iso639Data;
25 import org.unicode.cldr.util.Iso639Data.Scope;
26 import org.unicode.cldr.util.Iso639Data.Type;
27 import org.unicode.cldr.util.Log;
28 import org.unicode.cldr.util.StandardCodes;
29 import org.unicode.cldr.util.StandardCodes.LstrType;
30 import org.unicode.cldr.util.SupplementalDataInfo;
31 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
32 import org.unicode.cldr.util.Validity;
33 import org.unicode.cldr.util.Validity.Status;
34 import org.unicode.cldr.util.XPathParts;
35 
36 import com.google.common.collect.ImmutableMap;
37 import com.ibm.icu.impl.Relation;
38 import com.ibm.icu.impl.Utility;
39 import com.ibm.icu.text.Collator;
40 import com.ibm.icu.text.DateFormat;
41 import com.ibm.icu.text.DecimalFormat;
42 import com.ibm.icu.text.NumberFormat;
43 import com.ibm.icu.text.SimpleDateFormat;
44 import com.ibm.icu.text.Transliterator;
45 import com.ibm.icu.util.ULocale;
46 
47 public class GenerateEnums {
    /** Indent passed to the generated-code markers of the currency, script and region enum files. */
    private static final String CODE_INDENT = "  ";

    /** Indent used for the add(...)/addAlias(...) lines and their markers in region_converters.txt. */
    private static final String DATA_INDENT = "    ";

    /** Indent used for the quoted language-code list; its length also shortens the line limit there. */
    private static final String LIST_INDENT = "              ";

    /** Source of the language, script and territory subtag codes. */
    private StandardCodes sc = StandardCodes.make();

    /** Factory over CLDRPaths.MAIN_DIRECTORY that matches every file name (".*"). */
    private Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");

//    private Factory supplementalFactory = Factory.make(
//        CLDRPaths.SUPPLEMENTAL_DIRECTORY, ".*");

    /** Territory codes collected by loadCLDRData(). */
    private Set<String> cldrCodes = new TreeSet<>();

    // private Map enum_canonical = new TreeMap();
    /** Region code to alpha-3 code, filled by loadCLDRData(). */
    private Map<String, String> enum_alpha3 = new TreeMap<>();

    /** Region code (enumName(region) for numeric regions) to numeric code string, filled by loadCLDRData(). */
    private Map<String, String> enum_UN = new TreeMap<>();

    // private Map enum_FIPS10 = new TreeMap();

    // private Map enum_TLD = new TreeMap();

    /** The "en" file; used to look up English display names. */
    private CLDRFile english = factory.make("en", false);

    // NOTE(review): not referenced in the visible part of this file - confirm it is still needed.
    private CLDRFile supplementalMetadata = factory.make("supplementalMetadata",
        false);

    /** Iterated for the currencyData/region and territoryContainment/group paths. */
    private CLDRFile supplementalData = factory.make("supplementalData", false);

    /** Currency code to the regions recorded for it by loadCLDRData(); null until that method runs. */
    private Relation<String, String> unlimitedCurrencyCodes;

    /** Script codes; replaced by loadCLDRData() with supplementalDataInfo.getCLDRScriptCodes(). */
    private Set<String> scripts = new TreeSet<>();

    /** Language codes; replaced by loadCLDRData() with supplementalDataInfo.getCLDRLanguageCodes(). */
    private Set<String> languages = new TreeSet<>();
84 
main(String[] args)85     public static void main(String[] args) throws IOException {
86         GenerateEnums gen = new GenerateEnums();
87         gen.showLanguageInfo();
88         gen.loadCLDRData();
89         gen.showCounts();
90         gen.showCurrencies();
91         gen.showLanguages();
92         gen.showScripts();
93         gen.showRegionCodeInfo();
94         System.out.println("DONE");
95     }
96 
showCounts()97     private void showCounts() {
98         System.out.format("Language Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
99             "language").size());
100         System.out.format("Script Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
101             "script").size());
102         System.out.format("Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
103             "territory").size());
104     }
105 
showCurrencies()106     private void showCurrencies() throws IOException {
107         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
108         Log.println();
109         Log.println("Currency Data");
110         Log.println();
111         showGeneratedCommentStart(CODE_INDENT);
112         compareSets("currencies from sup.data", currencyCodes, "valid currencies",
113             validCurrencyCodes);
114         Set<String> unused = new TreeSet<>(validCurrencyCodes);
115         unused.removeAll(currencyCodes);
116         showCurrencies(currencyCodes);
117         Log.println();
118         showCurrencies(unused);
119         Map<String, String> sorted = new TreeMap<>(Collator
120             .getInstance(ULocale.ENGLISH));
121         for (String code : validCurrencyCodes) {
122             if (unused.contains(code) && !code.equals("CLF"))
123                 continue; // we include CLF for compatibility
124             sorted.put(getName(code), code);
125         }
126         int lineLength = "  /** Belgian Franc */                                            BEF,"
127             .length();
128         for (String name : sorted.keySet()) {
129             printRow(Log.getLog(), sorted.get(name), name, "currency", null,
130                 lineLength);
131         }
132         showGeneratedCommentEnd(CODE_INDENT);
133         Log.close();
134     }
135 
getName(String code)136     private String getName(String code) {
137         String result = english.getName(CLDRFile.CURRENCY_NAME, code);
138         if (result == null) {
139             result = code;
140             System.out.println("Failed to find: " + code);
141         }
142         return result;
143     }
144 
showCurrencies(Set<String> both)145     private void showCurrencies(Set<String> both) {
146         // /** Afghani */ AFN,
147         for (Iterator<String> it = both.iterator(); it.hasNext();) {
148             String code = it.next();
149             String englishName = getName(code);
150             if (englishName == null) {
151             }
152             Set<String> regions = unlimitedCurrencyCodes.getAll(code);
153             System.out
154                 .println(code
155                     + "\t"
156                     + englishName
157                     + "\t"
158                     + (validCurrencyCodes.contains(code) ? currencyCodes
159                         .contains(code) ? "" : "valid-only" : "supp-only")
160                     + "\t"
161                     + (regions != null ? regions : "unused"));
162         }
163     }
164 
showScripts()165     private void showScripts() throws IOException {
166         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
167         Log.println();
168         Log.println("Script Data");
169         Log.println();
170 
171         showGeneratedCommentStart(CODE_INDENT);
172         Map<String, String> code_replacements = new TreeMap<>();
173         int len = "  /** Arabic */                                        Arab,"
174             .length();
175         for (Iterator<String> it = scripts.iterator(); it.hasNext();) {
176             String code = it.next();
177             String englishName = english.getName(CLDRFile.SCRIPT_NAME, code);
178             if (englishName == null)
179                 continue;
180             printRow(Log.getLog(), code, null, "script", code_replacements, len);
181             // Log.println(" /**" + englishName + "*/ " + code + ",");
182         }
183         showGeneratedCommentEnd(CODE_INDENT);
184         Log.close();
185     }
186 
showLanguageInfo()187     private void showLanguageInfo() throws IOException {
188         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
189         System.out.println();
190         System.out.println("Language Converter");
191         System.out.println();
192         StringBuilder buffer = new StringBuilder();
193         // language information
194         for (String language : sc.getAvailableCodes("language")) {
195             Scope scope = Iso639Data.getScope(language);
196             if (scope == Scope.PrivateUse) {
197                 continue;
198             }
199             buffer.setLength(0);
200             String alpha3 = Iso639Data.toAlpha3(language);
201             if (alpha3 != null) {
202                 buffer.append(".add(\"" + alpha3 + "\")");
203             }
204             Type type = Iso639Data.getType(language);
205             if (type != Type.Living) {
206                 buffer.append(".add(Type." + type + ")");
207             }
208             if (scope != Scope.Individual) {
209                 buffer.append(".add(Scope." + scope + ")");
210             }
211             if (buffer.length() > 0) {
212                 Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
213             }
214         }
215         Log.close();
216     }
217 
showLanguages()218     private void showLanguages() throws IOException {
219         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
220         System.out.println();
221         System.out.println("Language Data");
222         System.out.println();
223 
224         for (Iterator<String> it = languages.iterator(); it.hasNext();) {
225             String code = it.next();
226             String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
227             if (englishName == null)
228                 continue;
229             System.out.println("     /**" + englishName + "*/    " + code + ",");
230         }
231 
232         showGeneratedCommentStart(LIST_INDENT);
233         /*
234          * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa
235          * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + "
236          * as ast ath aus av awa ay az ba bad bai bal ban bas bat be"
237          */
238         StringBuffer buffer = new StringBuffer();
239         int lineLimit = 70 - LIST_INDENT.length();
240         char lastChar = 0;
241         for (Iterator<String> it = languages.iterator(); it.hasNext();) {
242             String code = it.next();
243             if (code.equals("root")) {
244                 continue;
245             }
246             if (code.charAt(0) != lastChar
247                 || buffer.length() + 1 + code.length() > lineLimit) {
248                 if (buffer.length() != 0)
249                     Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
250                 buffer.setLength(0);
251                 lastChar = code.charAt(0);
252             }
253             buffer.append(code).append(' ');
254         }
255         // remove the very last space
256         if (buffer.charAt(buffer.length() - 1) == ' ') {
257             buffer.setLength(buffer.length() - 1);
258         }
259         Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
260 
261         showGeneratedCommentEnd(LIST_INDENT);
262         Log.close();
263     }
264 
265     @SuppressWarnings("rawtypes")
join(Collection collection, String separator)266     private Object join(Collection collection, String separator) {
267         if (collection == null)
268             return null;
269         StringBuffer result = new StringBuffer();
270         boolean first = true;
271         for (Iterator it = collection.iterator(); it.hasNext();) {
272             if (first)
273                 first = false;
274             else
275                 result.append(separator);
276             result.append(it.next());
277         }
278         return result.toString();
279     }
280 
    /** Formats a number with the pattern "000"; loadCLDRData() uses it to build macro-region code keys. */
    static NumberFormat threeDigit = new DecimalFormat("000");
282 
loadCLDRData()283     public void loadCLDRData() throws IOException {
284         // BufferedReader codes = Utility.getUTF8Data("territory_codes.txt");
285         // while (true) {
286         // String line = codes.readLine();
287         // if (line == null)
288         // break;
289         // line = line.split("#")[0].trim();
290         // if (line.length() == 0)
291         // continue;
292         // String[] sourceValues = line.split("\\s+");
293         // String[] values = new String[5];
294         // for (int i = 0; i < values.length; ++i) {
295         // if (i >= sourceValues.length || sourceValues[i].equals("-"))
296         // values[i] = null;
297         // else
298         // values[i] = sourceValues[i];
299         // }
300         // String alpha2 = values[0];
301         // cldrCodes.add(alpha2);
302         // if (isPrivateUseRegion(alpha2))
303         // continue;
304         // String numeric = values[1];
305         // String alpha3 = values[2];
306         // String internet = values[3];
307         // if (internet != null)
308         // internet = internet.toUpperCase();
309         // String fips10 = values[4];
310         // String enumValue = enumName(alpha2);
311         // enum_alpha3.put(enumValue, alpha3);
312         // enum_UN.put(enumValue, numeric);
313         // enum_FIPS10.put(enumValue, fips10);
314         // enum_TLD.put(enumValue, internet);
315         // }
316         // codes.close();
317         DecimalFormat threeDigits = new DecimalFormat("000");
318         for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
319             cldrCodes.add(value);
320             if (isPrivateUseRegion(value)) continue;
321             enum_UN.put(value,
322                 threeDigits.format(supplementalDataInfo.getNumericTerritoryMapping().getAll(value).iterator().next()));
323         }
324         for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
325             cldrCodes.add(value);
326             if (isPrivateUseRegion(value)) continue;
327             enum_alpha3.put(value, supplementalDataInfo.getAlpha3TerritoryMapping().getAll(value).iterator().next());
328         }
329 
330         BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt");
331         Map<String, String> macro_name = new TreeMap<>();
332         while (true) {
333             String line = codes.readLine();
334             if (line == null)
335                 break;
336             line = line.trim();
337             if (line.length() == 0)
338                 continue;
339             if (line.charAt(0) < '0' || line.charAt(0) > '9') {
340                 System.out.println("GenerateEnums: Skipping: " + line);
341                 continue;
342             }
343             String[] sourceValues = line.split("\\s+");
344             int code = Integer.parseInt(sourceValues[0]);
345             String codeName = threeDigit.format(code);
346             macro_name.put(codeName, line);
347         }
348         codes.close();
349 //        String values = supplementalDataInfo.getValidityInfo().get("$territory").get1().trim();
350         Map<Status, Set<String>> validRegions = Validity.getInstance().getStatusToCodes(LstrType.region);
351         Set<String> regions = new TreeSet<>();
352         regions.addAll(validRegions.get(Status.regular));
353         regions.addAll(validRegions.get(Status.macroregion));
354 //        String[] validTerritories = values.split("\\s+");
355 //        for (int i = 0; i < validTerritories.length; ++i) {
356         for (String region : regions) {
357             if (corrigendum.contains(region)) {
358                 System.out.println("Skipping " + region + "\t\t"
359                     + getEnglishName(region));
360                 continue; // exception, corrigendum
361             }
362             if (isPrivateUseRegion(region))
363                 continue;
364             if (region.charAt(0) < 'A') {// numeric
365                 enum_UN.put(enumName(region), region);
366                 cldrCodes.add(region);
367             } else {
368                 if (enum_alpha3.get(region) == null) {
369                     System.out.println("Missing alpha3 for: " + region);
370                 }
371             }
372         }
373         checkDuplicates(enum_UN);
374         checkDuplicates(enum_alpha3);
375         Set<String> availableCodes = new TreeSet<>(sc.getAvailableCodes("territory"));
376         compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
377         Set<String> missing = new TreeSet<>(availableCodes);
378         missing.removeAll(cldrCodes);
379         // don't care list: "003"
380         // missing.remove("003");
381         // missing.remove("172");
382         // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
383         missing.remove("EA");
384         missing.remove("EZ");
385         missing.remove("IC");
386         missing.remove("QU");
387         missing.remove("UN");
388 
389         if (missing.size() != 0) {
390             throw new IllegalArgumentException("Codes in Registry but not in CLDR: "
391                 + missing);
392         }
393 
394         Set<String> UNValues = new TreeSet<>(enum_UN.values());
395 
396         for (Iterator<String> it = macro_name.keySet().iterator(); it.hasNext();) {
397             Object key = it.next();
398             Object value = macro_name.get(key);
399             if (!UNValues.contains(key)) {
400                 System.out.println("Macro " + key + "\t" + value);
401             }
402 
403         }
404 
405         for (Iterator<String> it = enum_UN.keySet().iterator(); it.hasNext();) {
406             String region = it.next();
407             String englishName = getEnglishName(region);
408             if (englishName == null) {
409                 englishName = "NULL"; // for debugging\
410             }
411             String rfcName = getRFC3066Name(region);
412             if (!englishName.equals(rfcName)) {
413                 System.out.println("Different names: {\"" + region + "\",\t\""
414                     + englishName + " (" + rfcName + ")\"},");
415             }
416         }
417 
418         getContainment();
419 
420         DateFormat[] simpleFormats = { new SimpleDateFormat("yyyy-MM-dd"),
421             new SimpleDateFormat("yyyy-MM"), new SimpleDateFormat("yyyy"), };
422         Date today = new Date();
423         Date longAgo = new Date(1000 - 1900, 1, 1);
424         currencyCodes = new TreeSet<>();
425         unlimitedCurrencyCodes = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
426         for (Iterator<String> it = supplementalData
427             .iterator("//supplementalData/currencyData/region"); it.hasNext();) {
428             String path = it.next();
429             XPathParts parts = XPathParts.getFrozenInstance(path);
430             String region = parts.findAttributeValue("region", "iso3166");
431             String code = parts.findAttributeValue("currency", "iso4217");
432             String to = parts.findAttributeValue("currency", "to");
433             main: if (to == null) {
434                 unlimitedCurrencyCodes.put(code, region);
435             } else {
436                 for (int i = 0; i < simpleFormats.length; ++i) {
437                     try {
438                         Date foo = simpleFormats[i].parse(to);
439                         if (foo.compareTo(longAgo) < 0) {
440                             System.out.println("Date Error: can't parse " + to);
441                             break main;
442                         } else if (foo.compareTo(today) >= 0) {
443                             unlimitedCurrencyCodes.put(code, region);
444                         }
445                         break main;
446                     } catch (ParseException e) {
447                     }
448                 }
449                 System.out.println("Date Error: can't parse " + to);
450             }
451             currencyCodes.add(code);
452         }
453 
454         validCurrencyCodes = new TreeSet<>();
455         Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
456         for (String code : bcp47CurrencyCodes) {
457             validCurrencyCodes.add(code.toUpperCase());
458         }
459 
460         scripts = supplementalDataInfo.getCLDRScriptCodes();
461         languages = supplementalDataInfo.getCLDRLanguageCodes();
462 
463         // Set availableCodes = new TreeSet(sc.getAvailableCodes("territory"));
464         // availableCodes.add("003");
465         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
466         // String code = (String) next())
467         // canonicalRegion_UN.put(alpha2, numeric);
468         // }
469 
470         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
471         // String code = (String)it.next();
472         // RegionCode region = map_id_canonical_RFC.get(code);
473         // if (region != null) continue; // skip others
474         // region = new RegionCode(code);
475         // map_id_canonical_RFC.put(code,region);
476         // map_canonical_id_RFC.put(region,code);
477         // if ("A".compareTo(code) > 0) {
478         // map_id_canonical_UN.put(code,region);
479         // map_canonical_id_UN.put(region,code);
480         // } else {
481         // map_id_canonical_A2.put(code,region);
482         // map_canonical_id_A2.put(region,code);
483         // }
484         // }
485         // for (Iterator it = goodAvailableCodes.iterator(); it.hasNext();) {
486         // String code = (String)it.next();
487         // good.add(getInstance(code));
488         // }
489     }
490 
getContainment()491     public void getContainment() {
492         // <group type="001" contains="002 009 019 142 150"/> <!--World -->
493         for (Iterator<String> it = supplementalData
494             .iterator("//supplementalData/territoryContainment/group"); it.hasNext();) {
495             String path = it.next();
496             String fullPath = supplementalData.getFullXPath(path);
497             XPathParts parts = XPathParts.getFrozenInstance(fullPath);
498             String container = parts.getAttributeValue(parts.size() - 1, "type");
499             final String containedString = parts.getAttributeValue(-1, "contains");
500             List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
501             containment.put(container, contained);
502         }
503         // fix recursiveContainment.
504         // for (String region : (Collection<String>)containment.keySet()) {
505         // Set temp = new LinkedHashSet();
506         // addContains(region, temp);
507         // recursiveContainment.put(region, temp);
508         // }
509         Set<String> startingFromWorld = new TreeSet<>();
510         addContains("001", startingFromWorld);
511         compareSets("World", startingFromWorld, "CLDR", cldrCodes);
512         // generateContains();
513     }
514 
generateContains()515     private void generateContains() {
516 
517         for (String region : containment.keySet()) {
518             List<String> plain = containment.get(region);
519             // Collection recursive = (Collection)recursiveContainment.get(region);
520 
521             String setAsString = CldrUtility.join(plain, " ");
522             // String setAsString2 = recursive.equals(plain) ? "" : ", " +
523             // Utility.join(recursive," ");
524             Log.println("\t\tadd(\"" + region + "\", \"" + setAsString + "\");");
525         }
526     }
527 
    /** Container region code to the codes it directly contains; filled by getContainment(). */
    Map<String, List<String>> containment = new TreeMap<>();

    // Map recursiveContainment = new TreeMap();
531 
addContains(String string, Set<String> startingFromWorld)532     private void addContains(String string, Set<String> startingFromWorld) {
533         startingFromWorld.add(string);
534         List<String> contained = containment.get(string);
535         if (contained == null)
536             return;
537         for (Iterator<String> it = contained.iterator(); it.hasNext();) {
538             addContains(it.next(), startingFromWorld);
539         }
540     }
541 
542     @SuppressWarnings("rawtypes")
compareSets(String name, Set availableCodes, String name2, Set cldrCodes)543     private void compareSets(String name, Set availableCodes, String name2,
544         Set cldrCodes) {
545         Set temp = new TreeSet();
546         temp.addAll(availableCodes);
547         temp.removeAll(cldrCodes);
548         System.out.println("In " + name + " but not in " + name2 + ": " + temp);
549         temp.clear();
550         temp.addAll(cldrCodes);
551         temp.removeAll(availableCodes);
552         System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
553     }
554 
555     @SuppressWarnings("rawtypes")
checkDuplicates(Map m)556     private void checkDuplicates(Map m) {
557         Map backMap = new HashMap();
558         for (Iterator it = m.keySet().iterator(); it.hasNext();) {
559             Object key = it.next();
560             Object o = m.get(key);
561             Object otherKey = backMap.get(o);
562             if (otherKey != null)
563                 System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t"
564                     + o);
565             else
566                 backMap.put(o, key);
567         }
568     }
569 
    /** Region codes that loadCLDRData() reports as skipped and leaves out when walking the valid regions. */
    Set<String> corrigendum = new TreeSet<>(Arrays.asList(new String[] { "QE", "833",
        "830", "172" })); // 003, 419

    // Hand-maintained names keyed by region code.
    // NOTE(review): the code that reads this map is not in the visible part of the file - confirm its use.
    private ImmutableMap<String, String> extraNames = ImmutableMap.<String, String>builder()
        .put("BU", "Burma").put("TP", "East Timor").put("YU", "Yugoslavia")
        .put("ZR", "Zaire").put("CD", "Congo (Kinshasa, Democratic Republic)")
        .put("CI", "Ivory Coast (Cote d'Ivoire)")
        .put("FM", "Micronesia (Federated States)")
        .put("TL", "East Timor (Timor-Leste)")
        // .put("155", "Western Europe")
        .build();

    /** Every currency code seen in the supplemental currency data; null until loadCLDRData() runs. */
    private Set<String> currencyCodes;

    /** Upper-cased BCP 47 "cu" keys; null until loadCLDRData() runs. */
    private Set<String> validCurrencyCodes;

    /** Shared supplemental data, loaded from CLDRPaths.SUPPLEMENTAL_DIRECTORY. */
    static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo
        .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
588 
589     /**
590      * Get the RegionCode Enum
591      *
592      * @throws IOException
593      */
showRegionCodeInfo()594     private void showRegionCodeInfo() throws IOException {
595         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
596         System.out.println();
597         System.out.println("Data for RegionCode");
598         System.out.println();
599         showGeneratedCommentStart(CODE_INDENT);
600 
601         Set<String> reordered = new TreeSet<>(new LengthFirstComparator());
602         reordered.addAll(enum_UN.keySet());
603         Map<String, String> code_replacements = new TreeMap<>();
604         int len = "  /** Polynesia */                                    UN061,"
605             .length();
606         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
607             String region = it.next();
608             printRow(Log.getLog(), region, null, "territory", code_replacements, len);
609         }
610         showGeneratedCommentEnd(CODE_INDENT);
611         Log.close();
612 
613         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
614         Log.println();
615         Log.println("Data for ISO Region Codes");
616         Log.println();
617         for (String territory : supplementalDataInfo
618             .getTerritoriesWithPopulationData()) {
619             if (territory.equals("ZZ")) {
620                 continue;
621             }
622             PopulationData popData = supplementalDataInfo
623                 .getPopulationDataForTerritory(territory);
624             // to("ak").add(Scope.Macrolanguage).add("aka");
625             Log.formatln("    addRegion(RegionCode.%s, %s, %s, %s) // %s", territory,
626                 format(popData.getPopulation()), format(popData
627                     .getLiteratePopulation()
628                     / popData.getPopulation()),
629                 format(popData.getGdp()), english
630                     .getName("territory", territory));
631             // remove all the ISO 639-3 until they are part of BCP 47
632             // we need to remove in earlier pass so we have the count
633             Set<String> languages = new TreeSet<>();
634             for (String language : supplementalDataInfo
635                 .getLanguagesForTerritoryWithPopulationData(territory)) {
636                 if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
637                     continue;
638                 }
639                 popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
640                     language, territory);
641                 if (popData.getPopulation() == 0
642                     || Double.isNaN(popData.getLiteratePopulation()
643                         / popData.getPopulation())) {
644                     continue;
645                 }
646                 languages.add(language);
647             }
648             int count = languages.size();
649             for (String language : languages) {
650                 --count; // we need to know the last one
651                 popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
652                     language, territory);
653                 Log.formatln("    .addLanguage(\"%s\", %s, %s)%s // %s", language,
654                     format(popData.getPopulation()), format(popData
655                         .getLiteratePopulation()
656                         / popData.getPopulation()),
657                     (count == 0 ? ";" : ""), english
658                         .getName(language));
659             }
660         }
661         Log.close();
662 
663         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
664         Log.println();
665         Log.println("Data for ISO Region Codes");
666         Log.println();
667         showGeneratedCommentStart(DATA_INDENT);
668         // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are
669         // containees
670         reordered = new TreeSet<>(new DeprecatedAndLengthFirstComparator("territory"));
671         reordered.addAll(enum_UN.keySet());
672         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
673             String region = it.next();
674             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
675             // UN
676             // name
677             // int un = Integer.parseInt((String) enum_UN.get(region)); // get around
678             // dumb octal
679             // syntax
680             String isoCode = enum_alpha3.get(region);
681             if (isoCode == null)
682                 continue;
683             Log.println(DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode."
684                 + region + ");");
685         }
686         doAliases(code_replacements);
687         showGeneratedCommentEnd(DATA_INDENT);
688         Log.println();
689         Log.println("Data for M.49 Region Codes");
690         Log.println();
691         showGeneratedCommentStart(DATA_INDENT);
692 
693         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
694             String region = it.next();
695             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
696             // UN
697             // name
698             int un = Integer.parseInt(enum_UN.get(region), 10); // get
699             // around
700             // dumb
701             // octal
702             // syntax
703             Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region
704                 + ");");
705         }
706         doAliases(code_replacements);
707 
708         System.out.println("Plain list");
709         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
710             String region = it.next();
711             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
712             // UN
713             // name
714             String newCode = code_replacements.get(region);
715             if (newCode != null)
716                 continue;
717 
718             int un = Integer.parseInt(enum_UN.get(region), 10); // get
719             // around
720             // dumb
721             // octal
722             // syntax
723             System.out.println(un + "\t" + region + "\t"
724                 + english.getName("territory", region));
725         }
726 
727         showGeneratedCommentEnd(DATA_INDENT);
728 
729         getContainment();
730         Log.close();
731     }
732 
733     static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);
734 
735     static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);
736     static {
737         nf.setMaximumFractionDigits(3);
738         sf.setMaximumFractionDigits(3);
739         nf.setGroupingUsed(false);
740     }
741 
format(double value)742     private String format(double value) {
743         double newValue = CldrUtility.roundToDecimals(value, 3);
744         String option1 = nf.format(newValue);
745         String option2 = sf.format(value);
746         return option1.length() <= option2.length() ? option1 : option2;
747     }
748 
doAliases(Map<String, String> code_replacements)749     private void doAliases(Map<String, String> code_replacements) {
750         for (String code : code_replacements.keySet()) {
751             String newCode = code_replacements.get(code);
752             if (newCode.length() == 0)
753                 newCode = "ZZ";
754             Log.println(DATA_INDENT + "addAlias(" + "RegionCode." + code + ", \""
755                 + newCode + "\");");
756         }
757     }
758 
showGeneratedCommentEnd(String indent)759     private void showGeneratedCommentEnd(String indent) {
760         Log.println(indent + "/* End of generated code. */");
761     }
762 
showGeneratedCommentStart(String indent)763     private void showGeneratedCommentStart(String indent) {
764         Log.println(indent + "/*");
765         Log.println(indent
766             + " * The following information is generated from a tool,");
767         Log.println(indent + " * as described on");
768         Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
769         Log.println(indent + " * Do not edit manually.");
770         Log.println(indent + " * Start of generated code.");
771         Log.println(indent + " */");
772     }
773 
774     public final static class LengthFirstComparator implements Comparator<Object> {
775         @Override
compare(Object a, Object b)776         public int compare(Object a, Object b) {
777             String as = a.toString();
778             String bs = b.toString();
779             if (as.length() < bs.length())
780                 return -1;
781             if (as.length() > bs.length())
782                 return 1;
783             return as.compareTo(bs);
784         }
785     }
786 
787     public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
788         String type;
789 
DeprecatedAndLengthFirstComparator(String type)790         DeprecatedAndLengthFirstComparator(String type) {
791             this.type = type;
792         }
793 
794         @Override
compare(Object a, Object b)795         public int compare(Object a, Object b) {
796             String as = a.toString();
797             String bs = b.toString();
798             String ar = getDeprecatedReplacement(type, as);
799             String br = getDeprecatedReplacement(type, bs);
800             // put the deprecated ones first, eg those that aren't null
801             if (ar != null) {
802                 if (br == null)
803                     return -1;
804             }
805             if (br != null) {
806                 if (ar == null)
807                     return 1;
808             }
809             // now check the length
810             if (as.length() < bs.length())
811                 return -1;
812             if (as.length() > bs.length())
813                 return 1;
814             return as.compareTo(bs);
815         }
816     }
817 
818     /**
819      * Returns null if not deprecated, otherwise "" if there is no replacement,
820      * otherwise the replacement.
821      *
822      * @return
823      */
getDeprecatedReplacement(String type, String cldrTypeValue)824     public String getDeprecatedReplacement(String type, String cldrTypeValue) {
825         if (type.equals("currency")) {
826             return null;
827         }
828         String path = supplementalMetadata.getFullXPath(
829             "//supplementalData/metadata/alias/" + type + "Alias[@type=\""
830                 + cldrTypeValue + "\"]",
831             true);
832         if (path == null) {
833             return null;
834         }
835         XPathParts parts = XPathParts.getFrozenInstance(path);
836         String replacement = parts.findAttributeValue("territoryAlias", "replacement");
837         if (replacement == null) {
838             return "";
839         }
840         return replacement;
841     }
842 
843     static Transliterator doFallbacks = Transliterator.createFromRules("id",
844         "[’ʻ] > ''; ", Transliterator.FORWARD);
845 
printRow(PrintWriter out, String codeName, String englishName, String type, Map<String, String> code_replacements, int lineLength)846     private void printRow(PrintWriter out, String codeName, String englishName,
847         String type, Map<String, String> code_replacements, int lineLength) {
848         // int numeric = Integer.parseInt((String) enum_UN.get(codeName));
849         // String alpha3 = (String) enum_alpha3.get(codeName);
850         String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix
851         // UN
852         // name
853         String replacement = getDeprecatedReplacement(type, cldrName);
854 
855         String resolvedEnglishName = englishName != null ? englishName : type
856             .equals("territory") ? getEnglishName(codeName) : type
857                 .equals("currency") ? getName(codeName) : english.getName(CLDRFile.SCRIPT_NAME, codeName);
858         resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);
859 
860         String prefix = CODE_INDENT + "/** " + resolvedEnglishName; // + " - " +
861         // threeDigit.format(numeric);
862         String printedCodeName = codeName;
863         if (replacement != null) {
864             code_replacements.put(codeName, replacement);
865             out.println(prefix);
866             prefix = CODE_INDENT + " * @deprecated"
867                 + (replacement.length() == 0 ? "" : " see " + replacement);
868             printedCodeName = "@Deprecated " + printedCodeName;
869         }
870         prefix += " */";
871 
872         if (codeName.equals("UN001")) {
873             out.println();
874         }
875         if (prefix.length() > lineLength - (printedCodeName.length() + 1)) {
876             // break at last space
877             int lastFit = prefix.lastIndexOf(' ', lineLength
878                 - (printedCodeName.length() + 1) - 2);
879             out.println(prefix.substring(0, lastFit));
880             prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
881         }
882         out.print(prefix);
883         out.print(Utility.repeat(" ", (lineLength
884             - (prefix.length() + printedCodeName.length() + 1))));
885         out.println(printedCodeName + ",");
886     }
887 
getEnglishName(String codeName)888     private String getEnglishName(String codeName) {
889         if (codeName.length() > 3)
890             codeName = codeName.substring(2); // fix UN name
891         String name = extraNames.get(codeName);
892         if (name != null)
893             return name;
894         name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
895         if (name != null)
896             return name;
897         return codeName;
898     }
899 
getRFC3066Name(String codeName)900     private String getRFC3066Name(String codeName) {
901         if (codeName.length() > 2)
902             codeName = codeName.substring(2); // fix UN name
903         List<String> list = sc.getFullData("territory", codeName);
904         if (list == null)
905             return null;
906         return list.get(0);
907     }
908 
enumName(String codeName)909     private String enumName(String codeName) {
910         return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
911     }
912 
quote(Object input)913     static String quote(Object input) {
914         if (input != null)
915             return '"' + input.toString().trim() + '"';
916         return null;
917     }
918 
isPrivateUseRegion(String codeName)919     static boolean isPrivateUseRegion(String codeName) {
920         // AA, QM..QZ, XA..XZ, ZZ - CLDR codes
921         if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
922             return false;
923         } else if (codeName.equals("AA") || codeName.equals("ZZ")) {
924             return true;
925         } else if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
926             return true;
927         } else if (codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0) {
928             return true;
929         }
930         return false;
931     }
932     /*
933      * <reset before="tertiary">ウ</reset> <x><context>ウ</context><t>ヽ</t></x>
934      * <x><context>ウ</context><i>ヽ</i></x>
935      *
936      * <x><context>う</context><i>ゝ</i></x> <x><context>ゥ</context><i>ヽ</i></x>
937      * <x><context>ゥ</context><i>ヽ</i></x> <x><context>ぅ</context><i>ゝ</i></x>
938      * <x><context>ヴ</context><i>ヽ</i></x>
939      *
940      * <x><context>ゔ</context><i>ゝ</i></x> <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x>
941      * <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x> <x><context>う</context><i>ゞ</i><extend>゙</extend></x>
942      *
943      * <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x> <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x>
944      * <x><context>ぅ</context><i>ゞ</i><extend>゙</extend></x> <x><context>ヴ</context><i>ヾ</i><extend>゙</extend></x>
945      *
946      * <x><context>ゔ</context><i>ゞ</i><extend>゙</extend></x>
947      */
948 }