1 package org.unicode.cldr.tool;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.text.ParseException;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Comparator;
10 import java.util.Date;
11 import java.util.HashMap;
12 import java.util.Iterator;
13 import java.util.List;
14 import java.util.Locale;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 
20 import org.unicode.cldr.util.CLDRFile;
21 import org.unicode.cldr.util.CLDRPaths;
22 import org.unicode.cldr.util.CldrUtility;
23 import org.unicode.cldr.util.Factory;
24 import org.unicode.cldr.util.Iso639Data;
25 import org.unicode.cldr.util.Iso639Data.Scope;
26 import org.unicode.cldr.util.Iso639Data.Type;
27 import org.unicode.cldr.util.Log;
28 import org.unicode.cldr.util.StandardCodes;
29 import org.unicode.cldr.util.StandardCodes.LstrType;
30 import org.unicode.cldr.util.SupplementalDataInfo;
31 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
32 import org.unicode.cldr.util.Validity;
33 import org.unicode.cldr.util.Validity.Status;
34 import org.unicode.cldr.util.XPathParts;
35 
36 import com.ibm.icu.dev.util.CollectionUtilities;
37 import com.ibm.icu.impl.Relation;
38 import com.ibm.icu.impl.Utility;
39 import com.ibm.icu.text.Collator;
40 import com.ibm.icu.text.DateFormat;
41 import com.ibm.icu.text.DecimalFormat;
42 import com.ibm.icu.text.NumberFormat;
43 import com.ibm.icu.text.SimpleDateFormat;
44 import com.ibm.icu.text.Transliterator;
45 import com.ibm.icu.util.ULocale;
46 
47 public class GenerateEnums {
48     private static final String CODE_INDENT = "  ";
49 
50     private static final String DATA_INDENT = "    ";
51 
52     private static final String LIST_INDENT = "              ";
53 
54     private StandardCodes sc = StandardCodes.make();
55 
56     private Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
57 
58 //    private Factory supplementalFactory = Factory.make(
59 //        CLDRPaths.SUPPLEMENTAL_DIRECTORY, ".*");
60 
61     private Set<String> cldrCodes = new TreeSet<String>();
62 
63     // private Map enum_canonical = new TreeMap();
64     private Map<String, String> enum_alpha3 = new TreeMap<String, String>();
65 
66     private Map<String, String> enum_UN = new TreeMap<String, String>();
67 
68     // private Map enum_FIPS10 = new TreeMap();
69 
70     // private Map enum_TLD = new TreeMap();
71 
72     private CLDRFile english = factory.make("en", false);
73 
74     private CLDRFile supplementalMetadata = factory.make("supplementalMetadata",
75         false);
76 
77     private CLDRFile supplementalData = factory.make("supplementalData", false);
78 
79     private Relation<String, String> unlimitedCurrencyCodes;
80 
81     private Set<String> scripts = new TreeSet<String>();
82 
83     private Set<String> languages = new TreeSet<String>();
84 
main(String[] args)85     public static void main(String[] args) throws IOException {
86         GenerateEnums gen = new GenerateEnums();
87         gen.showLanguageInfo();
88         gen.loadCLDRData();
89         gen.showCounts();
90         gen.showCurrencies();
91         gen.showLanguages();
92         gen.showScripts();
93         gen.showRegionCodeInfo();
94         System.out.println("DONE");
95     }
96 
showCounts()97     private void showCounts() {
98         System.out.format("Language Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
99             "language").size());
100         System.out.format("Script Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
101             "script").size());
102         System.out.format("Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
103             "territory").size());
104     }
105 
showCurrencies()106     private void showCurrencies() throws IOException {
107         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
108         Log.println();
109         Log.println("Currency Data");
110         Log.println();
111         showGeneratedCommentStart(CODE_INDENT);
112         compareSets("currencies from sup.data", currencyCodes, "valid currencies",
113             validCurrencyCodes);
114         Set<String> unused = new TreeSet<String>(validCurrencyCodes);
115         unused.removeAll(currencyCodes);
116         showCurrencies(currencyCodes);
117         Log.println();
118         showCurrencies(unused);
119         Map<String, String> sorted = new TreeMap<String, String>(Collator
120             .getInstance(ULocale.ENGLISH));
121         for (String code : validCurrencyCodes) {
122             if (unused.contains(code) && !code.equals("CLF"))
123                 continue; // we include CLF for compatibility
124             sorted.put(getName(code), code);
125         }
126         int lineLength = "  /** Belgian Franc */                                            BEF,"
127             .length();
128         for (String name : sorted.keySet()) {
129             printRow(Log.getLog(), sorted.get(name), name, "currency", null,
130                 lineLength);
131         }
132         showGeneratedCommentEnd(CODE_INDENT);
133         Log.close();
134     }
135 
getName(String code)136     private String getName(String code) {
137         String result = english.getName(CLDRFile.CURRENCY_NAME, code);
138         if (result == null) {
139             result = code;
140             System.out.println("Failed to find: " + code);
141         }
142         return result;
143     }
144 
showCurrencies(Set<String> both)145     private void showCurrencies(Set<String> both) {
146         // /** Afghani */ AFN,
147         for (Iterator<String> it = both.iterator(); it.hasNext();) {
148             String code = it.next();
149             String englishName = getName(code);
150             if (englishName == null) {
151             }
152             Set<String> regions = unlimitedCurrencyCodes.getAll(code);
153             System.out
154                 .println(code
155                     + "\t"
156                     + englishName
157                     + "\t"
158                     + (validCurrencyCodes.contains(code) ? currencyCodes
159                         .contains(code) ? "" : "valid-only" : "supp-only")
160                     + "\t"
161                     + (regions != null ? regions : "unused"));
162         }
163     }
164 
showScripts()165     private void showScripts() throws IOException {
166         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
167         Log.println();
168         Log.println("Script Data");
169         Log.println();
170 
171         showGeneratedCommentStart(CODE_INDENT);
172         Map<String, String> code_replacements = new TreeMap<String, String>();
173         int len = "  /** Arabic */                                        Arab,"
174             .length();
175         for (Iterator<String> it = scripts.iterator(); it.hasNext();) {
176             String code = it.next();
177             String englishName = english.getName(CLDRFile.SCRIPT_NAME, code);
178             if (englishName == null)
179                 continue;
180             printRow(Log.getLog(), code, null, "script", code_replacements, len);
181             // Log.println(" /**" + englishName + "*/ " + code + ",");
182         }
183         showGeneratedCommentEnd(CODE_INDENT);
184         Log.close();
185     }
186 
showLanguageInfo()187     private void showLanguageInfo() throws IOException {
188         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
189         System.out.println();
190         System.out.println("Language Converter");
191         System.out.println();
192         StringBuilder buffer = new StringBuilder();
193         // language information
194         for (String language : sc.getAvailableCodes("language")) {
195             Scope scope = Iso639Data.getScope(language);
196             if (scope == Scope.PrivateUse) {
197                 continue;
198             }
199             buffer.setLength(0);
200             String alpha3 = Iso639Data.toAlpha3(language);
201             if (alpha3 != null) {
202                 buffer.append(".add(\"" + alpha3 + "\")");
203             }
204             Type type = Iso639Data.getType(language);
205             if (type != Type.Living) {
206                 buffer.append(".add(Type." + type + ")");
207             }
208             if (scope != Scope.Individual) {
209                 buffer.append(".add(Scope." + scope + ")");
210             }
211             if (buffer.length() > 0) {
212                 Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
213             }
214         }
215         Log.close();
216     }
217 
showLanguages()218     private void showLanguages() throws IOException {
219         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
220         System.out.println();
221         System.out.println("Language Data");
222         System.out.println();
223 
224         for (Iterator<String> it = languages.iterator(); it.hasNext();) {
225             String code = it.next();
226             String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
227             if (englishName == null)
228                 continue;
229             System.out.println("     /**" + englishName + "*/    " + code + ",");
230         }
231 
232         showGeneratedCommentStart(LIST_INDENT);
233         /*
234          * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa
235          * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + "
236          * as ast ath aus av awa ay az ba bad bai bal ban bas bat be"
237          */
238         StringBuffer buffer = new StringBuffer();
239         int lineLimit = 70 - LIST_INDENT.length();
240         char lastChar = 0;
241         for (Iterator<String> it = languages.iterator(); it.hasNext();) {
242             String code = it.next();
243             if (code.equals("root")) {
244                 continue;
245             }
246             if (code.charAt(0) != lastChar
247                 || buffer.length() + 1 + code.length() > lineLimit) {
248                 if (buffer.length() != 0)
249                     Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
250                 buffer.setLength(0);
251                 lastChar = code.charAt(0);
252             }
253             buffer.append(code).append(' ');
254         }
255         // remove the very last space
256         if (buffer.charAt(buffer.length() - 1) == ' ') {
257             buffer.setLength(buffer.length() - 1);
258         }
259         Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
260 
261         showGeneratedCommentEnd(LIST_INDENT);
262         Log.close();
263     }
264 
265     @SuppressWarnings("rawtypes")
join(Collection collection, String separator)266     private Object join(Collection collection, String separator) {
267         if (collection == null)
268             return null;
269         StringBuffer result = new StringBuffer();
270         boolean first = true;
271         for (Iterator it = collection.iterator(); it.hasNext();) {
272             if (first)
273                 first = false;
274             else
275                 result.append(separator);
276             result.append(it.next());
277         }
278         return result.toString();
279     }
280 
281     static NumberFormat threeDigit = new DecimalFormat("000");
282 
loadCLDRData()283     public void loadCLDRData() throws IOException {
284         // BufferedReader codes = Utility.getUTF8Data("territory_codes.txt");
285         // while (true) {
286         // String line = codes.readLine();
287         // if (line == null)
288         // break;
289         // line = line.split("#")[0].trim();
290         // if (line.length() == 0)
291         // continue;
292         // String[] sourceValues = line.split("\\s+");
293         // String[] values = new String[5];
294         // for (int i = 0; i < values.length; ++i) {
295         // if (i >= sourceValues.length || sourceValues[i].equals("-"))
296         // values[i] = null;
297         // else
298         // values[i] = sourceValues[i];
299         // }
300         // String alpha2 = values[0];
301         // cldrCodes.add(alpha2);
302         // if (isPrivateUseRegion(alpha2))
303         // continue;
304         // String numeric = values[1];
305         // String alpha3 = values[2];
306         // String internet = values[3];
307         // if (internet != null)
308         // internet = internet.toUpperCase();
309         // String fips10 = values[4];
310         // String enumValue = enumName(alpha2);
311         // enum_alpha3.put(enumValue, alpha3);
312         // enum_UN.put(enumValue, numeric);
313         // enum_FIPS10.put(enumValue, fips10);
314         // enum_TLD.put(enumValue, internet);
315         // }
316         // codes.close();
317         DecimalFormat threeDigits = new DecimalFormat("000");
318         for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
319             cldrCodes.add(value);
320             if (isPrivateUseRegion(value)) continue;
321             enum_UN.put(value,
322                 threeDigits.format(supplementalDataInfo.getNumericTerritoryMapping().getAll(value).iterator().next()));
323         }
324         for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
325             cldrCodes.add(value);
326             if (isPrivateUseRegion(value)) continue;
327             enum_alpha3.put(value, supplementalDataInfo.getAlpha3TerritoryMapping().getAll(value).iterator().next());
328         }
329 
330         BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt");
331         Map<String, String> macro_name = new TreeMap<String, String>();
332         while (true) {
333             String line = codes.readLine();
334             if (line == null)
335                 break;
336             line = line.trim();
337             if (line.length() == 0)
338                 continue;
339             if (line.charAt(0) < '0' || line.charAt(0) > '9') {
340                 System.out.println("GenerateEnums: Skipping: " + line);
341                 continue;
342             }
343             String[] sourceValues = line.split("\\s+");
344             int code = Integer.parseInt(sourceValues[0]);
345             String codeName = threeDigit.format(code);
346             macro_name.put(codeName, line);
347         }
348         codes.close();
349 //        String values = supplementalDataInfo.getValidityInfo().get("$territory").get1().trim();
350         Map<Status, Set<String>> validRegions = Validity.getInstance().getStatusToCodes(LstrType.region);
351         Set<String> regions = new TreeSet<String>();
352         regions.addAll(validRegions.get(Status.regular));
353         regions.addAll(validRegions.get(Status.macroregion));
354 //        String[] validTerritories = values.split("\\s+");
355 //        for (int i = 0; i < validTerritories.length; ++i) {
356         for (String region : regions) {
357             if (corrigendum.contains(region)) {
358                 System.out.println("Skipping " + region + "\t\t"
359                     + getEnglishName(region));
360                 continue; // exception, corrigendum
361             }
362             if (isPrivateUseRegion(region))
363                 continue;
364             if (region.charAt(0) < 'A') {// numeric
365                 enum_UN.put(enumName(region), region);
366                 cldrCodes.add(region);
367             } else {
368                 if (enum_alpha3.get(region) == null) {
369                     System.out.println("Missing alpha3 for: " + region);
370                 }
371             }
372         }
373         checkDuplicates(enum_UN);
374         checkDuplicates(enum_alpha3);
375         Set<String> availableCodes = new TreeSet<String>(sc.getAvailableCodes("territory"));
376         compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
377         Set<String> missing = new TreeSet<String>(availableCodes);
378         missing.removeAll(cldrCodes);
379         // don't care list: "003"
380         // missing.remove("003");
381         // missing.remove("172");
382         // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
383         missing.remove("EA");
384         missing.remove("EZ");
385         missing.remove("IC");
386         missing.remove("QU");
387         missing.remove("UN");
388 
389         if (missing.size() != 0) {
390             throw new IllegalArgumentException("Codes in Registry but not in CLDR: "
391                 + missing);
392         }
393 
394         Set<String> UNValues = new TreeSet<String>(enum_UN.values());
395 
396         for (Iterator<String> it = macro_name.keySet().iterator(); it.hasNext();) {
397             Object key = it.next();
398             Object value = macro_name.get(key);
399             if (!UNValues.contains(key)) {
400                 System.out.println("Macro " + key + "\t" + value);
401             }
402 
403         }
404 
405         for (Iterator<String> it = enum_UN.keySet().iterator(); it.hasNext();) {
406             String region = it.next();
407             String englishName = getEnglishName(region);
408             if (englishName == null) {
409                 englishName = "NULL"; // for debugging\
410             }
411             String rfcName = getRFC3066Name(region);
412             if (!englishName.equals(rfcName)) {
413                 System.out.println("Different names: {\"" + region + "\",\t\""
414                     + englishName + " (" + rfcName + ")\"},");
415             }
416         }
417 
418         XPathParts parts = new XPathParts();
419         getContainment();
420 
421         DateFormat[] simpleFormats = { new SimpleDateFormat("yyyy-MM-dd"),
422             new SimpleDateFormat("yyyy-MM"), new SimpleDateFormat("yyyy"), };
423         Date today = new Date();
424         Date longAgo = new Date(1000 - 1900, 1, 1);
425         currencyCodes = new TreeSet<String>();
426         unlimitedCurrencyCodes = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
427         for (Iterator<String> it = supplementalData
428             .iterator("//supplementalData/currencyData/region"); it.hasNext();) {
429             String path = it.next();
430             parts.set(path);
431             String region = parts.findAttributeValue("region", "iso3166");
432             String code = parts.findAttributeValue("currency", "iso4217");
433             String to = parts.findAttributeValue("currency", "to");
434             main: if (to == null) {
435                 unlimitedCurrencyCodes.put(code, region);
436             } else {
437                 for (int i = 0; i < simpleFormats.length; ++i) {
438                     try {
439                         Date foo = simpleFormats[i].parse(to);
440                         if (foo.compareTo(longAgo) < 0) {
441                             System.out.println("Date Error: can't parse " + to);
442                             break main;
443                         } else if (foo.compareTo(today) >= 0) {
444                             unlimitedCurrencyCodes.put(code, region);
445                         }
446                         break main;
447                     } catch (ParseException e) {
448                     }
449                 }
450                 System.out.println("Date Error: can't parse " + to);
451             }
452             currencyCodes.add(code);
453         }
454 
455         validCurrencyCodes = new TreeSet<String>();
456         Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
457         for (String code : bcp47CurrencyCodes) {
458             validCurrencyCodes.add(code.toUpperCase());
459         }
460 
461         scripts = supplementalDataInfo.getCLDRScriptCodes();
462         languages = supplementalDataInfo.getCLDRLanguageCodes();
463 
464         // Set availableCodes = new TreeSet(sc.getAvailableCodes("territory"));
465         // availableCodes.add("003");
466         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
467         // String code = (String) next())
468         // canonicalRegion_UN.put(alpha2, numeric);
469         // }
470 
471         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
472         // String code = (String)it.next();
473         // RegionCode region = map_id_canonical_RFC.get(code);
474         // if (region != null) continue; // skip others
475         // region = new RegionCode(code);
476         // map_id_canonical_RFC.put(code,region);
477         // map_canonical_id_RFC.put(region,code);
478         // if ("A".compareTo(code) > 0) {
479         // map_id_canonical_UN.put(code,region);
480         // map_canonical_id_UN.put(region,code);
481         // } else {
482         // map_id_canonical_A2.put(code,region);
483         // map_canonical_id_A2.put(region,code);
484         // }
485         // }
486         // for (Iterator it = goodAvailableCodes.iterator(); it.hasNext();) {
487         // String code = (String)it.next();
488         // good.add(getInstance(code));
489         // }
490     }
491 
getContainment()492     public void getContainment() {
493         XPathParts parts = new XPathParts();
494         // <group type="001" contains="002 009 019 142 150"/> <!--World -->
495         for (Iterator<String> it = supplementalData
496             .iterator("//supplementalData/territoryContainment/group"); it.hasNext();) {
497             String path = it.next();
498             String fullPath = supplementalData.getFullXPath(path);
499             parts.set(fullPath);
500             String container = parts.getAttributeValue(parts.size() - 1, "type");
501             final String containedString = parts.getAttributeValue(-1, "contains");
502             List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
503             containment.put(container, contained);
504         }
505         // fix recursiveContainment.
506         // for (String region : (Collection<String>)containment.keySet()) {
507         // Set temp = new LinkedHashSet();
508         // addContains(region, temp);
509         // recursiveContainment.put(region, temp);
510         // }
511         Set<String> startingFromWorld = new TreeSet<String>();
512         addContains("001", startingFromWorld);
513         compareSets("World", startingFromWorld, "CLDR", cldrCodes);
514         // generateContains();
515     }
516 
generateContains()517     private void generateContains() {
518 
519         for (String region : containment.keySet()) {
520             List<String> plain = containment.get(region);
521             // Collection recursive = (Collection)recursiveContainment.get(region);
522 
523             String setAsString = CldrUtility.join(plain, " ");
524             // String setAsString2 = recursive.equals(plain) ? "" : ", " +
525             // Utility.join(recursive," ");
526             Log.println("\t\tadd(\"" + region + "\", \"" + setAsString + "\");");
527         }
528     }
529 
530     Map<String, List<String>> containment = new TreeMap<String, List<String>>();
531 
532     // Map recursiveContainment = new TreeMap();
533 
addContains(String string, Set<String> startingFromWorld)534     private void addContains(String string, Set<String> startingFromWorld) {
535         startingFromWorld.add(string);
536         List<String> contained = (List<String>) containment.get(string);
537         if (contained == null)
538             return;
539         for (Iterator<String> it = contained.iterator(); it.hasNext();) {
540             addContains(it.next(), startingFromWorld);
541         }
542     }
543 
544     @SuppressWarnings("rawtypes")
compareSets(String name, Set availableCodes, String name2, Set cldrCodes)545     private void compareSets(String name, Set availableCodes, String name2,
546         Set cldrCodes) {
547         Set temp = new TreeSet();
548         temp.addAll(availableCodes);
549         temp.removeAll(cldrCodes);
550         System.out.println("In " + name + " but not in " + name2 + ": " + temp);
551         temp.clear();
552         temp.addAll(cldrCodes);
553         temp.removeAll(availableCodes);
554         System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
555     }
556 
557     @SuppressWarnings("rawtypes")
checkDuplicates(Map m)558     private void checkDuplicates(Map m) {
559         Map backMap = new HashMap();
560         for (Iterator it = m.keySet().iterator(); it.hasNext();) {
561             Object key = it.next();
562             Object o = m.get(key);
563             Object otherKey = backMap.get(o);
564             if (otherKey != null)
565                 System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t"
566                     + o);
567             else
568                 backMap.put(o, key);
569         }
570     }
571 
572     Set<String> corrigendum = new TreeSet<String>(Arrays.asList(new String[] { "QE", "833",
573         "830", "172" })); // 003, 419
574 
575     private Map extraNames = CollectionUtilities.asMap(new String[][] {
576         { "BU", "Burma" }, { "TP", "East Timor" }, { "YU", "Yugoslavia" },
577         { "ZR", "Zaire" }, { "CD", "Congo (Kinshasa, Democratic Republic)" },
578         { "CI", "Ivory Coast (Cote d'Ivoire)" },
579         { "FM", "Micronesia (Federated States)" },
580         { "TL", "East Timor (Timor-Leste)" },
581         // {"155","Western Europe"},
582 
583     });
584 
585     private Set<String> currencyCodes;
586 
587     private Set<String> validCurrencyCodes;
588 
589     static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo
590         .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
591 
592     /**
593      * Get the RegionCode Enum
594      *
595      * @throws IOException
596      */
showRegionCodeInfo()597     private void showRegionCodeInfo() throws IOException {
598         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
599         System.out.println();
600         System.out.println("Data for RegionCode");
601         System.out.println();
602         showGeneratedCommentStart(CODE_INDENT);
603 
604         Set<String> reordered = new TreeSet<String>(new LengthFirstComparator());
605         reordered.addAll(enum_UN.keySet());
606         Map<String, String> code_replacements = new TreeMap<String, String>();
607         int len = "  /** Polynesia */                                    UN061,"
608             .length();
609         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
610             String region = it.next();
611             printRow(Log.getLog(), region, null, "territory", code_replacements, len);
612         }
613         showGeneratedCommentEnd(CODE_INDENT);
614         Log.close();
615 
616         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
617         Log.println();
618         Log.println("Data for ISO Region Codes");
619         Log.println();
620         for (String territory : supplementalDataInfo
621             .getTerritoriesWithPopulationData()) {
622             if (territory.equals("ZZ")) {
623                 continue;
624             }
625             PopulationData popData = supplementalDataInfo
626                 .getPopulationDataForTerritory(territory);
627             // to("ak").add(Scope.Macrolanguage).add("aka");
628             Log.formatln("    addRegion(RegionCode.%s, %s, %s, %s) // %s", territory,
629                 format(popData.getPopulation()), format(popData
630                     .getLiteratePopulation()
631                     / popData.getPopulation()),
632                 format(popData.getGdp()), english
633                     .getName("territory", territory));
634             // remove all the ISO 639-3 until they are part of BCP 47
635             // we need to remove in earlier pass so we have the count
636             Set<String> languages = new TreeSet<String>();
637             for (String language : supplementalDataInfo
638                 .getLanguagesForTerritoryWithPopulationData(territory)) {
639                 if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
640                     continue;
641                 }
642                 popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
643                     language, territory);
644                 if (popData.getPopulation() == 0
645                     || Double.isNaN(popData.getLiteratePopulation()
646                         / popData.getPopulation())) {
647                     continue;
648                 }
649                 languages.add(language);
650             }
651             int count = languages.size();
652             for (String language : languages) {
653                 --count; // we need to know the last one
654                 popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
655                     language, territory);
656                 Log.formatln("    .addLanguage(\"%s\", %s, %s)%s // %s", language,
657                     format(popData.getPopulation()), format(popData
658                         .getLiteratePopulation()
659                         / popData.getPopulation()),
660                     (count == 0 ? ";" : ""), english
661                         .getName(language));
662             }
663         }
664         Log.close();
665 
666         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
667         Log.println();
668         Log.println("Data for ISO Region Codes");
669         Log.println();
670         showGeneratedCommentStart(DATA_INDENT);
671         // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are
672         // containees
673         reordered = new TreeSet<String>(new DeprecatedAndLengthFirstComparator("territory"));
674         reordered.addAll(enum_UN.keySet());
675         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
676             String region = it.next();
677             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
678             // UN
679             // name
680             // int un = Integer.parseInt((String) enum_UN.get(region)); // get around
681             // dumb octal
682             // syntax
683             String isoCode = (String) enum_alpha3.get(region);
684             if (isoCode == null)
685                 continue;
686             Log.println(DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode."
687                 + region + ");");
688         }
689         doAliases(code_replacements);
690         showGeneratedCommentEnd(DATA_INDENT);
691         Log.println();
692         Log.println("Data for M.49 Region Codes");
693         Log.println();
694         showGeneratedCommentStart(DATA_INDENT);
695 
696         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
697             String region = it.next();
698             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
699             // UN
700             // name
701             int un = Integer.parseInt((String) enum_UN.get(region), 10); // get
702             // around
703             // dumb
704             // octal
705             // syntax
706             Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region
707                 + ");");
708         }
709         doAliases(code_replacements);
710 
711         System.out.println("Plain list");
712         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
713             String region = it.next();
714             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
715             // UN
716             // name
717             String newCode = code_replacements.get(region);
718             if (newCode != null)
719                 continue;
720 
721             int un = Integer.parseInt((String) enum_UN.get(region), 10); // get
722             // around
723             // dumb
724             // octal
725             // syntax
726             System.out.println(un + "\t" + region + "\t"
727                 + english.getName("territory", region));
728         }
729 
730         showGeneratedCommentEnd(DATA_INDENT);
731 
732         getContainment();
733         Log.close();
734     }
735 
736     static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);
737 
738     static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);
739     static {
740         nf.setMaximumFractionDigits(3);
741         sf.setMaximumFractionDigits(3);
742         nf.setGroupingUsed(false);
743     }
744 
format(double value)745     private String format(double value) {
746         double newValue = CldrUtility.roundToDecimals(value, 3);
747         String option1 = nf.format(newValue);
748         String option2 = sf.format(value);
749         return option1.length() <= option2.length() ? option1 : option2;
750     }
751 
doAliases(Map<String, String> code_replacements)752     private void doAliases(Map<String, String> code_replacements) {
753         for (String code : code_replacements.keySet()) {
754             String newCode = code_replacements.get(code);
755             if (newCode.length() == 0)
756                 newCode = "ZZ";
757             Log.println(DATA_INDENT + "addAlias(" + "RegionCode." + code + ", \""
758                 + newCode + "\");");
759         }
760     }
761 
showGeneratedCommentEnd(String indent)762     private void showGeneratedCommentEnd(String indent) {
763         Log.println(indent + "/* End of generated code. */");
764     }
765 
showGeneratedCommentStart(String indent)766     private void showGeneratedCommentStart(String indent) {
767         Log.println(indent + "/*");
768         Log.println(indent
769             + " * The following information is generated from a tool,");
770         Log.println(indent + " * as described on");
771         Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
772         Log.println(indent + " * Do not edit manually.");
773         Log.println(indent + " * Start of generated code.");
774         Log.println(indent + " */");
775     }
776 
777     public final static class LengthFirstComparator implements Comparator<Object> {
compare(Object a, Object b)778         public int compare(Object a, Object b) {
779             String as = a.toString();
780             String bs = b.toString();
781             if (as.length() < bs.length())
782                 return -1;
783             if (as.length() > bs.length())
784                 return 1;
785             return as.compareTo(bs);
786         }
787     }
788 
789     public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
790         String type;
791 
DeprecatedAndLengthFirstComparator(String type)792         DeprecatedAndLengthFirstComparator(String type) {
793             this.type = type;
794         }
795 
compare(Object a, Object b)796         public int compare(Object a, Object b) {
797             String as = a.toString();
798             String bs = b.toString();
799             String ar = getDeprecatedReplacement(type, as);
800             String br = getDeprecatedReplacement(type, bs);
801             // put the deprecated ones first, eg those that aren't null
802             if (ar != null) {
803                 if (br == null)
804                     return -1;
805             }
806             if (br != null) {
807                 if (ar == null)
808                     return 1;
809             }
810             // now check the length
811             if (as.length() < bs.length())
812                 return -1;
813             if (as.length() > bs.length())
814                 return 1;
815             return as.compareTo(bs);
816         }
817     }
818 
819     /**
820      * Returns null if not deprecated, otherwise "" if there is no replacement,
821      * otherwise the replacement.
822      *
823      * @return
824      */
getDeprecatedReplacement(String type, String cldrTypeValue)825     public String getDeprecatedReplacement(String type, String cldrTypeValue) {
826         if (type.equals("currency")) {
827             return null;
828         }
829         String path = supplementalMetadata.getFullXPath(
830             "//supplementalData/metadata/alias/" + type + "Alias[@type=\""
831                 + cldrTypeValue + "\"]",
832             true);
833         if (path == null)
834             return null;
835         String replacement = new XPathParts().set(path).findAttributeValue(
836             "territoryAlias", "replacement");
837         if (replacement == null)
838             return "";
839         return replacement;
840     }
841 
842     static Transliterator doFallbacks = Transliterator.createFromRules("id",
843         "[’ʻ] > ''; ", Transliterator.FORWARD);
844 
printRow(PrintWriter out, String codeName, String englishName, String type, Map<String, String> code_replacements, int lineLength)845     private void printRow(PrintWriter out, String codeName, String englishName,
846         String type, Map<String, String> code_replacements, int lineLength) {
847         // int numeric = Integer.parseInt((String) enum_UN.get(codeName));
848         // String alpha3 = (String) enum_alpha3.get(codeName);
849         String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix
850         // UN
851         // name
852         String replacement = getDeprecatedReplacement(type, cldrName);
853 
854         String resolvedEnglishName = englishName != null ? englishName : type
855             .equals("territory") ? getEnglishName(codeName) : type
856                 .equals("currency") ? getName(codeName) : english.getName(CLDRFile.SCRIPT_NAME, codeName);
857         resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);
858 
859         String prefix = CODE_INDENT + "/** " + resolvedEnglishName; // + " - " +
860         // threeDigit.format(numeric);
861         String printedCodeName = codeName;
862         if (replacement != null) {
863             code_replacements.put(codeName, replacement);
864             out.println(prefix);
865             prefix = CODE_INDENT + " * @deprecated"
866                 + (replacement.length() == 0 ? "" : " see " + replacement);
867             printedCodeName = "@Deprecated " + printedCodeName;
868         }
869         prefix += " */";
870 
871         if (codeName.equals("UN001")) {
872             out.println();
873         }
874         if (prefix.length() > lineLength - (printedCodeName.length() + 1)) {
875             // break at last space
876             int lastFit = prefix.lastIndexOf(' ', lineLength
877                 - (printedCodeName.length() + 1) - 2);
878             out.println(prefix.substring(0, lastFit));
879             prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
880         }
881         out.print(prefix);
882         out.print(Utility.repeat(" ", (lineLength
883             - (prefix.length() + printedCodeName.length() + 1))));
884         out.println(printedCodeName + ",");
885     }
886 
getEnglishName(String codeName)887     private String getEnglishName(String codeName) {
888         if (codeName.length() > 3)
889             codeName = codeName.substring(2); // fix UN name
890         String name = (String) extraNames.get(codeName);
891         if (name != null)
892             return name;
893         name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
894         if (name != null)
895             return name;
896         return codeName;
897     }
898 
getRFC3066Name(String codeName)899     private String getRFC3066Name(String codeName) {
900         if (codeName.length() > 2)
901             codeName = codeName.substring(2); // fix UN name
902         List<String> list = sc.getFullData("territory", codeName);
903         if (list == null)
904             return null;
905         return (String) list.get(0);
906     }
907 
enumName(String codeName)908     private String enumName(String codeName) {
909         return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
910     }
911 
quote(Object input)912     static String quote(Object input) {
913         if (input != null)
914             return '"' + input.toString().trim() + '"';
915         return null;
916     }
917 
isPrivateUseRegion(String codeName)918     static boolean isPrivateUseRegion(String codeName) {
919         // AA, QM..QZ, XA..XZ, ZZ - CLDR codes
920         if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
921             return false;
922         } else if (codeName.equals("AA") || codeName.equals("ZZ")) {
923             return true;
924         } else if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
925             return true;
926         } else if (codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0) {
927             return true;
928         }
929         return false;
930     }
931     /*
932      * <reset before="tertiary">ウ</reset> <x><context>ウ</context><t>ヽ</t></x>
933      * <x><context>ウ</context><i>ヽ</i></x>
934      *
935      * <x><context>う</context><i>ゝ</i></x> <x><context>ゥ</context><i>ヽ</i></x>
936      * <x><context>ゥ</context><i>ヽ</i></x> <x><context>ぅ</context><i>ゝ</i></x>
937      * <x><context>ヴ</context><i>ヽ</i></x>
938      *
939      * <x><context>ゔ</context><i>ゝ</i></x> <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x>
940      * <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x> <x><context>う</context><i>ゞ</i><extend>゙</extend></x>
941      *
942      * <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x> <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x>
943      * <x><context>ぅ</context><i>ゞ</i><extend>゙</extend></x> <x><context>ヴ</context><i>ヾ</i><extend>゙</extend></x>
944      *
945      * <x><context>ゔ</context><i>ゞ</i><extend>゙</extend></x>
946      */
947 }