1 package org.unicode.cldr.tool;
2 
3 import java.util.HashMap;
4 import java.util.HashSet;
5 import java.util.Iterator;
6 import java.util.LinkedHashSet;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.Set;
10 import java.util.TreeMap;
11 import java.util.TreeSet;
12 import java.util.regex.Matcher;
13 
14 import org.unicode.cldr.util.CLDRConfig;
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.Factory;
17 import org.unicode.cldr.util.Pair;
18 import org.unicode.cldr.util.PatternCache;
19 import org.unicode.cldr.util.SupplementalDataInfo;
20 import org.unicode.cldr.util.Timer;
21 import org.unicode.cldr.util.XPathParts;
22 
23 import com.google.common.base.Splitter;
24 import com.ibm.icu.text.UnicodeSet;
25 
26 public class ListUnits {
27     private static final UnicodeSet BIDI_CONTROL = new UnicodeSet("[:bidi_control:]").freeze();
28     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
29     private static final SupplementalDataInfo SUPP = CONFIG.getSupplementalDataInfo();
30     private static final Task TASK = Task.listSimpleUnits;
31 
32     private enum Task {
33         listUnits, listSimpleUnits, showDecimals, getDigits,
34     }
35 
36     enum Type {
37         root, en, other;
fromString(String type)38         static Type fromString(String type) {
39             return type.equals("en") ? en : type.equals("root") ? root : other;
40         }
41     }
42 
main(String[] args)43     public static void main(String[] args) {
44         Factory cldrFactory = CONFIG.getCldrFactory();
45         Set<String> defaultContent = SUPP.getDefaultContentLocales();
46         Set<String> seen = new HashSet<>();
47 
48         LinkedHashSet<String> items = new LinkedHashSet<>();
49         items.add("root");
50         items.add("en");
51         items.addAll(cldrFactory.getAvailableLanguages());
52         Map<String, Data> rootMap = new HashMap<>();
53         Map<String, Data> enMap = new HashMap<>();
54 
55         Timer timer = new Timer();
56         int count = 0;
57         XPathParts parts = new XPathParts();
58         Splitter SEMI = Splitter.on(";").trimResults();
59         Matcher currencyMatcher = PatternCache.get("([^0#]*).*[0#]([^0#]*)").matcher("");
60 
61         for (String locale : items) {
62             Type type = Type.fromString(locale);
63             if (type == Type.root || type == Type.en || defaultContent.contains(locale)) {
64                 continue;
65             }
66             CLDRFile cldrFile = cldrFactory.make(locale, true);
67 //            DecimalFormat format = new DecimalFormat(currencyPattern);
68 //            String prefix = format.getPositivePrefix();
69 //            String suffix = format.getPositiveSuffix();
70 
71 //            ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile);
72 //            DecimalFormat format = builder.getCurrencyFormat("XXX");
73 //            String prefix = format.getPositivePrefix().replace("XXX", "\u00a4");
74 //            String suffix = format.getPositiveSuffix().replace("XXX", "\u00a4");
75             switch (TASK) {
76             case showDecimals: {
77                 String compactPathPrefix = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]";
78                 String currencyPattern = cldrFile
79                     .getStringValue(
80                         "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]");
81                 String firstPart = SEMI.split(currencyPattern).iterator().next();
82                 if (!currencyMatcher.reset(firstPart).matches()) {
83                     throw new IllegalArgumentException("bad matcher");
84                 }
85                 String prefix = currencyMatcher.group(1);
86                 String suffix = currencyMatcher.group(2);
87                 System.out.println("\n#" + locale + "\t«" + prefix + "»\t«" + suffix + "»\t«" + currencyPattern + "»");
88                 TreeMap<String, String> data = new TreeMap<>();
89                 for (String path : cldrFile.fullIterable()) {
90 //                    if (s.contains("decimalFormats")) {
91 //                        System.out.println(s);
92 //                    }
93                     if (path.startsWith(compactPathPrefix)) {
94                         String value = cldrFile.getStringValue(path);
95                         String mod = path.replace("decimal", "currency") + "[@draft=\"provisional\"]";
96                         //                        // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
97                         data.put(mod, "locale=" + locale
98                             + " ; action=add"
99                             + " ; new_value=" + prefix + value + suffix
100                             + " ; new_path=" + mod);
101                     }
102                 }
103                 for (Entry<String, String> line : data.entrySet()) {
104                     System.out.println(line.getValue());
105                 }
106                 data.clear();
107                 break;
108             }
109             case listUnits:
110             case listSimpleUnits: {
111                 Set<String> units = getUnits(cldrFile, TASK, type == Type.root ? rootMap : type == Type.en ? enMap : null);
112                 if (type == Type.en) {
113                     TreeSet<String> missing = new TreeSet<>(seen);
114                     missing.removeAll(units);
115                     for (String unit : missing) {
116                         // locale=en ; action=add ; new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
117                         Data data = rootMap.get(unit);
118                         if (data != null) {
119                             System.out.println(data);
120                         }
121                     }
122                 }
123                 Splitter HYPHEN = Splitter.on('-');
124                 String oldBase = "";
125                 for (String unit : units) {
126                     if (!seen.contains(unit)) {
127                         switch (TASK) {
128                         case listSimpleUnits:
129                             String base = HYPHEN.split(unit).iterator().next();
130                             if (!base.equals(oldBase)) {
131                                 oldBase = base;
132                                 System.out.println();
133                             } else {
134                                 System.out.print(' ');
135                             }
136                             System.out.print(unit);
137                             break;
138                         case listUnits:
139                             System.out.println("\t" + unit.replace("/", "\t")
140                                 .replaceFirst("-", "\t") + "\t" + locale);
141                             break;
142                         }
143                         seen.add(unit);
144                     }
145                 }
146                 break;
147             }
148             case getDigits: {
149                 getDigits(cldrFile);
150                 break;
151             }
152             }
153         }
154         System.out.println();
155         System.out.println("#Done: " + count + ", " + timer);
156     }
157 
getDigits(CLDRFile cldrFile)158     static void getDigits(CLDRFile cldrFile) {
159         System.out.println(cldrFile.getLocaleID());
160         String numberSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
161         Set<String> seen = new HashSet<>();
162         seen.add(numberSystem);
163         Pair<UnicodeSet, UnicodeSet> main = getCharacters(cldrFile, numberSystem);
164         System.out.println("\tdefault: " + numberSystem + ", " + main.getFirst().toPattern(false) + ", " + main.getSecond().toPattern(false));
165         for (Iterator<String> it = cldrFile.iterator("//ldml/numbers/otherNumberingSystems"); it.hasNext();) {
166             String path = it.next();
167             String otherNumberingSystem = cldrFile.getWinningValue(path);
168             if (seen.contains(otherNumberingSystem)) {
169                 continue;
170             }
171             seen.add(otherNumberingSystem);
172             main = getCharacters(cldrFile, otherNumberingSystem);
173             System.out.println("\tother: " + otherNumberingSystem
174                 + ", " + main.getFirst().toPattern(false) + "\t" + main.getSecond().toPattern(false));
175         }
176     }
177 
getCharacters(CLDRFile cldrFileToCheck, String numberSystem)178     private static Pair<UnicodeSet, UnicodeSet> getCharacters(CLDRFile cldrFileToCheck, String numberSystem) {
179         String digitString = SUPP.getDigits(numberSystem);
180         UnicodeSet digits = digitString == null ? UnicodeSet.EMPTY : new UnicodeSet().addAll(digitString);
181 
182         UnicodeSet punctuation = new UnicodeSet();
183         Set<String> errors = new LinkedHashSet<>();
184         add(cldrFileToCheck, "decimal", numberSystem, punctuation, errors);
185         //add(cldrFileToCheck, "exponential", numberSystem, punctuation, errors);
186         add(cldrFileToCheck, "group", numberSystem, punctuation, errors);
187         //add(cldrFileToCheck, "infinity", numberSystem, punctuation, errors);
188         add(cldrFileToCheck, "minusSign", numberSystem, punctuation, errors);
189         //add(cldrFileToCheck, "nan", numberSystem, punctuation, errors);
190         add(cldrFileToCheck, "list", numberSystem, punctuation, errors);
191         add(cldrFileToCheck, "percentSign", numberSystem, punctuation, errors);
192         add(cldrFileToCheck, "perMille", numberSystem, punctuation, errors);
193         add(cldrFileToCheck, "plusSign", numberSystem, punctuation, errors);
194         // symbols.setZeroDigit(getSymbolString(cldrFileToCheck, "nativeZeroDigit", numberSystem));
195         if (!errors.isEmpty() && digitString != null) {
196             System.out.println("Missing: " + numberSystem + "\t" + errors);
197         }
198         punctuation.removeAll(BIDI_CONTROL);
199         return Pair.of(digits, punctuation);
200     }
201 
add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors)202     private static void add(CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors) {
203         final String result = getSymbolString(cldrFileToCheck, subtype, numberSystem);
204         if (result == null) {
205             errors.add(subtype);
206         } else {
207             punctuation.addAll(result);
208         }
209     }
210 
getSymbolString(CLDRFile cldrFile, String key, String numsys)211     private static String getSymbolString(CLDRFile cldrFile, String key, String numsys) {
212         return cldrFile.getWinningValue("//ldml/numbers/symbols[@numberSystem=\"" + numsys + "\"]/" + key);
213     }
214 
215     static final class Data {
Data(String path2, String stringValue)216         public Data(String path2, String stringValue) {
217             path = path2;
218             value = stringValue;
219         }
220 
221         final String path;
222         final String value;
223 
toString()224         public String toString() {
225             return "locale=en"
226                 + " ; action=add"
227                 + " ; new_path=" + path
228                 + " ; new_value=" + value;
229         }
230     }
231 
getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra)232     private static Set<String> getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra) {
233         Set<String> seen = new TreeSet<String>();
234         for (String path : cldrFile) {
235             if (!path.contains("/unit")) {
236                 continue;
237             }
238             XPathParts parts = XPathParts.getFrozenInstance(path);
239             String unit = parts.findAttributeValue("unit", "type");
240             if (unit == null) {
241                 continue;
242             }
243             String key = unit;
244             if (task == Task.listUnits) {
245                 String length = parts.findAttributeValue("unitLength", "type");
246                 String per = "perUnitPattern".equals(parts.getElement(-1)) ? "per" : "";
247                 key = unit + "/" + length + "/" + per;
248             }
249             seen.add(key);
250             if (extra != null && !path.endsWith("/alias")) {
251                 extra.put(key, new Data(path, cldrFile.getStringValue(path)));
252             }
253         }
254         return seen;
255     }
256 }
257