1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.lang.invoke.MethodHandles;
6 import java.util.Collection;
7 import java.util.Date;
8 import java.util.EnumMap;
9 import java.util.LinkedHashMap;
10 import java.util.List;
11 import java.util.Locale;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 
18 import org.unicode.cldr.draft.FileUtilities;
19 import org.unicode.cldr.draft.ScriptMetadata;
20 import org.unicode.cldr.util.CLDRPaths;
21 import org.unicode.cldr.util.CLDRTool;
22 import org.unicode.cldr.util.DtdType;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.StandardCodes.LstrField;
25 import org.unicode.cldr.util.StandardCodes.LstrType;
26 import org.unicode.cldr.util.StringRange;
27 import org.unicode.cldr.util.StringRange.Adder;
28 import org.unicode.cldr.util.SupplementalDataInfo;
29 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
30 import org.unicode.cldr.util.Validity;
31 import org.unicode.cldr.util.Validity.Status;
32 
33 import com.google.common.base.Joiner;
34 import com.google.common.base.Objects;
35 import com.google.common.collect.ImmutableSet;
36 import com.google.common.collect.ImmutableSetMultimap;
37 import com.google.common.collect.Multimap;
38 import com.google.common.collect.Multimaps;
39 import com.google.common.collect.SetMultimap;
40 import com.google.common.collect.TreeMultimap;
41 import com.ibm.icu.impl.Row.R2;
42 import com.ibm.icu.util.ICUUncheckedIOException;
43 
44 @CLDRTool(
45     alias = "generate-validity-data",
46     url = "http://cldr.unicode.org/development/updating-codes/update-validity-xml")
47 public class GenerateValidityXml {
48 
49     private static final Validity VALIDITY = Validity.getInstance();
50     private static Validity OLD_VALIDITY = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");
51 
52     private static final Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG = StandardCodes.getEnumLstreg();
53     private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();
54 
55     private static class MyAdder implements Adder {
56         Appendable target;
57         boolean twoCodePoints = false;
58         long lastCodePoint = -1;
59 
60         @Override
add(String start, String end)61         public void add(String start, String end) {
62             try {
63                 long firstCodePoint = start.codePointAt(0);
64                 if (twoCodePoints) {
65                     firstCodePoint <<= 22;
66                     firstCodePoint |= start.codePointAt(1);
67                 }
68                 if (firstCodePoint == lastCodePoint) {
69                     target.append(' ');
70                 } else {
71                     target.append("\n\t\t\t");
72                 }
73                 target.append(start);
74                 if (end != null) {
75                     target.append('~').append(end);
76                 }
77                 lastCodePoint = firstCodePoint;
78             } catch (IOException e) {
79                 throw new ICUUncheckedIOException(e);
80             }
81         }
82 
reset(boolean b)83         public void reset(boolean b) {
84             lastCodePoint = -1;
85             twoCodePoints = b;
86         }
87     }
88 
    /** Result of {@code SDI.getContainers()}; doLstr classifies a region code found in this set as a macroregion. */
    static Set<String> containment = SDI.getContainers();
    /** Registry entries for region codes (code → field → value); not referenced elsewhere in the visible source. */
    static Map<String, Map<LstrField, String>> codeToData = LSTREG.get(LstrType.region);
91 
92     static class Info {
93         String mainComment;
94         //private Relation<Validity.Status, String> statusMap = Relation.of(new EnumMap<Validity.Status, Set<String>>(Validity.Status.class), TreeSet.class);
95         Map<String, Validity.Status> codeToStatus = new TreeMap<>();
96         Map<Validity.Status, String> statusComment = new EnumMap<>(Status.class);
97         Set<String> newCodes = new TreeSet<>();
98 
99         static Map<String, Info> types = new LinkedHashMap<>();
100 
getInfo(String myType)101         static Info getInfo(String myType) {
102             Info info = types.get(myType);
103             if (info == null) {
104                 types.put(myType, info = new Info());
105             }
106             return info;
107         }
getStatusMap()108         public SetMultimap<Status, String> getStatusMap() {
109             TreeMultimap<Status, String> result = TreeMultimap.create();
110             Multimaps.invertFrom(Multimaps.forMap(codeToStatus), result);
111             return ImmutableSetMultimap.copyOf(result);
112         }
put(String key, Status value)113         public void put(String key, Status value) {
114             codeToStatus.put(key, value);
115         }
remove(String key, Status value)116         public void remove(String key, Status value) {
117             codeToStatus.remove(key, value);
118         }
clear()119         public void clear() {
120             codeToStatus.clear();
121         }
entrySet()122         public Set<Entry<String, Status>> entrySet() {
123             return codeToStatus.entrySet();
124         }
get(String key)125         public Status get(String key) {
126             return codeToStatus.get(key);
127         }
putBest(String currency, Status newStatus)128         public void putBest(String currency, Status newStatus) {
129             Status oldStatus = get(currency);
130             if (oldStatus == null || newStatus.compareTo(oldStatus) < 0) {
131                 put(currency, newStatus);
132             }
133         }
134     }
135 
    /** Alias for {@code Info.types}, the map of type name → collected validity data that main iterates over. */
    static final Map<String, Info> types = Info.types;
137 
main(String[] args)138     public static void main(String[] args) throws IOException {
139 
140         doLstr(types);
141         doSubdivisions(types);
142         doCurrency(types);
143         // write file
144         MyAdder adder = new MyAdder();
145         for (Entry<String, Info> entry : types.entrySet()) {
146             String type = entry.getKey();
147             final Info info = entry.getValue();
148             Multimap<Status, String> subtypeMap = info.getStatusMap();
149             try (PrintWriter output = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "validity/" + type + ".xml")) {
150                 adder.target = output;
151                 output.append(DtdType.supplementalData.header(MethodHandles.lookup().lookupClass())
152                     + "\t<version number=\"$Revision" /*hack to stop SVN changing this*/ + "$\"/>\n"
153                     + "\t<idValidity>\n");
154                 for (Entry<Status, Collection<String>> entry2 : subtypeMap.asMap().entrySet()) {
155                     Validity.Status subtype = entry2.getKey();
156                     Set<String> set = (Set<String>) entry2.getValue();
157                     String comment = info.statusComment.get(entry2.getKey());
158                     if (comment != null) {
159                         output.append("\t\t<!-- " + comment.replace("\n", "\n\t\t\t ") + " -->\n");
160                     }
161                     output.append("\t\t<id type='" + type + "' idStatus='" + subtype + "'>");
162                     final int size = set.size();
163                     output.append("\t\t<!-- " + size + " item" + (size > 1 ? "s" : "") // we know it’s English ;-)
164                         + " -->");
165                     adder.reset(size > 600); //  || type.equals("subdivision")
166                     StringRange.compact(set, adder, true);
167                     output.append("\n\t\t</id>\n");
168                 }
169 //                if (!info.newCodes.isEmpty()) {
170 //                    output.append("\t\t<!-- Codes added this release:\n\t\t\t" + showCodes(info.newCodes, "\n\t\t\t") + "\n\t\t-->\n");
171 //                }
172                 output.append("\t</idValidity>\n</supplementalData>\n");
173             }
174         }
175         // System.out.println("TODO: add Unknown subdivisions, add private_use currencies, ...");
176     }
177 
showCodes(Set<String> newCodes, String linePrefix)178     private static String showCodes(Set<String> newCodes, String linePrefix) {
179         StringBuilder result = new StringBuilder();
180         String last = "";
181         for (String s : newCodes) {
182             String newPrefix = s.substring(0, s.indexOf('-'));
183             if (last.equals(newPrefix)) {
184                 result.append(" ");
185             } else {
186                 if (!last.isEmpty()) {
187                     result.append(linePrefix);
188                 }
189                 last = newPrefix;
190             }
191             result.append(s);
192         }
193         return result.toString();
194     }
195 
doCurrency(Map<String, Info> types)196     private static void doCurrency(Map<String, Info> types) {
197         Info info = Info.getInfo("currency");
198         Date now = new Date();
199         Date eoy = new Date(now.getYear() + 1, 0, 1); // Dec
200         for (String region : SDI.getCurrencyTerritories()) {
201             for (CurrencyDateInfo data : SDI.getCurrencyDateInfo(region)) {
202                 String currency = data.getCurrency();
203                 Date end = data.getEnd();
204                 boolean legalTender = data.isLegalTender();
205                 Status newStatus = end.after(eoy) && legalTender ? Status.regular : Status.deprecated;
206                 info.putBest(currency, newStatus);
207             }
208         }
209         info.put(LstrType.currency.unknown, Status.unknown);
210         // make sure we don't overlap.
211         // we want to keep any code that is valid in any territory, so
212         info.remove("XXX", Status.deprecated);
213         info.remove("XXX", Status.regular);
214 
215         // just to make sure info never disappears
216         Map<String, Status> oldCodes = OLD_VALIDITY.getCodeToStatus(LstrType.currency);
217         for (Entry<String, Status> entry : oldCodes.entrySet()) {
218             String key = entry.getKey();
219             Status oldStatus = entry.getValue();
220             Status newStatus = info.get(key);
221             if (!Objects.equal(oldStatus, newStatus)) {
222                 System.out.println("Status changed: " + key + ", " + oldStatus + " => " + newStatus);
223             }
224         }
225 
226         info.statusComment.put(Status.deprecated,
227             "Deprecated values are those that are not legal tender in some country after " + (1900 + now.getYear()) + ".\n"
228                 + "More detailed usage information needed for some implementations is in supplemental data.");
229     }
230 
doSubdivisions(Map<String, Info> types)231     private static void doSubdivisions(Map<String, Info> types) {
232         Info info = Info.getInfo("subdivision");
233         Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get("subdivision");
234         for (String container : SDI.getContainersForSubdivisions()) {
235             for (String contained : SDI.getContainedSubdivisions(container)) {
236                 Status status = aliases.containsKey(contained) ? Validity.Status.deprecated : Validity.Status.regular;
237                 info.put(contained.toLowerCase(Locale.ROOT).replace("-", ""), status);
238             }
239         }
240 
241         // find out which items were valid, but are no longer in the containment map
242         // add them as deprecated
243         Map<Status, Set<String>> oldSubdivisionData = OLD_VALIDITY.getStatusToCodes(LstrType.subdivision);
244         for (Entry<Status, Set<String>> entry : oldSubdivisionData.entrySet()) {
245             for (String oldSdId : entry.getValue()) {
246                 if (info.get(oldSdId) == null) {
247                     info.put(oldSdId, Status.deprecated);
248                 }
249             }
250         }
251 
252         info.statusComment.put(Status.deprecated,
253             "Deprecated values include those that are not formally deprecated in the country in question, but have their own region codes.\n"
254                 + "It also include codes that were previously in CLDR, for compatibility.");
255         info.statusComment.put(Status.unknown,
256             "Unknown/Undetermined subdivision codes (ZZZZ) are defined for all regular region codes.");
257     }
258 
doLstr(Map<String, Info> types)259     private static void doLstr(Map<String, Info> types) throws IOException {
260         Set<String> skippedScripts = new TreeSet<>();
261         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : LSTREG.entrySet()) {
262             LstrType type = entry.getKey();
263             if (!type.isLstr || !type.isUnicode) {
264                 continue;
265             }
266             Info info = Info.getInfo(type.toString());
267             Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get(type == LstrType.region ? "territory" : type.toString());
268             if (aliases == null) {
269                 System.out.println("No aliases for: " + type);
270             }
271             // gather data
272             info.clear();
273             for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
274                 String code = entry2.getKey();
275                 if (type == LstrType.language && code.startsWith("bh")) {
276                     int debug = 0;
277                 }
278                 Map<LstrField, String> data = entry2.getValue();
279                 Validity.Status subtype = Validity.Status.regular;
280                 if (code.equals(type.unknown)) {
281                     subtype = Validity.Status.unknown;
282                 } else if (type.specials.contains(code)) {
283                     subtype = Validity.Status.special;
284                 } else if (aliases != null && aliases.containsKey(code)) {
285                     subtype = Validity.Status.deprecated;
286                 } else if (data.get(LstrField.Description).startsWith("Private use")) {
287                     subtype = Validity.Status.private_use;
288                 }
289                 switch (type) {
290                 case language:
291                     if (code.equals("root")) {
292                         continue;
293                     }
294                     break;
295                 case region:
296                     if (containment.contains(code)) {
297                         subtype = Validity.Status.macroregion;
298                     } else if (code.equals("XA") || code.equals("XB")) {
299                         subtype = Validity.Status.special;
300                     }
301                     if (subtype == Status.regular) {
302                         Info subInfo = Info.getInfo("subdivision");
303                         subInfo.put(code.toLowerCase(Locale.ROOT) + "zzzz", Status.unknown);
304                     }
305                     break;
306                 case script:
307                     switch (code) {
308                     case "Qaag":
309                     case "Zsye":
310                         subtype = Status.special;
311                         break;
312                     default:
313                         if (subtype == Validity.Status.regular) {
314                             ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(code);
315                             if (scriptInfo == null && !code.equals("Hrkt")) {
316                                 skippedScripts.add(code);
317                                 continue;
318                             }
319                         }
320                         break;
321                     }
322                     break;
323                 case variant:
324                     if (VARIANT_EXTRAS.contains(code)) {
325                         continue;
326                     }
327                 default:
328                     break;
329                 }
330                 info.put(code, subtype);
331             }
332         }
333         System.out.println("Skipping non-Unicode scripts: " + Joiner.on(' ').join(skippedScripts));
334     }
335 
    /** Variant codes that doLstr skips, so they are never recorded in the variant validity data. */
    static final Set<String> VARIANT_EXTRAS = ImmutableSet.of("POSIX", "REVISED", "SAAHO");
337 }
338