1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.lang.invoke.MethodHandles;
5 import java.util.Collection;
6 import java.util.Date;
7 import java.util.EnumMap;
8 import java.util.LinkedHashMap;
9 import java.util.List;
10 import java.util.Locale;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
16 
17 import org.unicode.cldr.draft.ScriptMetadata;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.CLDRTool;
20 import org.unicode.cldr.util.DtdType;
21 import org.unicode.cldr.util.StandardCodes;
22 import org.unicode.cldr.util.StandardCodes.LstrField;
23 import org.unicode.cldr.util.StandardCodes.LstrType;
24 import org.unicode.cldr.util.StringRange;
25 import org.unicode.cldr.util.StringRange.Adder;
26 import org.unicode.cldr.util.SupplementalDataInfo;
27 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
28 import org.unicode.cldr.util.TempPrintWriter;
29 import org.unicode.cldr.util.Validity;
30 import org.unicode.cldr.util.Validity.Status;
31 
32 import com.google.common.base.Joiner;
33 import com.google.common.base.Objects;
34 import com.google.common.collect.ImmutableSet;
35 import com.google.common.collect.ImmutableSetMultimap;
36 import com.google.common.collect.Multimap;
37 import com.google.common.collect.Multimaps;
38 import com.google.common.collect.SetMultimap;
39 import com.google.common.collect.TreeMultimap;
40 import com.ibm.icu.impl.Row.R2;
41 import com.ibm.icu.util.ICUUncheckedIOException;
42 
43 @CLDRTool(
44     alias = "generate-validity-data",
45     url = "http://cldr.unicode.org/development/updating-codes/update-validity-xml")
46 public class GenerateValidityXml {
47 
    // Validity data obtained from the no-argument Validity.getInstance().
    private static final Validity VALIDITY = Validity.getInstance();
    // Validity data read from "common/validity/" under CLDRPaths.LAST_RELEASE_DIRECTORY.
    // doCurrency reports status differences against it; doSubdivisions carries over
    // codes from it that are missing from the newly built table.
    private static Validity OLD_VALIDITY = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");

    // Registry data as returned by StandardCodes.getEnumLstreg(): type -> code -> field -> value.
    private static final Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG = StandardCodes.getEnumLstreg();
    // Supplemental data accessor used for containment, alias, subdivision and currency lookups below.
    private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();
53 
54     private static class MyAdder implements Adder {
55         Appendable target;
56         boolean twoCodePoints = false;
57         long lastCodePoint = -1;
58 
59         @Override
add(String start, String end)60         public void add(String start, String end) {
61             try {
62                 long firstCodePoint = start.codePointAt(0);
63                 if (twoCodePoints) {
64                     firstCodePoint <<= 22;
65                     firstCodePoint |= start.codePointAt(1);
66                 }
67                 if (firstCodePoint == lastCodePoint) {
68                     target.append(' ');
69                 } else {
70                     target.append("\n\t\t\t");
71                 }
72                 target.append(start);
73                 if (end != null) {
74                     target.append('~').append(end);
75                 }
76                 lastCodePoint = firstCodePoint;
77             } catch (IOException e) {
78                 throw new ICUUncheckedIOException(e);
79             }
80         }
81 
reset(boolean b)82         public void reset(boolean b) {
83             lastCodePoint = -1;
84             twoCodePoints = b;
85         }
86     }
87 
    // Codes returned by SDI.getContainers(); doLstr marks region codes found here as macroregions.
    static Set<String> containment = SDI.getContainers();
    // Registry entries for LstrType.region: code -> field -> value.
    static Map<String, Map<LstrField, String>> codeToData = LSTREG.get(LstrType.region);
90 
91     static class Info {
92         String mainComment;
93         //private Relation<Validity.Status, String> statusMap = Relation.of(new EnumMap<Validity.Status, Set<String>>(Validity.Status.class), TreeSet.class);
94         Map<String, Validity.Status> codeToStatus = new TreeMap<>();
95         Map<Validity.Status, String> statusComment = new EnumMap<>(Status.class);
96         Set<String> newCodes = new TreeSet<>();
97 
98         static Map<String, Info> types = new LinkedHashMap<>();
99 
getInfo(String myType)100         static Info getInfo(String myType) {
101             Info info = types.get(myType);
102             if (info == null) {
103                 types.put(myType, info = new Info());
104             }
105             return info;
106         }
getStatusMap()107         public SetMultimap<Status, String> getStatusMap() {
108             TreeMultimap<Status, String> result = TreeMultimap.create();
109             Multimaps.invertFrom(Multimaps.forMap(codeToStatus), result);
110             return ImmutableSetMultimap.copyOf(result);
111         }
put(String key, Status value)112         public void put(String key, Status value) {
113             codeToStatus.put(key, value);
114         }
remove(String key, Status value)115         public void remove(String key, Status value) {
116             codeToStatus.remove(key, value);
117         }
clear()118         public void clear() {
119             codeToStatus.clear();
120         }
entrySet()121         public Set<Entry<String, Status>> entrySet() {
122             return codeToStatus.entrySet();
123         }
get(String key)124         public Status get(String key) {
125             return codeToStatus.get(key);
126         }
putBest(String currency, Status newStatus)127         public void putBest(String currency, Status newStatus) {
128             Status oldStatus = get(currency);
129             if (oldStatus == null || newStatus.compareTo(oldStatus) < 0) {
130                 put(currency, newStatus);
131             }
132         }
133     }
134 
    // Alias for Info.types: the same map instance that Info.getInfo populates.
    static final Map<String, Info> types = Info.types;
136 
main(String[] args)137     public static void main(String[] args) throws IOException {
138 
139         doLstr(types);
140         doSubdivisions(types);
141         doCurrency(types);
142         // write file
143         MyAdder adder = new MyAdder();
144         for (Entry<String, Info> entry : types.entrySet()) {
145             String type = entry.getKey();
146             final Info info = entry.getValue();
147             Multimap<Status, String> subtypeMap = info.getStatusMap();
148             try (TempPrintWriter output = TempPrintWriter.openUTF8Writer(CLDRPaths.COMMON_DIRECTORY, "validity/" + type + ".xml")) {
149                 adder.target = output;
150                 output.append(DtdType.supplementalData.header(MethodHandles.lookup().lookupClass())
151                     + "\t<version number=\"$Revision" + "$\"/>\n"
152                     + "\t<idValidity>\n");
153                 for (Entry<Status, Collection<String>> entry2 : subtypeMap.asMap().entrySet()) {
154                     Validity.Status subtype = entry2.getKey();
155                     Set<String> set = (Set<String>) entry2.getValue();
156                     String comment = info.statusComment.get(entry2.getKey());
157                     if (comment != null) {
158                         output.append("\t\t<!-- " + comment.replace("\n", "\n\t\t\t ") + " -->\n");
159                     }
160                     output.append("\t\t<id type='" + type + "' idStatus='" + subtype + "'>");
161                     final int size = set.size();
162                     output.append("\t\t<!-- " + size + " item" + (size > 1 ? "s" : "") // we know it’s English ;-)
163                         + " -->");
164                     adder.reset(size > 600); //  || type.equals("subdivision")
165                     StringRange.compact(set, adder, true);
166                     output.append("\n\t\t</id>\n");
167                 }
168 //                if (!info.newCodes.isEmpty()) {
169 //                    output.append("\t\t<!-- Codes added this release:\n\t\t\t" + showCodes(info.newCodes, "\n\t\t\t") + "\n\t\t-->\n");
170 //                }
171                 output.append("\t</idValidity>\n</supplementalData>\n");
172             }
173         }
174         // System.out.println("TODO: add Unknown subdivisions, add private_use currencies, ...");
175     }
176 
showCodes(Set<String> newCodes, String linePrefix)177     private static String showCodes(Set<String> newCodes, String linePrefix) {
178         StringBuilder result = new StringBuilder();
179         String last = "";
180         for (String s : newCodes) {
181             String newPrefix = s.substring(0, s.indexOf('-'));
182             if (last.equals(newPrefix)) {
183                 result.append(" ");
184             } else {
185                 if (!last.isEmpty()) {
186                     result.append(linePrefix);
187                 }
188                 last = newPrefix;
189             }
190             result.append(s);
191         }
192         return result.toString();
193     }
194 
doCurrency(Map<String, Info> types)195     private static void doCurrency(Map<String, Info> types) {
196         Info info = Info.getInfo("currency");
197         Date now = new Date();
198         Date eoy = new Date(now.getYear() + 1, 0, 1); // Dec
199         for (String region : SDI.getCurrencyTerritories()) {
200             for (CurrencyDateInfo data : SDI.getCurrencyDateInfo(region)) {
201                 String currency = data.getCurrency();
202                 Date end = data.getEnd();
203                 boolean legalTender = data.isLegalTender();
204                 Status newStatus = end.after(eoy) && legalTender ? Status.regular : Status.deprecated;
205                 info.putBest(currency, newStatus);
206             }
207         }
208         info.put(LstrType.currency.unknown, Status.unknown);
209         // make sure we don't overlap.
210         // we want to keep any code that is valid in any territory, so
211         info.remove("XXX", Status.deprecated);
212         info.remove("XXX", Status.regular);
213 
214         // just to make sure info never disappears
215         Map<String, Status> oldCodes = OLD_VALIDITY.getCodeToStatus(LstrType.currency);
216         for (Entry<String, Status> entry : oldCodes.entrySet()) {
217             String key = entry.getKey();
218             Status oldStatus = entry.getValue();
219             Status newStatus = info.get(key);
220             if (!Objects.equal(oldStatus, newStatus)) {
221                 System.out.println("Status changed: " + key + ", " + oldStatus + " => " + newStatus);
222             }
223         }
224 
225         info.statusComment.put(Status.deprecated,
226             "Deprecated values are those that are not legal tender in some country after " + (1900 + now.getYear()) + ".\n"
227                 + "More detailed usage information needed for some implementations is in supplemental data.");
228     }
229 
doSubdivisions(Map<String, Info> types)230     private static void doSubdivisions(Map<String, Info> types) {
231         Info info = Info.getInfo("subdivision");
232         Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get("subdivision");
233         for (String container : SDI.getContainersForSubdivisions()) {
234             for (String contained : SDI.getContainedSubdivisions(container)) {
235                 Status status = aliases.containsKey(contained) ? Validity.Status.deprecated : Validity.Status.regular;
236                 info.put(contained.toLowerCase(Locale.ROOT).replace("-", ""), status);
237             }
238         }
239 
240         // find out which items were valid, but are no longer in the containment map
241         // add them as deprecated
242         Map<Status, Set<String>> oldSubdivisionData = OLD_VALIDITY.getStatusToCodes(LstrType.subdivision);
243         for (Entry<Status, Set<String>> entry : oldSubdivisionData.entrySet()) {
244             for (String oldSdId : entry.getValue()) {
245                 if (info.get(oldSdId) == null) {
246                     info.put(oldSdId, Status.deprecated);
247                 }
248             }
249         }
250 
251         info.statusComment.put(Status.deprecated,
252             "Deprecated values include those that are not formally deprecated in the country in question, but have their own region codes.\n"
253                 + "It also include codes that were previously in CLDR, for compatibility.");
254         info.statusComment.put(Status.unknown,
255             "Unknown/Undetermined subdivision codes (ZZZZ) are defined for all regular region codes.");
256     }
257 
doLstr(Map<String, Info> types)258     private static void doLstr(Map<String, Info> types) throws IOException {
259         Set<String> skippedScripts = new TreeSet<>();
260         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : LSTREG.entrySet()) {
261             LstrType type = entry.getKey();
262             if (!type.isLstr || !type.isUnicode) {
263                 continue;
264             }
265             Info info = Info.getInfo(type.toString());
266             Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get(type == LstrType.region ? "territory" : type.toString());
267             if (aliases == null) {
268                 System.out.println("No aliases for: " + type);
269             }
270             // gather data
271             info.clear();
272             for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
273                 String code = entry2.getKey();
274                 if (type == LstrType.language && code.equals("aam")
275                     || type == LstrType.variant && code.equals("arevela")
276                     || type == LstrType.extlang && code.equals("lsg")
277                     ) {
278                     int debug = 0;
279                 }
280                 Map<LstrField, String> data = entry2.getValue();
281                 Validity.Status subtype = Validity.Status.regular;
282                 if (code.equals(type.unknown)) {
283                     subtype = Validity.Status.unknown;
284                 } else if (type.specials.contains(code)) {
285                     subtype = Validity.Status.special;
286                 } else if (aliases != null && aliases.containsKey(code)
287                     || data.containsKey(LstrField.Deprecated)) {
288                     subtype = Validity.Status.deprecated;
289                 } else if (data.get(LstrField.Description).startsWith("Private use")) {
290                     subtype = Validity.Status.private_use;
291                 }
292                 switch (type) {
293                 case language:
294                     if (subtype == Status.private_use && code.compareTo("qfz") < 0) {
295                         subtype = Status.reserved;
296                     } else if (code.equals("root")) {
297                         continue;
298                     }
299                     break;
300                 case region:
301                     if (containment.contains(code)) {
302                         subtype = Validity.Status.macroregion;
303                     } else if (code.equals("XA") || code.equals("XB")) {
304                         subtype = Validity.Status.special;
305                     }
306                     switch (subtype) {
307                     case regular:
308                         Info subInfo = Info.getInfo("subdivision");
309                         subInfo.put(code.toLowerCase(Locale.ROOT) + "zzzz", Status.unknown);
310                         break;
311                     case private_use:
312                         if (code.compareTo("X") < 0) {
313                             subtype = Status.reserved;
314                         }
315                         break;
316                     default:
317                         break;
318                     }
319                     break;
320                 case script:
321                     switch (code) {
322                     case "Aran":
323                     case "Qaag":
324                     case "Zsye":
325                     case "Zanb":
326                     case "Zinh":
327                     case "Zyyy":
328                         subtype = Status.special;
329                         break;
330                     default:
331                         switch (subtype) {
332                         case private_use:
333                             if (code.compareTo("Qaaq") < 0) {
334                                 subtype = Validity.Status.reserved;
335                             }
336                             break;
337                         case regular:
338                             ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(code);
339                             if (scriptInfo == null && !code.equals("Hrkt")) {
340                                 skippedScripts.add(code);
341                                 continue;
342                             }
343                             break;
344                         default: // don't care about rest
345                             break;
346                         }
347                         break;
348                     }
349                     break;
350                 case variant:
351                     if (VARIANT_EXTRAS.contains(code)) {
352                         continue;
353                     }
354                 default:
355                     break;
356                 }
357                 info.put(code, subtype);
358             }
359         }
360         System.out.println("Skipping non-Unicode scripts: " + Joiner.on(' ').join(skippedScripts));
361     }
362 
    // Variant codes that doLstr skips, so they are never added to the variant table.
    static final Set<String> VARIANT_EXTRAS = ImmutableSet.of("POSIX", "REVISED", "SAAHO");
364 }
365