package org.unicode.cldr.tool; import java.io.IOException; import java.io.PrintWriter; import java.lang.invoke.MethodHandles; import java.util.Collection; import java.util.Date; import java.util.EnumMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.draft.ScriptMetadata; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CLDRTool; import org.unicode.cldr.util.DtdType; import org.unicode.cldr.util.StandardCodes; import org.unicode.cldr.util.StandardCodes.LstrField; import org.unicode.cldr.util.StandardCodes.LstrType; import org.unicode.cldr.util.StringRange; import org.unicode.cldr.util.StringRange.Adder; import org.unicode.cldr.util.SupplementalDataInfo; import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; import org.unicode.cldr.util.Validity; import org.unicode.cldr.util.Validity.Status; import com.google.common.base.Joiner; import com.google.common.base.Objects; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSetMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; import com.google.common.collect.SetMultimap; import com.google.common.collect.TreeMultimap; import com.ibm.icu.impl.Row.R2; import com.ibm.icu.util.ICUUncheckedIOException; @CLDRTool( alias = "generate-validity-data", url = "http://cldr.unicode.org/development/updating-codes/update-validity-xml") public class GenerateValidityXml { private static final Validity VALIDITY = Validity.getInstance(); private static Validity OLD_VALIDITY = Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/"); private static final Map>> LSTREG = StandardCodes.getEnumLstreg(); private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance(); private static class MyAdder implements Adder { Appendable target; boolean twoCodePoints = false; long lastCodePoint = -1; @Override public void add(String start, String end) { try { long firstCodePoint = start.codePointAt(0); if (twoCodePoints) { firstCodePoint <<= 22; firstCodePoint |= start.codePointAt(1); } if (firstCodePoint == lastCodePoint) { target.append(' '); } else { target.append("\n\t\t\t"); } target.append(start); if (end != null) { target.append('~').append(end); } lastCodePoint = firstCodePoint; } catch (IOException e) { throw new ICUUncheckedIOException(e); } } public void reset(boolean b) { lastCodePoint = -1; twoCodePoints = b; } } static Set containment = SDI.getContainers(); static Map> codeToData = LSTREG.get(LstrType.region); static class Info { String mainComment; //private Relation statusMap = Relation.of(new EnumMap>(Validity.Status.class), TreeSet.class); Map codeToStatus = new TreeMap<>(); Map statusComment = new EnumMap<>(Status.class); Set newCodes = new TreeSet<>(); static Map types = new LinkedHashMap<>(); static Info getInfo(String myType) { Info info = types.get(myType); if (info == null) { types.put(myType, info = new Info()); } return info; } public SetMultimap getStatusMap() { TreeMultimap result = TreeMultimap.create(); Multimaps.invertFrom(Multimaps.forMap(codeToStatus), result); return ImmutableSetMultimap.copyOf(result); } public void put(String key, Status value) { codeToStatus.put(key, value); } public void remove(String key, Status value) { codeToStatus.remove(key, value); } public void clear() { codeToStatus.clear(); } public Set> entrySet() { return codeToStatus.entrySet(); } public Status get(String key) { return codeToStatus.get(key); } public void putBest(String currency, Status newStatus) { Status oldStatus = get(currency); if (oldStatus == null || newStatus.compareTo(oldStatus) < 0) { put(currency, newStatus); } } } static final Map types = Info.types; public static void main(String[] args) throws IOException { doLstr(types); doSubdivisions(types); doCurrency(types); // write file MyAdder adder = new MyAdder(); for (Entry entry : types.entrySet()) { String type = entry.getKey(); final Info info = entry.getValue(); Multimap subtypeMap = info.getStatusMap(); try (PrintWriter output = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "validity/" + type + ".xml")) { adder.target = output; output.append(DtdType.supplementalData.header(MethodHandles.lookup().lookupClass()) + "\t\n" + "\t\n"); for (Entry> entry2 : subtypeMap.asMap().entrySet()) { Validity.Status subtype = entry2.getKey(); Set set = (Set) entry2.getValue(); String comment = info.statusComment.get(entry2.getKey()); if (comment != null) { output.append("\t\t\n"); } output.append("\t\t"); final int size = set.size(); output.append("\t\t"); adder.reset(size > 600); // || type.equals("subdivision") StringRange.compact(set, adder, true); output.append("\n\t\t\n"); } // if (!info.newCodes.isEmpty()) { // output.append("\t\t\n"); // } output.append("\t\n\n"); } } // System.out.println("TODO: add Unknown subdivisions, add private_use currencies, ..."); } private static String showCodes(Set newCodes, String linePrefix) { StringBuilder result = new StringBuilder(); String last = ""; for (String s : newCodes) { String newPrefix = s.substring(0, s.indexOf('-')); if (last.equals(newPrefix)) { result.append(" "); } else { if (!last.isEmpty()) { result.append(linePrefix); } last = newPrefix; } result.append(s); } return result.toString(); } private static void doCurrency(Map types) { Info info = Info.getInfo("currency"); Date now = new Date(); Date eoy = new Date(now.getYear() + 1, 0, 1); // Dec for (String region : SDI.getCurrencyTerritories()) { for (CurrencyDateInfo data : SDI.getCurrencyDateInfo(region)) { String currency = data.getCurrency(); Date end = data.getEnd(); boolean legalTender = data.isLegalTender(); Status newStatus = end.after(eoy) && legalTender ? Status.regular : Status.deprecated; info.putBest(currency, newStatus); } } info.put(LstrType.currency.unknown, Status.unknown); // make sure we don't overlap. // we want to keep any code that is valid in any territory, so info.remove("XXX", Status.deprecated); info.remove("XXX", Status.regular); // just to make sure info never disappears Map oldCodes = OLD_VALIDITY.getCodeToStatus(LstrType.currency); for (Entry entry : oldCodes.entrySet()) { String key = entry.getKey(); Status oldStatus = entry.getValue(); Status newStatus = info.get(key); if (!Objects.equal(oldStatus, newStatus)) { System.out.println("Status changed: " + key + ", " + oldStatus + " => " + newStatus); } } info.statusComment.put(Status.deprecated, "Deprecated values are those that are not legal tender in some country after " + (1900 + now.getYear()) + ".\n" + "More detailed usage information needed for some implementations is in supplemental data."); } private static void doSubdivisions(Map types) { Info info = Info.getInfo("subdivision"); Map, String>> aliases = SDI.getLocaleAliasInfo().get("subdivision"); for (String container : SDI.getContainersForSubdivisions()) { for (String contained : SDI.getContainedSubdivisions(container)) { Status status = aliases.containsKey(contained) ? Validity.Status.deprecated : Validity.Status.regular; info.put(contained.toLowerCase(Locale.ROOT).replace("-", ""), status); } } // find out which items were valid, but are no longer in the containment map // add them as deprecated Map> oldSubdivisionData = OLD_VALIDITY.getStatusToCodes(LstrType.subdivision); for (Entry> entry : oldSubdivisionData.entrySet()) { for (String oldSdId : entry.getValue()) { if (info.get(oldSdId) == null) { info.put(oldSdId, Status.deprecated); } } } info.statusComment.put(Status.deprecated, "Deprecated values include those that are not formally deprecated in the country in question, but have their own region codes.\n" + "It also include codes that were previously in CLDR, for compatibility."); info.statusComment.put(Status.unknown, "Unknown/Undetermined subdivision codes (ZZZZ) are defined for all regular region codes."); } private static void doLstr(Map types) throws IOException { Set skippedScripts = new TreeSet<>(); for (Entry>> entry : LSTREG.entrySet()) { LstrType type = entry.getKey(); if (!type.isLstr || !type.isUnicode) { continue; } Info info = Info.getInfo(type.toString()); Map, String>> aliases = SDI.getLocaleAliasInfo().get(type == LstrType.region ? "territory" : type.toString()); if (aliases == null) { System.out.println("No aliases for: " + type); } // gather data info.clear(); for (Entry> entry2 : entry.getValue().entrySet()) { String code = entry2.getKey(); if (type == LstrType.language && code.startsWith("bh")) { int debug = 0; } Map data = entry2.getValue(); Validity.Status subtype = Validity.Status.regular; if (code.equals(type.unknown)) { subtype = Validity.Status.unknown; } else if (type.specials.contains(code)) { subtype = Validity.Status.special; } else if (aliases != null && aliases.containsKey(code)) { subtype = Validity.Status.deprecated; } else if (data.get(LstrField.Description).startsWith("Private use")) { subtype = Validity.Status.private_use; } switch (type) { case language: if (code.equals("root")) { continue; } break; case region: if (containment.contains(code)) { subtype = Validity.Status.macroregion; } else if (code.equals("XA") || code.equals("XB")) { subtype = Validity.Status.special; } if (subtype == Status.regular) { Info subInfo = Info.getInfo("subdivision"); subInfo.put(code.toLowerCase(Locale.ROOT) + "zzzz", Status.unknown); } break; case script: switch (code) { case "Qaag": case "Zsye": subtype = Status.special; break; default: if (subtype == Validity.Status.regular) { ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(code); if (scriptInfo == null && !code.equals("Hrkt")) { skippedScripts.add(code); continue; } } break; } break; case variant: if (VARIANT_EXTRAS.contains(code)) { continue; } default: break; } info.put(code, subtype); } } System.out.println("Skipping non-Unicode scripts: " + Joiner.on(' ').join(skippedScripts)); } static final Set VARIANT_EXTRAS = ImmutableSet.of("POSIX", "REVISED", "SAAHO"); }