package org.unicode.cldr.test; import java.io.BufferedReader; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.Pair; import org.unicode.cldr.util.PatternCache; import org.unicode.cldr.util.StandardCodes; import org.unicode.cldr.util.SupplementalDataInfo; import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; import org.unicode.cldr.util.XPathParts; import com.ibm.icu.impl.Relation; public class TestSupplementalData { static CLDRFile english; private static SupplementalDataInfo supplementalData; private static StandardCodes sc; public static void main(String[] args) throws IOException { // genData(); // if (true) return; Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); english = cldrFactory.make("en", true); root = cldrFactory.make("root", true); supplementalData = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY); sc = StandardCodes.make(); showMultiZones(); checkPlurals(); System.out.println("Skipped Elements: " + supplementalData.getSkippedElements()); checkAgainstLanguageScript(); checkTerritoryMapping(); checkTelephoneCodeData(); } private static void showMultiZones() { // reverse the list Relation territoryToZones = Relation. of(new TreeMap>(), TreeSet.class); for (String zone : supplementalData.getCanonicalZones()) { territoryToZones.put(supplementalData.getZone_territory(zone), zone); } // gather the data // this could be slightly simpler using supplementalData.get Set singulars = new TreeSet<>(); for (String region : territoryToZones.keySet()) { final Set zones = territoryToZones.getAll(region); if (zones.size() == 1 || region.equals("001")) { singulars.addAll(zones); continue; } System.out.println(region + "\t" + english.getName("territory", region)); System.out.println("\t" + zones); } XPathParts xpp = XPathParts.getFrozenInstance(root.getFullXPath("//ldml/dates/timeZoneNames/singleCountries")); List singleCountries = Arrays.asList(xpp.getAttributeValue(-1, "list").split("\\s+")); singulars.addAll(singleCountries); singulars.remove("Etc/Unknown"); // remove special case System.out.println("Excluded Zones (not necessary in Survey tool): " + singulars); Set otherExclusions = root.getExcludedZones(); if (!otherExclusions.equals(singulars)) { throw new IllegalArgumentException("problem with excluded zones"); } for (Iterator it = english.iterator("//ldml/dates/timeZoneNames/zone"); it.hasNext();) { String distinguishedPath = it.next(); if (root.isPathExcludedForSurvey(distinguishedPath)) { System.out.println("EX\t" + distinguishedPath); } else { System.out.println("\t" + distinguishedPath); } } } private static void checkPlurals() { Relation pluralsToLocale = Relation. of(new HashMap>(), TreeSet.class); for (String locale : new TreeSet<>(supplementalData.getPluralLocales())) { PluralInfo pluralInfo = supplementalData.getPlurals(locale); System.out.println(locale + ":\t" + pluralInfo); pluralsToLocale.put(pluralInfo, locale); } String locale = "en_US"; PluralInfo pluralInfo = supplementalData.getPlurals(locale); System.out.println(locale + ":\t" + pluralInfo); for (PluralInfo pluralInfo2 : pluralsToLocale.keySet()) { System.out.println("Locales: \t" + pluralsToLocale.getAll(pluralInfo2)); final Map typeToExamples = pluralInfo2.getCountToStringExamplesMap(); for (Count type : typeToExamples.keySet()) { System.out.println("\tPlural Code: \t" + type + " \t=>\t" + typeToExamples.get(type)); } System.out.println(); } } private static void checkTelephoneCodeData() { System.out.println("==== territories for telephoneCodeData ===="); System.out.println(supplementalData.getTerritoriesForTelephoneCodeInfo()); System.out.println("==== telephone code data for 001 ===="); System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("001")); System.out.println("==== telephone code data for US ===="); System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("US")); System.out.println("==== all telephoneCodeData ===="); System.out.println(supplementalData.getTerritoryToTelephoneCodeInfo()); } static Matcher numericTerritory = PatternCache.get("[0-9]{3}").matcher(""); private static CLDRFile root; private static void checkTerritoryMapping() { Relation alpha3 = supplementalData.getAlpha3TerritoryMapping(); Set temp = new TreeSet<>(sc.getAvailableCodes("territory")); for (Iterator it = temp.iterator(); it.hasNext();) { String code = it.next(); if (numericTerritory.reset(code).matches()) { it.remove(); continue; } // if (sc.getFullData("territory", code).get(0).equals("PRIVATE USE")) { // it.remove(); // continue; // } } showAnyDifferences("alpha3", alpha3.keySet(), "sc", temp); } private static void showAnyDifferences(String title, Set set, String title2, Set set2) { if (!set.equals(set2)) { showFirstMinusSecond("Failure " + title + "-" + title2 + ": ", set, set2); showFirstMinusSecond("Failure " + title2 + "-" + title + ": ", set2, set); } } private static void showFirstMinusSecond(String title, Set name, Set availableCodes) { Set temp = getFirstMinusSecond(name, availableCodes); if (!temp.isEmpty()) { System.out.println(title + getFirstMinusSecond(name, availableCodes)); } } private static Set getFirstMinusSecond(Set name, Set availableCodes) { Set temp = new TreeSet<>(name); temp.removeAll(availableCodes); return temp; } static void checkAgainstLanguageScript() { Relation otherTerritoryToLanguages = Relation. of(new TreeMap>(), TreeSet.class, null); // get other language data for (String language : sc.getGoodAvailableCodes("language")) { Set newLanguageData = supplementalData.getBasicLanguageData(language); if (newLanguageData != null) { for (BasicLanguageData languageData : newLanguageData) { Set territories = new TreeSet<>(languageData.getTerritories()); territories.addAll(languageData.getTerritories()); if (territories != null) { Set scripts = new TreeSet<>(languageData.getScripts()); scripts.addAll(languageData.getScripts()); if (scripts == null || scripts.size() < 2) { otherTerritoryToLanguages.putAll(territories, language); } else { for (String script : scripts) { otherTerritoryToLanguages.putAll(territories, language + "_" + script); } } } } } } // compare them, listing differences for (String territory : sc.getGoodAvailableCodes("territory")) { Set languages = supplementalData.getTerritoryToLanguages(territory); Set otherLanguages = otherTerritoryToLanguages.getAll(territory); if (otherLanguages == null) otherLanguages = Collections.emptySet(); if (!Objects.equals(languages, otherLanguages)) { Set languagesLeftover = new TreeSet<>(languages); languagesLeftover.removeAll(otherLanguages); Set otherLanguagesLeftover = new TreeSet<>(otherLanguages); otherLanguagesLeftover.removeAll(languages); String territoryString = english.getName(CLDRFile.TERRITORY_NAME, territory); if (otherLanguagesLeftover.size() != 0) { for (String other : otherLanguagesLeftover) { String name = english.getName(other); System.out.println(territoryString + "\t" + territory + "\t" + name + "\t" + other); } } } } } /** * Temporary function to transform data * * @throws IOException */ public static void genData() throws IOException { BufferedReader codes = CldrUtility.getUTF8Data("territory_codes.txt"); Set sorted = new TreeSet<>(); while (true) { String line = codes.readLine(); if (line == null) break; line = line.split("#")[0].trim(); if (line.length() == 0) continue; String[] sourceValues = line.split("\\s+"); String[] values = new String[5]; for (int i = 0; i < values.length; ++i) { if (i >= sourceValues.length || sourceValues[i].equals("-")) values[i] = null; else values[i] = sourceValues[i]; } String alpha2 = values[0]; String numeric = values[1]; String alpha3 = values[2]; String internet = values[3]; if (internet != null) { internet = internet.replace("/", " "); } if (internet != null) internet = internet.toUpperCase(); String fips10 = values[4]; Pair item = new Pair(alpha2, new Pair(numeric, new Pair(alpha3, new Pair(fips10, internet)))); sorted.add(item); } for (Pair item : sorted) { // System.out.print(""); } codes.close(); } private static void showNonNull(String title, Object first, Object noDup) { if (first != null && !first.equals(noDup)) { System.out.print(" " + title + "=\"" + first + "\""); } } }