1 package org.unicode.cldr.tool; 2 3 import java.io.IOException; 4 import java.io.StringWriter; 5 import java.math.BigDecimal; 6 import java.util.HashSet; 7 import java.util.List; 8 import java.util.Map; 9 import java.util.Set; 10 import java.util.TreeMap; 11 import java.util.TreeSet; 12 13 import org.unicode.cldr.draft.FileUtilities; 14 import org.unicode.cldr.draft.Keyboard; 15 import org.unicode.cldr.util.CLDRConfig; 16 import org.unicode.cldr.util.CLDRFile; 17 import org.unicode.cldr.util.LanguageTagCanonicalizer; 18 import org.unicode.cldr.util.LanguageTagParser; 19 import org.unicode.cldr.util.SupplementalDataInfo; 20 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 21 22 import com.google.common.base.Splitter; 23 import com.google.common.collect.Multimap; 24 import com.google.common.collect.TreeMultimap; 25 import com.google.gson.Gson; 26 import com.google.gson.stream.JsonReader; 27 import com.google.gson.stream.JsonToken; 28 import com.google.gson.stream.JsonWriter; 29 30 public class KeymanCheck { 31 static Splitter SPACE = Splitter.on(' ').trimResults().omitEmptyStrings(); 32 static final List<String> board = SPACE.splitToList( 33 "aa-ET ab-GE an-ES apa-US ar-AE ar-DZ ar-IL ar-IQ ar-JO ar-KW ar-LB ar-LY ar-QA ar-SA ar-SD ar-SY ar-TN ar-YE arn-CL ast-ES av-RU awa-IN ay-BO az-AZ az-IR ba-RU bal-PK bar-AT bfy-IN bgc-IN bgq-IN bgq-PK bh-IN bho-IN bho-MU bho-NP bi-VU bm-ML bn-IN bns-IN bo-CN br-FR brx-Beng-IN brx-Deva-IN brx-Latn-IN bs-BA bug-ID ca-AD ca-ES ce-RU ceb-PH ch-GU cho-US chr-US cmn-Hans-SG cmn-Hant-HK co-FR cr-Cans-CA cr-Latn-CA cv-RU cy-GB dak-US de-AT de-CH de-LI de-LU dhd-IN din-SD doi-Arab-IN doi-Deva-IN dv-MV dz-BT ee-GH en-BD en-BM en-HK en-IE en-MY en-NZ en-PK en-SD en-SG en-TT en-TZ en-UG eo-001 es-419 es-BO es-CL es-CO es-CR es-CU es-DO es-EC es-GQ es-GT es-HN es-NI es-PA es-PE es-PR es-PY es-SV es-UY es-VE esu-US fa-AF ff-011 fj-FJ fo-FO fr-BE fr-CH fr-CI fr-LU fr-SN fy-NL ga-IE gbm-IN gd-GB gn-PG gv-IM gyn-GY ha-NG haw-US hi-Latn-IN hil-PH hmn-CN hne-IN ho-PG hoj-IN hsb-DE hu-SK hy-AM hz-NA ia-001 ig-NG ii-CN ik-US ilo-PH it-CH it-MT iu-Cans-CA iu-Latn-CA jbo-001 kfy-IN kg-CD ki-KE kj-AO kl-GL kok-Deva-IN kok-Knda-IN kok-Latn-IN kr-NG kri-SL ks-Arab-IN ks-Deva-IN ktu-CD ku-Arab-IQ ku-Latn-IQ kv-RU ky-KG la-001 lb-LU lg-UG li-NL lis-CN lkt-US lmn-IN lmo-IT ln-CD lu-CD mad-ID mag-IN mag-NP mai-NP mg-MG mh-MH mi-NZ min-ID ml-IN mn-CN mni-Beng-IN mrj-RU ms-SG mt-MT mtr-IN mup-IN mus-US mwr-IN na-NR nah-MX nap-IT nd-ZW nds-DE ne-IN new-NP nl-BE nn-NO noe-IN nr-ZA nso-ZA nv-US ny-MW oc-FR oj-CA om-ET os-RU pa-Arab-PK pap-CW pms-IT ps-AF pt-AO pt-TL qu-BO quc-GT raj-PK rm-CH rn-BI ro-MD ru-BY ru-KZ ru-UA sa-IN sah-RU sat-Beng-IN sat-Deva-IN sat-Latn-IN sat-Olck-IN sc-IT sck-IN scn-IT sco-GB sd-Arab-IN sd-Deva-IN sd-PK se-NO see-US sg-CF sgs-LT sjp-BD sm-WS sn-ZW sr-Cyrl-ME sr-Cyrl-RS sr-Latn-ME ss-ZA st-ZA sv-FI sw-KE sw-TZ sw-UG syl-BD ta-LK ta-SG tcy-IN tet-TL ti-ET tk-TM tn-BW tn-ZA to-TO tpi-PG ts-ZA tt-RU tw-GH ty-PF ug-CN ur-IN var-IN ve-ZA vec-IT wa-BE war-PH wen-DE wo-SN xh-ZA xnr-IN yi-US yo-NG"); 34 35 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CLDRConfig.getInstance().getSupplementalDataInfo(); 36 main(String[] args)37 public static void main(String[] args) throws IOException { 38 Gson gson = new Gson(); 39 JsonReader reader = gson.newJsonReader(FileUtilities.openFile("/Users/markdavis/Google Drive/workspace/DATA/cldr/", "keyman.json")); 40 final StringWriter stringWriter = new StringWriter(); 41 JsonWriter writer = gson.newJsonWriter(stringWriter); 42 writer.setIndent(" "); 43 prettyprint(reader, writer); 44 reader.close(); 45 writer.close(); 46 //System.out.println(stringWriter); 47 } 48 49 static LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(); 50 prettyprint(JsonReader reader, JsonWriter writer)51 static void prettyprint(JsonReader reader, JsonWriter writer) throws IOException { 52 boolean afterId = false; 53 boolean afterName = false; 54 boolean afterLanguage = false; 55 String lastId = null; 56 Multimap<String, String> languageIdToName = TreeMultimap.create(); 57 58 main: while (true) { 59 JsonToken token = reader.peek(); 60 switch (token) { 61 case BEGIN_ARRAY: 62 reader.beginArray(); 63 writer.beginArray(); 64 break; 65 case END_ARRAY: 66 reader.endArray(); 67 writer.endArray(); 68 afterLanguage = false; 69 break; 70 case BEGIN_OBJECT: 71 reader.beginObject(); 72 writer.beginObject(); 73 break; 74 case END_OBJECT: 75 reader.endObject(); 76 writer.endObject(); 77 break; 78 case NAME: 79 String name = reader.nextName(); 80 switch (name) { 81 case "id": 82 afterId = afterLanguage; 83 break; 84 case "name": 85 afterName = afterLanguage; 86 break; 87 case "languages": 88 afterLanguage = true; 89 break; 90 } 91 writer.name(name); 92 break; 93 case STRING: 94 String s = reader.nextString(); 95 writer.value(s); 96 if (afterId) { 97 lastId = ltc.transform(s); 98 afterId = false; 99 } else if (afterName) { 100 languageIdToName.put(lastId, s); 101 afterName = false; 102 } 103 break; 104 case NUMBER: 105 String n = reader.nextString(); 106 writer.value(new BigDecimal(n)); 107 break; 108 case BOOLEAN: 109 boolean b = reader.nextBoolean(); 110 writer.value(b); 111 break; 112 case NULL: 113 reader.nextNull(); 114 writer.nullValue(); 115 break; 116 case END_DOCUMENT: 117 break main; 118 } 119 } 120 int count = 0; 121 CLDRFile en = CLDRConfig.getInstance().getEnglish(); 122 TreeMultimap<String, String> keyboardLangs = TreeMultimap.create(); 123 for (String kpid : Keyboard.getPlatformIDs()) { 124 for (String kid : Keyboard.getKeyboardIDs(kpid)) { 125 keyboardLangs.put(ltp.set(kid).getLanguageScript(), kid); 126 } 127 } 128 129 LikelySubtags likely = new LikelySubtags(); 130 LanguageTagParser ltp = new LanguageTagParser(); 131 132 Set<String> minBoard = new HashSet<>(); 133 for (String boardId : board) { 134 ltp.set(boardId); 135 ltp.setRegion(""); 136 String min = ltc.transform(ltp.toString()); 137 minBoard.add(min == null ? boardId : min); 138 } 139 140 TreeSet<String> langs = new TreeSet<>(); 141 langs.addAll(keyboardLangs.keySet()); 142 langs.addAll(languageIdToName.keySet()); 143 langs.addAll(minBoard); 144 for (String lang : langs) { 145 PopulationData pop = getPopulationData(lang); 146 System.out.println( 147 // ++count 148 // + "\t" + 149 en.getName(lang) 150 + "\t" + lang 151 + "\t" + (pop != null ? (long) pop.getLiteratePopulation() : "-1") 152 + "\t" + (keyboardLangs.containsKey(lang) ? "CLDR" : "") 153 + "\t" + (languageIdToName.containsKey(lang) ? "SIL" : "") 154 + "\t" + (minBoard.contains(lang) ? "GB" : "")); 155 } 156 } 157 158 static LanguageTagParser ltp = new LanguageTagParser(); 159 static LikelySubtags ls = new LikelySubtags(); 160 static Map<String, String> unfixedData = new TreeMap<>(); 161 static { 162 for (String s : SUPPLEMENTAL_DATA_INFO.getLanguagesForTerritoriesPopulationData()) { 163 String fixed = ltc.transform(s); 164 if (!fixed.equals(s)) { unfixedData.put(fixed, s)165 unfixedData.put(fixed, s); 166 System.out.println(s + " => " + fixed); 167 } 168 } 169 } 170 getPopulationData(String lang)171 private static PopulationData getPopulationData(String lang) { 172 PopulationData pop = SUPPLEMENTAL_DATA_INFO.getLanguagePopulationData(lang); 173 if (pop == null) { 174 String unfixed = unfixedData.get(lang); 175 if (unfixed != null) { 176 pop = SUPPLEMENTAL_DATA_INFO.getLanguagePopulationData(unfixed); 177 } 178 } 179 // if (pop == null) { 180 // final String maximize = ls.maximize(lang); 181 // if (maximize != null) { 182 // ltp.set(maximize); 183 // SUPPLEMENTAL_DATA_INFO.getLanguagePopulationData(ltp.getLanguageScript()); 184 // } 185 // } 186 return pop; 187 } 188 } 189