1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.StringWriter;
5 import java.math.BigDecimal;
6 import java.util.HashSet;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Set;
10 import java.util.TreeMap;
11 import java.util.TreeSet;
12 
13 import org.unicode.cldr.draft.FileUtilities;
14 import org.unicode.cldr.draft.Keyboard;
15 import org.unicode.cldr.util.CLDRConfig;
16 import org.unicode.cldr.util.CLDRFile;
17 import org.unicode.cldr.util.LanguageTagCanonicalizer;
18 import org.unicode.cldr.util.LanguageTagParser;
19 import org.unicode.cldr.util.SupplementalDataInfo;
20 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
21 
22 import com.google.common.base.Splitter;
23 import com.google.common.collect.Multimap;
24 import com.google.common.collect.TreeMultimap;
25 import com.google.gson.Gson;
26 import com.google.gson.stream.JsonReader;
27 import com.google.gson.stream.JsonToken;
28 import com.google.gson.stream.JsonWriter;
29 
30 public class KeymanCheck {
31     static Splitter SPACE = Splitter.on(' ').trimResults().omitEmptyStrings();
32     static final List<String> board = SPACE.splitToList(
33         "aa-ET ab-GE an-ES apa-US ar-AE ar-DZ ar-IL ar-IQ ar-JO ar-KW ar-LB ar-LY ar-QA ar-SA ar-SD ar-SY ar-TN ar-YE arn-CL ast-ES av-RU awa-IN ay-BO az-AZ az-IR ba-RU bal-PK bar-AT bfy-IN bgc-IN bgq-IN bgq-PK bh-IN bho-IN bho-MU bho-NP bi-VU bm-ML bn-IN bns-IN bo-CN br-FR brx-Beng-IN brx-Deva-IN brx-Latn-IN bs-BA bug-ID ca-AD ca-ES ce-RU ceb-PH ch-GU cho-US chr-US cmn-Hans-SG cmn-Hant-HK co-FR cr-Cans-CA cr-Latn-CA cv-RU cy-GB dak-US de-AT de-CH de-LI de-LU dhd-IN din-SD doi-Arab-IN doi-Deva-IN dv-MV dz-BT ee-GH en-BD en-BM en-HK en-IE en-MY en-NZ en-PK en-SD en-SG en-TT en-TZ en-UG eo-001 es-419 es-BO es-CL es-CO es-CR es-CU es-DO es-EC es-GQ es-GT es-HN es-NI es-PA es-PE es-PR es-PY es-SV es-UY es-VE esu-US fa-AF ff-011 fj-FJ fo-FO fr-BE fr-CH fr-CI fr-LU fr-SN fy-NL ga-IE gbm-IN gd-GB gn-PG gv-IM gyn-GY ha-NG haw-US hi-Latn-IN hil-PH hmn-CN hne-IN ho-PG hoj-IN hsb-DE hu-SK hy-AM hz-NA ia-001 ig-NG ii-CN ik-US ilo-PH it-CH it-MT iu-Cans-CA iu-Latn-CA jbo-001 kfy-IN kg-CD ki-KE kj-AO kl-GL kok-Deva-IN kok-Knda-IN kok-Latn-IN kr-NG kri-SL ks-Arab-IN ks-Deva-IN ktu-CD ku-Arab-IQ ku-Latn-IQ kv-RU ky-KG la-001 lb-LU lg-UG li-NL lis-CN lkt-US lmn-IN lmo-IT ln-CD lu-CD mad-ID mag-IN mag-NP mai-NP mg-MG mh-MH mi-NZ min-ID ml-IN mn-CN mni-Beng-IN mrj-RU ms-SG mt-MT mtr-IN mup-IN mus-US mwr-IN na-NR nah-MX nap-IT nd-ZW nds-DE ne-IN new-NP nl-BE nn-NO noe-IN nr-ZA nso-ZA nv-US ny-MW oc-FR oj-CA om-ET os-RU pa-Arab-PK pap-CW pms-IT ps-AF pt-AO pt-TL qu-BO quc-GT raj-PK rm-CH rn-BI ro-MD ru-BY ru-KZ ru-UA sa-IN sah-RU sat-Beng-IN sat-Deva-IN sat-Latn-IN sat-Olck-IN sc-IT sck-IN scn-IT sco-GB sd-Arab-IN sd-Deva-IN sd-PK se-NO see-US sg-CF sgs-LT sjp-BD sm-WS sn-ZW sr-Cyrl-ME sr-Cyrl-RS sr-Latn-ME ss-ZA st-ZA sv-FI sw-KE sw-TZ sw-UG syl-BD ta-LK ta-SG tcy-IN tet-TL ti-ET tk-TM tn-BW tn-ZA to-TO tpi-PG ts-ZA tt-RU tw-GH ty-PF ug-CN ur-IN var-IN ve-ZA vec-IT wa-BE war-PH wen-DE wo-SN xh-ZA xnr-IN yi-US yo-NG");
34 
35     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CLDRConfig.getInstance().getSupplementalDataInfo();
36 
main(String[] args)37     public static void main(String[] args) throws IOException {
38         Gson gson = new Gson();
39         JsonReader reader = gson.newJsonReader(FileUtilities.openFile("/Users/markdavis/Google Drive/workspace/DATA/cldr/", "keyman.json"));
40         final StringWriter stringWriter = new StringWriter();
41         JsonWriter writer = gson.newJsonWriter(stringWriter);
42         writer.setIndent("  ");
43         prettyprint(reader, writer);
44         reader.close();
45         writer.close();
46         //System.out.println(stringWriter);
47     }
48 
49     static LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer();
50 
prettyprint(JsonReader reader, JsonWriter writer)51     static void prettyprint(JsonReader reader, JsonWriter writer) throws IOException {
52         boolean afterId = false;
53         boolean afterName = false;
54         boolean afterLanguage = false;
55         String lastId = null;
56         Multimap<String, String> languageIdToName = TreeMultimap.create();
57 
58         main: while (true) {
59             JsonToken token = reader.peek();
60             switch (token) {
61             case BEGIN_ARRAY:
62                 reader.beginArray();
63                 writer.beginArray();
64                 break;
65             case END_ARRAY:
66                 reader.endArray();
67                 writer.endArray();
68                 afterLanguage = false;
69                 break;
70             case BEGIN_OBJECT:
71                 reader.beginObject();
72                 writer.beginObject();
73                 break;
74             case END_OBJECT:
75                 reader.endObject();
76                 writer.endObject();
77                 break;
78             case NAME:
79                 String name = reader.nextName();
80                 switch (name) {
81                 case "id":
82                     afterId = afterLanguage;
83                     break;
84                 case "name":
85                     afterName = afterLanguage;
86                     break;
87                 case "languages":
88                     afterLanguage = true;
89                     break;
90                 }
91                 writer.name(name);
92                 break;
93             case STRING:
94                 String s = reader.nextString();
95                 writer.value(s);
96                 if (afterId) {
97                     lastId = ltc.transform(s);
98                     afterId = false;
99                 } else if (afterName) {
100                     languageIdToName.put(lastId, s);
101                     afterName = false;
102                 }
103                 break;
104             case NUMBER:
105                 String n = reader.nextString();
106                 writer.value(new BigDecimal(n));
107                 break;
108             case BOOLEAN:
109                 boolean b = reader.nextBoolean();
110                 writer.value(b);
111                 break;
112             case NULL:
113                 reader.nextNull();
114                 writer.nullValue();
115                 break;
116             case END_DOCUMENT:
117                 break main;
118             }
119         }
120         int count = 0;
121         CLDRFile en = CLDRConfig.getInstance().getEnglish();
122         TreeMultimap<String, String> keyboardLangs = TreeMultimap.create();
123         for (String kpid : Keyboard.getPlatformIDs()) {
124             for (String kid : Keyboard.getKeyboardIDs(kpid)) {
125                 keyboardLangs.put(ltp.set(kid).getLanguageScript(), kid);
126             }
127         }
128 
129         LikelySubtags likely = new LikelySubtags();
130         LanguageTagParser ltp = new LanguageTagParser();
131 
132         Set<String> minBoard = new HashSet<>();
133         for (String boardId : board) {
134             ltp.set(boardId);
135             ltp.setRegion("");
136             String min = ltc.transform(ltp.toString());
137             minBoard.add(min == null ? boardId : min);
138         }
139 
140         TreeSet<String> langs = new TreeSet<>();
141         langs.addAll(keyboardLangs.keySet());
142         langs.addAll(languageIdToName.keySet());
143         langs.addAll(minBoard);
144         for (String lang : langs) {
145             PopulationData pop = getPopulationData(lang);
146             System.out.println(
147 //                ++count
148 //                 + "\t" +
149                 en.getName(lang)
150                     + "\t" + lang
151                     + "\t" + (pop != null ? (long) pop.getLiteratePopulation() : "-1")
152                     + "\t" + (keyboardLangs.containsKey(lang) ? "CLDR" : "")
153                     + "\t" + (languageIdToName.containsKey(lang) ? "SIL" : "")
154                     + "\t" + (minBoard.contains(lang) ? "GB" : ""));
155         }
156     }
157 
158     static LanguageTagParser ltp = new LanguageTagParser();
159     static LikelySubtags ls = new LikelySubtags();
160     static Map<String, String> unfixedData = new TreeMap<>();
161     static {
162         for (String s : SUPPLEMENTAL_DATA_INFO.getLanguagesForTerritoriesPopulationData()) {
163             String fixed = ltc.transform(s);
164             if (!fixed.equals(s)) {
unfixedData.put(fixed, s)165                 unfixedData.put(fixed, s);
166                 System.out.println(s + " => " + fixed);
167             }
168         }
169     }
170 
getPopulationData(String lang)171     private static PopulationData getPopulationData(String lang) {
172         PopulationData pop = SUPPLEMENTAL_DATA_INFO.getLanguagePopulationData(lang);
173         if (pop == null) {
174             String unfixed = unfixedData.get(lang);
175             if (unfixed != null) {
176                 pop = SUPPLEMENTAL_DATA_INFO.getLanguagePopulationData(unfixed);
177             }
178         }
179 //        if (pop == null) {
180 //            final String maximize = ls.maximize(lang);
181 //            if (maximize != null) {
182 //                ltp.set(maximize);
183 //                SUPPLEMENTAL_DATA_INFO.getLanguagePopulationData(ltp.getLanguageScript());
184 //            }
185 //        }
186         return pop;
187     }
188 }
189