1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.util.Collection;
5 import java.util.HashSet;
6 import java.util.Map;
7 import java.util.Map.Entry;
8 import java.util.Set;
9 import java.util.TreeMap;
10 
11 import org.unicode.cldr.util.CLDRConfig;
12 import org.unicode.cldr.util.CLDRFile;
13 import org.unicode.cldr.util.CLDRFile.WinningChoice;
14 import org.unicode.cldr.util.CLDRPaths;
15 import org.unicode.cldr.util.Factory;
16 import org.unicode.cldr.util.Iso639Data;
17 import org.unicode.cldr.util.LanguageTagCanonicalizer;
18 import org.unicode.cldr.util.LanguageTagParser;
19 import org.unicode.cldr.util.SimpleFactory;
20 import org.unicode.cldr.util.StandardCodes;
21 import org.unicode.cldr.util.StandardCodes.LstrField;
22 import org.unicode.cldr.util.StandardCodes.LstrType;
23 import org.unicode.cldr.util.SupplementalDataInfo;
24 
25 import com.google.common.collect.ImmutableMap;
26 import com.google.common.collect.ImmutableMultimap;
27 import com.google.common.collect.Multimap;
28 import com.google.common.collect.TreeMultimap;
29 import com.ibm.icu.lang.UScript;
30 import com.ibm.icu.text.UnicodeSet;
31 
32 public class DeriveScripts {
33     private static final boolean SHOW = false;
34 
35     static final CLDRConfig CONFIG = CLDRConfig.getInstance();
36     static final SupplementalDataInfo SUP = CONFIG.getSupplementalDataInfo();
37     static final Multimap<String, String> LANG_TO_SCRIPT;
38     static final Map<String, String> SUPPRESS;
39 
40     static {
41         File[] paths = {
42 //            new File(CLDRPaths.MAIN_DIRECTORY),
43 //            new File(CLDRPaths.SEED_DIRECTORY),
44             new File(CLDRPaths.EXEMPLARS_DIRECTORY) };
45         final Factory fullCldrFactory = SimpleFactory.make(paths, ".*");
46         LikelySubtags ls = new LikelySubtags();
47         LanguageTagParser ltp = new LanguageTagParser();
48         Set<String> seen = new HashSet<>();
49 
50         Multimap<String, String> langToScript = TreeMultimap.create();
51 
52         Map<String, String> suppress = new TreeMap<>();
53         final Map<String, Map<LstrField, String>> langToInfo = StandardCodes.getLstregEnumRaw().get(LstrType.language);
54         for (Entry<String, Map<LstrField, String>> entry : langToInfo.entrySet()) {
55             final String suppressValue = entry.getValue().get(LstrField.Suppress_Script);
56             if (suppressValue != null) {
57                 final String langCode = entry.getKey();
58                 String likelyScript = ls.getLikelyScript(langCode);
59                 if (!likelyScript.equals("Zzzz")) {
60 //                    if (!suppressValue.equals(likelyScript)) {
61 //                        System.out.println("#" + langCode + "\tWarning: likely=" + likelyScript + ", suppress=" + suppressValue);
62 //                    } else {
63 //                        System.out.println("#" + langCode + "\tSuppress=Likely: " + suppressValue);
64 //                    }
65                     continue;
66                 }
suppress.put(langCode, suppressValue)67                 suppress.put(langCode, suppressValue);
68             }
69         }
70         SUPPRESS = ImmutableMap.copyOf(suppress);
71 
72         LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer();
73 
74         for (String file : fullCldrFactory.getAvailable()) {
75             String langScript = ltp.set(file).getLanguage();
76             if (!file.equals(langScript)) { // skip other variants
77                 continue;
78             }
79 //            System.out.println(file);
80 //            if (!seen.add(lang)) { // add if not present
81 //                continue;
82 //            }
83             String lang = canon.transform(ltp.getLanguage());
84             if (lang.equals("root")) {
85                 continue;
86             }
87 
88 //            String likelyScript = ls.getLikelyScript(lang);
89 //            if (!likelyScript.equals("Zzzz")) {
90 //                continue;
91 //            }
92 
93             String script = "";
94 //            script = ltp.getScript();
95 //            if (!script.isEmpty()) {
96 //                add(langToScript, lang, script);
97 //                continue;
98 //            }
99 
100             CLDRFile cldrFile = fullCldrFactory.make(lang, false);
101             UnicodeSet exemplars = cldrFile.getExemplarSet("", WinningChoice.WINNING);
102             for (String s : exemplars) {
103                 int scriptNum = UScript.getScript(s.codePointAt(0));
104                 if (scriptNum != UScript.COMMON && scriptNum != UScript.INHERITED && scriptNum != UScript.UNKNOWN) {
105                     script = UScript.getShortName(scriptNum);
106                     break;
107                 }
108             }
109             if (!script.isEmpty()) {
add(langToScript, lang, script)110                 add(langToScript, lang, script);
111             }
112         }
113         LANG_TO_SCRIPT = ImmutableMultimap.copyOf(langToScript);
114     }
115 
add(Multimap<String, String> langToScript, String lang, String script)116     private static void add(Multimap<String, String> langToScript, String lang, String script) {
117         if (script != null) {
118             if (langToScript.put(lang, script)) {
119                 if (SHOW) System.out.println("# Adding from actual exemplars: " + lang + ", " + script);
120             }
121         }
122     }
123 
getLanguageToScript()124     public static Multimap<String, String> getLanguageToScript() {
125         return LANG_TO_SCRIPT;
126     }
127 
showLine(String language, String scriptField, String status)128     public static void showLine(String language, String scriptField, String status) {
129         CLDRFile english = CONFIG.getEnglish();
130         System.out.println(language + ";\t" + scriptField + "\t# " + english.getName(CLDRFile.LANGUAGE_NAME, language)
131             + ";\t" + status
132             + ";\t" + Iso639Data.getScope(language)
133             + ";\t" + Iso639Data.getType(language));
134     }
135 
main(String[] args)136     public static void main(String[] args) {
137         LikelySubtags ls = new LikelySubtags();
138         CLDRFile english = CONFIG.getEnglish();
139         int count = 0;
140 
141         int i = 0;
142         System.out.println("#From Suppress Script");
143         for (Entry<String, String> entry : SUPPRESS.entrySet()) {
144             showLine(entry.getKey(), entry.getValue(), "Suppress");
145             ++i;
146         }
147         System.out.println("#total:\t" + i);
148         i = 0;
149         boolean haveMore = true;
150 
151         System.out.println("\n#From Exemplars");
152         for (int scriptCount = 1; haveMore; ++scriptCount) {
153             haveMore = false;
154             if (scriptCount != 1) {
155                 System.out.println("\n#NEEDS RESOLUTION:\t" + scriptCount + " scripts");
156             }
157             for (Entry<String, Collection<String>> entry : getLanguageToScript().asMap().entrySet()) {
158                 Collection<String> scripts = entry.getValue();
159                 final int scriptsSize = scripts.size();
160                 if (scriptsSize != scriptCount) {
161                     if (scriptsSize > scriptCount) {
162                         haveMore = true;
163                     }
164                     continue;
165                 }
166 
167                 String lang = entry.getKey();
168                 showLine(lang, scripts.size() == 1 ? scripts.iterator().next() : scripts.toString(), "Exemplars" + (scripts.size() == 1 ? "" : "*"));
169                 ++i;
170                 String likelyScript = scriptsSize == 1 ? "" : ls.getLikelyScript(lang);
171                 System.out.println(++count + "\t" + scriptsSize + "\t" + lang + "\t" + english.getName(lang)
172                     + "\t" + scripts + "\t" + likelyScript
173 //                + "\t" + script + "\t" + english.getName(CLDRFile.SCRIPT_NAME, script)
174                 );
175             }
176             System.out.println("#total:\t" + i);
177             i = 0;
178         }
179     }
180 
getSuppress()181     public static Map<String, String> getSuppress() {
182         return SUPPRESS;
183     }
184 }
185