1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.util.Collection;
5 import java.util.HashSet;
6 import java.util.Map;
7 import java.util.Map.Entry;
8 import java.util.Set;
9 import java.util.TreeMap;
10 
11 import org.unicode.cldr.util.CLDRConfig;
12 import org.unicode.cldr.util.CLDRFile;
13 import org.unicode.cldr.util.CLDRFile.WinningChoice;
14 import org.unicode.cldr.util.CLDRPaths;
15 import org.unicode.cldr.util.Factory;
16 import org.unicode.cldr.util.Iso639Data;
17 import org.unicode.cldr.util.LanguageTagCanonicalizer;
18 import org.unicode.cldr.util.LanguageTagParser;
19 import org.unicode.cldr.util.SimpleFactory;
20 import org.unicode.cldr.util.StandardCodes;
21 import org.unicode.cldr.util.StandardCodes.LstrField;
22 import org.unicode.cldr.util.StandardCodes.LstrType;
23 import org.unicode.cldr.util.SupplementalDataInfo;
24 
25 import com.google.common.collect.ImmutableMap;
26 import com.google.common.collect.ImmutableMultimap;
27 import com.google.common.collect.Multimap;
28 import com.google.common.collect.TreeMultimap;
29 import com.ibm.icu.lang.UScript;
30 import com.ibm.icu.text.UnicodeSet;
31 
32 public class DeriveScripts {
33     private static final boolean SHOW = false;
34 
35     static final CLDRConfig CONFIG = CLDRConfig.getInstance();
36     static final SupplementalDataInfo SUP = CONFIG.getSupplementalDataInfo();
37     static final Multimap<String, String> LANG_TO_SCRIPT;
38     static final Map<String, String> SUPPRESS;
39 
40     static {
41         File[] paths = {
42 //            new File(CLDRPaths.MAIN_DIRECTORY),
43 //            new File(CLDRPaths.SEED_DIRECTORY),
44             new File(CLDRPaths.EXEMPLARS_DIRECTORY) };
45         final Factory fullCldrFactory = SimpleFactory.make(paths, ".*");
46         LikelySubtags ls = new LikelySubtags();
47         LanguageTagParser ltp = new LanguageTagParser();
48         Set<String> seen = new HashSet<>();
49 
50         Multimap<String, String> langToScript = TreeMultimap.create();
51 
52         Map<String, String> suppress = new TreeMap<>();
53         final Map<String, Map<LstrField, String>> langToInfo = StandardCodes.getLstregEnumRaw().get(LstrType.language);
54         for (Entry<String, Map<LstrField, String>> entry : langToInfo.entrySet()) {
55             final String suppressValue = entry.getValue().get(LstrField.Suppress_Script);
56             if (suppressValue != null) {
57                 final String langCode = entry.getKey();
58                 String likelyScript = ls.getLikelyScript(langCode);
59                 if (!likelyScript.equals("Zzzz")) {
60 //                    if (!suppressValue.equals(likelyScript)) {
61 //                        System.out.println("#" + langCode + "\tWarning: likely=" + likelyScript + ", suppress=" + suppressValue);
62 //                    } else {
63 //                        System.out.println("#" + langCode + "\tSuppress=Likely: " + suppressValue);
64 //                    }
65                     continue;
66                 }
suppress.put(langCode, suppressValue)67                 suppress.put(langCode, suppressValue);
68             }
69         }
70         SUPPRESS = ImmutableMap.copyOf(suppress);
71 
72         LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer();
73 
74         for (String file : fullCldrFactory.getAvailable()) {
75             String langScript = ltp.set(file).getLanguage();
76             if (!file.equals(langScript)) { // skip other variants
77                 continue;
78             }
79 //            System.out.println(file);
80 //            if (!seen.add(lang)) { // add if not present
81 //                continue;
82 //            }
83             String lang = canon.transform(ltp.getLanguage());
84             if (lang.equals("root")) {
85                 continue;
86             }
87 
88 //            String likelyScript = ls.getLikelyScript(lang);
89 //            if (!likelyScript.equals("Zzzz")) {
90 //                continue;
91 //            }
92 
93             String script = "";
94 //            script = ltp.getScript();
95 //            if (!script.isEmpty()) {
96 //                add(langToScript, lang, script);
97 //                continue;
98 //            }
99 
100             CLDRFile cldrFile;
101             try {
102                 cldrFile = fullCldrFactory.make(lang, false);
103             } catch(final SimpleFactory.NoSourceDirectoryException nsde) {
104                 throw new RuntimeException("Cannot load locale "+ lang+" for " + file
105                     + " (canonicalized from " + ltp.getLanguage()+")", nsde);
106             }
107             UnicodeSet exemplars = cldrFile.getExemplarSet("", WinningChoice.WINNING);
108             for (String s : exemplars) {
109                 int scriptNum = UScript.getScript(s.codePointAt(0));
110                 if (scriptNum != UScript.COMMON && scriptNum != UScript.INHERITED && scriptNum != UScript.UNKNOWN) {
111                     script = UScript.getShortName(scriptNum);
112                     break;
113                 }
114             }
115             if (!script.isEmpty()) {
add(langToScript, lang, script)116                 add(langToScript, lang, script);
117             }
118         }
119         LANG_TO_SCRIPT = ImmutableMultimap.copyOf(langToScript);
120     }
121 
add(Multimap<String, String> langToScript, String lang, String script)122     private static void add(Multimap<String, String> langToScript, String lang, String script) {
123         if (script != null) {
124             if (langToScript.put(lang, script)) {
125                 if (SHOW) System.out.println("# Adding from actual exemplars: " + lang + ", " + script);
126             }
127         }
128     }
129 
getLanguageToScript()130     public static Multimap<String, String> getLanguageToScript() {
131         return LANG_TO_SCRIPT;
132     }
133 
showLine(String language, String scriptField, String status)134     public static void showLine(String language, String scriptField, String status) {
135         CLDRFile english = CONFIG.getEnglish();
136         System.out.println(language + ";\t" + scriptField + "\t# " + english.getName(CLDRFile.LANGUAGE_NAME, language)
137             + ";\t" + status
138             + ";\t" + Iso639Data.getScope(language)
139             + ";\t" + Iso639Data.getType(language));
140     }
141 
main(String[] args)142     public static void main(String[] args) {
143         LikelySubtags ls = new LikelySubtags();
144         CLDRFile english = CONFIG.getEnglish();
145         int count = 0;
146 
147         int i = 0;
148         System.out.println("#From Suppress Script");
149         for (Entry<String, String> entry : SUPPRESS.entrySet()) {
150             showLine(entry.getKey(), entry.getValue(), "Suppress");
151             ++i;
152         }
153         System.out.println("#total:\t" + i);
154         i = 0;
155         boolean haveMore = true;
156 
157         System.out.println("\n#From Exemplars");
158         for (int scriptCount = 1; haveMore; ++scriptCount) {
159             haveMore = false;
160             if (scriptCount != 1) {
161                 System.out.println("\n#NEEDS RESOLUTION:\t" + scriptCount + " scripts");
162             }
163             for (Entry<String, Collection<String>> entry : getLanguageToScript().asMap().entrySet()) {
164                 Collection<String> scripts = entry.getValue();
165                 final int scriptsSize = scripts.size();
166                 if (scriptsSize != scriptCount) {
167                     if (scriptsSize > scriptCount) {
168                         haveMore = true;
169                     }
170                     continue;
171                 }
172 
173                 String lang = entry.getKey();
174                 showLine(lang, scripts.size() == 1 ? scripts.iterator().next() : scripts.toString(), "Exemplars" + (scripts.size() == 1 ? "" : "*"));
175                 ++i;
176                 String likelyScript = scriptsSize == 1 ? "" : ls.getLikelyScript(lang);
177                 System.out.println(++count + "\t" + scriptsSize + "\t" + lang + "\t" + english.getName(lang)
178                     + "\t" + scripts + "\t" + likelyScript
179 //                + "\t" + script + "\t" + english.getName(CLDRFile.SCRIPT_NAME, script)
180                 );
181             }
182             System.out.println("#total:\t" + i);
183             i = 0;
184         }
185     }
186 
getSuppress()187     public static Map<String, String> getSuppress() {
188         return SUPPRESS;
189     }
190 }
191