1 package org.unicode.cldr.unittest;
2 
3 import java.util.Arrays;
4 import java.util.BitSet;
5 import java.util.Collections;
6 import java.util.EnumMap;
7 import java.util.HashSet;
8 import java.util.Iterator;
9 import java.util.LinkedHashSet;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.TreeSet;
13 
14 import org.unicode.cldr.draft.EnumLookup;
15 import org.unicode.cldr.draft.ScriptMetadata;
16 import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
17 import org.unicode.cldr.draft.ScriptMetadata.Info;
18 import org.unicode.cldr.draft.ScriptMetadata.Shaping;
19 import org.unicode.cldr.draft.ScriptMetadata.Trinary;
20 import org.unicode.cldr.util.CLDRConfig;
21 import org.unicode.cldr.util.CLDRFile;
22 import org.unicode.cldr.util.Containment;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.With;
25 import org.unicode.cldr.util.XPathParts;
26 
27 import com.ibm.icu.dev.util.CollectionUtilities;
28 import com.ibm.icu.impl.Relation;
29 import com.ibm.icu.impl.Row;
30 import com.ibm.icu.lang.UCharacter;
31 import com.ibm.icu.lang.UProperty;
32 import com.ibm.icu.lang.UScript;
33 import com.ibm.icu.text.UTF16;
34 import com.ibm.icu.text.UnicodeSet;
35 import com.ibm.icu.util.VersionInfo;
36 
37 public class TestScriptMetadata extends TestFmwkPlus {
    /** Unicode version reported by the ICU library in use; compared against each script's {@code age} in {@code TestScriptOfSample}. */
    private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion();
    /** Shared CLDR configuration; the tests read the standard codes and the English CLDR file from it. */
    static CLDRConfig testInfo = CLDRConfig.getInstance();
40 
main(String[] args)41     public static void main(String[] args) {
42         new TestScriptMetadata().run(args);
43     }
44 
TestLookup()45     public void TestLookup() {
46         EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class);
47         assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use"));
48     }
49 
TestScriptOfSample()50     public void TestScriptOfSample() {
51         BitSet bitset = new BitSet();
52         for (String script : new TreeSet<String>(ScriptMetadata.getScripts())) {
53             Info info0 = ScriptMetadata.getInfo(script);
54             int codePointCount = UTF16.countCodePoint(info0.sampleChar);
55             assertEquals("Sample must be single character", 1, codePointCount);
56             if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) {
57                 int scriptCode = UScript.getScriptExtensions(
58                     info0.sampleChar.codePointAt(0), bitset);
59                 assertTrue(script + ": The sample character must have a " +
60                     "single, valid script, no ScriptExtensions: " + scriptCode,
61                     scriptCode >= 0);
62             }
63         }
64     }
65 
TestBasic()66     public void TestBasic() {
67         Info info0 = ScriptMetadata.getInfo(UScript.LATIN);
68         if (ScriptMetadata.errors.size() != 0) {
69             if (ScriptMetadata.errors.size() == 1) {
70                 logln("ScriptMetadata initialization errors\t"
71                     + ScriptMetadata.errors.size() + "\t"
72                     + CollectionUtilities.join(ScriptMetadata.errors, "\n"));
73             } else {
74                 errln("ScriptMetadata initialization errors\t"
75                     + ScriptMetadata.errors.size() + "\t"
76                     + CollectionUtilities.join(ScriptMetadata.errors, "\n"));
77             }
78         }
79 
80         // Latin Latn 2 L European Recommended no no no no
81         assertEquals("Latin-rank", 2, info0.rank);
82         assertEquals("Latin-country", "IT", info0.originCountry);
83         assertEquals("Latin-sample", "L", info0.sampleChar);
84         assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED,
85             info0.idUsage);
86         assertEquals("Latin-ime?", Trinary.NO, info0.ime);
87         assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters);
88         assertEquals("Latin-rtl?", Trinary.NO, info0.rtl);
89         assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq);
90         assertEquals("Latin-density", 1, info0.density);
91         assertEquals("Latin-Case", Trinary.YES, info0.hasCase);
92 
93         info0 = ScriptMetadata.getInfo(UScript.HEBREW);
94         assertEquals("Arabic-rtl", Trinary.YES, info0.rtl);
95         assertEquals("Arabic-shaping", Shaping.NO, info0.shapingReq);
96         assertEquals("Arabic-Case", Trinary.NO, info0.hasCase);
97     }
98 
99     @SuppressWarnings("deprecation")
TestScripts()100     public void TestScripts() {
101         UnicodeSet temp = new UnicodeSet();
102         Set<String> missingScripts = new TreeSet<String>();
103         Relation<IdUsage, String> map = Relation.of(
104             new EnumMap<IdUsage, Set<String>>(IdUsage.class),
105             LinkedHashSet.class);
106         for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) {
107             Info info = ScriptMetadata.getInfo(i);
108             if (info != null) {
109                 map.put(info.idUsage,
110                     UScript.getName(i) + "\t(" + UScript.getShortName(i)
111                         + ")\t" + info);
112             } else {
113                 // There are many script codes that are not "real"; there are no
114                 // Unicode characters for them.
115                 // separate those out.
116                 temp.applyIntPropertyValue(UProperty.SCRIPT, i);
117                 if (temp.size() != 0) { // is real
118                     errln("Missing script metadata for " + UScript.getName(i)
119                         + "\t(" + UScript.getShortName(i));
120                 } else { // is not real
121                     missingScripts.add(UScript.getShortName(i));
122                 }
123             }
124         }
125         for (Entry<IdUsage, String> entry : map.keyValueSet()) {
126             logln("Script metadata found for script:" + entry.getValue());
127         }
128         if (!missingScripts.isEmpty()) {
129             logln("No script metadata for the following scripts (no Unicode characters defined): "
130                 + missingScripts.toString());
131         }
132     }
133 
134     // lifted from ShowLanguages
getEnglishTypes(String type, int code, StandardCodes sc, CLDRFile english)135     private static Set<String> getEnglishTypes(String type, int code,
136         StandardCodes sc, CLDRFile english) {
137         Set<String> result = new HashSet<String>(
138             sc.getSurveyToolDisplayCodes(type));
139         XPathParts parts = new XPathParts();
140         for (Iterator<String> it = english.getAvailableIterator(code); it
141             .hasNext();) {
142             parts.set(it.next());
143             String newType = parts.getAttributeValue(-1, "type");
144             if (!result.contains(newType)) {
145                 result.add(newType);
146             }
147         }
148         return result;
149     }
150 
151     // lifted from ShowLanguages
getScriptsToShow(StandardCodes sc, CLDRFile english)152     private static Set<String> getScriptsToShow(StandardCodes sc,
153         CLDRFile english) {
154         return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english);
155     }
156 
TestShowLanguages()157     public void TestShowLanguages() {
158         // lifted from ShowLanguages - this is what ShowLanguages tried to do.
159         StandardCodes sc = testInfo.getStandardCodes();
160         CLDRFile english = testInfo.getEnglish();
161         Set<String> bads = new TreeSet<String>();
162         UnicodeSet temp = new UnicodeSet();
163         for (String s : getScriptsToShow(sc, english)) {
164             if (ScriptMetadata.getInfo(s) == null) {
165                 // There are many script codes that are not "real"; there are no
166                 // Unicode characters for them.
167                 // separate those out.
168                 temp.applyIntPropertyValue(UProperty.SCRIPT,
169                     UScript.getCodeFromName(s));
170                 if (temp.size() != 0) { // is real
171                     bads.add(s);
172                 }
173             }
174         }
175         if (!bads.isEmpty()) {
176             errln("No metadata for scripts: " + bads.toString());
177         }
178     }
179 
TestGeographicGrouping()180     public void TestGeographicGrouping() {
181         CLDRFile english = testInfo.getEnglish();
182         Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<Row.R3<IdUsage, String, String>>();
183         Set<String> extras = ScriptMetadata.getExtras();
184         for (Entry<String, Info> sc : ScriptMetadata.iterable()) {
185             String scriptCode = sc.getKey();
186             if (extras.contains(scriptCode)) {
187                 continue;
188             }
189             Info info = sc.getValue();
190             String continent = Containment.getContinent(info.originCountry);
191             String container = !continent.equals("142") ? continent
192                 : Containment.getSubcontinent(info.originCountry);
193 
194             lines.add(Row.of(
195                 info.idUsage,
196                 english.getName(CLDRFile.TERRITORY_NAME, continent),
197                 info.idUsage
198                     + "\t"
199                     + english.getName(CLDRFile.TERRITORY_NAME,
200                         container)
201                     + "\t" + scriptCode + "\t"
202                     + english.getName(CLDRFile.SCRIPT_NAME, scriptCode)));
203         }
204         for (Row.R3<IdUsage, String, String> s : lines) {
205             logln(s.get2());
206         }
207     }
208 
TestScriptCategories()209     public void TestScriptCategories() {
210 
211         // test completeness
212         Set<String> scripts = new TreeSet<String>(ScriptMetadata.getScripts());
213         scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz"));
214         logln("All: " + scripts);
215         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
216             logln(x + ": " + x.scripts.toString());
217             scripts.removeAll(x.scripts);
218         }
219         assertEquals("Completeness", Collections.EMPTY_SET, scripts);
220 
221         // test no overlap
222         assertEquals("Overlap", Collections.EMPTY_SET, scripts);
223         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
224             for (ScriptMetadata.Groupings y : ScriptMetadata.Groupings.values()) {
225                 if (y == x)
226                     continue;
227                 assertTrue("overlap",
228                     Collections.disjoint(x.scripts, y.scripts));
229             }
230         }
231 
232         // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN);
233         // assertEqualsX(Groupings.MIDDLE_EASTERN,
234         // ScriptCategories.OLD_MIDDLE_EASTERN);
235         // assertEqualsX(Groupings.SOUTH_ASIAN,
236         // ScriptCategories.OLD_SOUTH_ASIAN);
237         // assertEqualsX(Groupings.SOUTHEAST_ASIAN,
238         // ScriptCategories.OLD_SOUTHEAST_ASIAN);
239         // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN);
240         // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN);
241         // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN);
242         //
243         // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS,
244         // ScriptCategories.OLD_HISTORIC_SCRIPTS);
245         //
246     }
247 
248 //    private void assertEqualsX(Groupings aRaw, Set<String> bRaw) {
249 //        assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw);
250 //    }
251 
assertEqualsX(String title, Set<String> a, Set<String> bRaw)252     public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) {
253         TreeSet<String> b = With.in(bRaw).toCollection(
254             ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>());
255 
256         Set<String> a_b = new TreeSet<String>(a);
257         a_b.removeAll(b);
258         Set<String> b_a = new TreeSet<String>(b);
259         b_a.removeAll(a);
260         assertEquals(title + " New vs Old, ", a_b.toString(), b_a.toString());
261     }
262 
263 }
264