1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 package com.ibm.icu.dev.test.lang;
9 
10 import java.util.BitSet;
11 import java.util.Locale;
12 
13 import com.ibm.icu.dev.test.TestFmwk;
14 import com.ibm.icu.lang.UProperty;
15 import com.ibm.icu.lang.UScript;
16 import com.ibm.icu.lang.UScript.ScriptUsage;
17 import com.ibm.icu.text.UnicodeSet;
18 import com.ibm.icu.util.ULocale;
19 
20 public class TestUScript extends TestFmwk {
21 
22     /**
23     * Constructor
24     */
TestUScript()25     public TestUScript()
26     {
27     }
28 
main(String[] args)29     public static void main(String[] args) throws Exception {
30         new TestUScript().run(args);
31     }
32 
scriptsToString(int[] scripts)33     private static String scriptsToString(int[] scripts) {
34         if(scripts == null) {
35             return "null";
36         }
37         StringBuilder sb = new StringBuilder();
38         for(int script : scripts) {
39             if(sb.length() > 0) {
40                 sb.append(' ');
41             }
42             sb.append(UScript.getShortName(script));
43         }
44         return sb.toString();
45     }
46 
assertEqualScripts(String msg, int[] expectedScripts, int[] actualScripts)47     private void assertEqualScripts(String msg, int[] expectedScripts, int[] actualScripts) {
48         assertEquals(msg, scriptsToString(expectedScripts), scriptsToString(actualScripts));
49     }
50 
TestLocaleGetCode()51     public void TestLocaleGetCode(){
52         final ULocale[] testNames={
53         /* test locale */
54         new ULocale("en"), new ULocale("en_US"),
55         new ULocale("sr"), new ULocale("ta") ,
56         new ULocale("te_IN"),
57         new ULocale("hi"),
58         new ULocale("he"), new ULocale("ar"),
59         new ULocale("abcde"),
60         new ULocale("abcde_cdef"),
61         new ULocale("iw")
62         };
63         final int[] expected ={
64                 /* locales should return */
65                 UScript.LATIN, UScript.LATIN,
66                 UScript.CYRILLIC, UScript.TAMIL,
67                 UScript.TELUGU,UScript.DEVANAGARI,
68                 UScript.HEBREW, UScript.ARABIC,
69                 UScript.INVALID_CODE,UScript.INVALID_CODE,
70                 UScript.HEBREW
71         };
72         int i =0;
73         int numErrors =0;
74 
75         for( ; i<testNames.length; i++){
76             int[] code = UScript.getCode(testNames[i]);
77 
78             if(code==null){
79                 if(expected[i]!=UScript.INVALID_CODE){
80                     logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
81                     numErrors++;
82                 }
83                 // getCode returns null if the code could not be found
84                 continue;
85             }
86             if((code[0] != expected[i])){
87                 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
88                 numErrors++;
89             }
90         }
91         reportDataErrors(numErrors);
92 
93         //
94         ULocale defaultLoc = ULocale.getDefault();
95         ULocale esperanto = new ULocale("eo_DE");
96         ULocale.setDefault(esperanto);
97         int[] code = UScript.getCode(esperanto);
98         if(code != null){
99             if( code[0] != UScript.LATIN){
100                 errln("Did not get the expected script code for Esperanto");
101             }
102         }else{
103             warnln("Could not load the locale data.");
104         }
105         ULocale.setDefault(defaultLoc);
106 
107         // Should work regardless of whether we have locale data for the language.
108         assertEqualScripts("tg script: Cyrl",  // Tajik
109                 new int[] { UScript.CYRILLIC },
110                 UScript.getCode(new ULocale("tg")));
111         assertEqualScripts("xsr script: Deva",  // Sherpa
112                 new int[] { UScript.DEVANAGARI },
113                 UScript.getCode(new ULocale("xsr")));
114 
115         // Multi-script languages.
116         assertEqualScripts("ja scripts: Kana Hira Hani",
117                 new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN },
118                 UScript.getCode(ULocale.JAPANESE));
119         assertEqualScripts("ko scripts: Hang Hani",
120                 new int[] { UScript.HANGUL, UScript.HAN },
121                 UScript.getCode(ULocale.KOREAN));
122         assertEqualScripts("zh script: Hani",
123                 new int[] { UScript.HAN },
124                 UScript.getCode(ULocale.CHINESE));
125         assertEqualScripts("zh-Hant scripts: Hani Bopo",
126                 new int[] { UScript.HAN, UScript.BOPOMOFO },
127                 UScript.getCode(ULocale.TRADITIONAL_CHINESE));
128         assertEqualScripts("zh-TW scripts: Hani Bopo",
129                 new int[] { UScript.HAN, UScript.BOPOMOFO },
130                 UScript.getCode(ULocale.TAIWAN));
131 
132         // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
133         assertEqualScripts("ro-RO script: Latn",
134                 new int[] { UScript.LATIN },
135                 UScript.getCode("ro-RO"));  // String not ULocale
136     }
137 
reportDataErrors(int numErrors)138     private void reportDataErrors(int numErrors) {
139         if (numErrors >0) {
140             // assume missing locale data, so not an error, just a warning
141             if (isModularBuild() || noData()) {
142                 // if nodata is set don't even warn
143                 warnln("Could not find locale data");
144             } else {
145                 errln("encountered " + numErrors + " errors.");
146             }
147         }
148     }
149 
TestMultipleCode()150     public void TestMultipleCode(){
151         final String[] testNames = { "ja" ,"ko_KR","zh","zh_TW"};
152         final int[][] expected = {
153                                 {UScript.KATAKANA,UScript.HIRAGANA,UScript.HAN},
154                                 {UScript.HANGUL, UScript.HAN},
155                                 {UScript.HAN},
156                                 {UScript.HAN,UScript.BOPOMOFO}
157                               };
158 
159         int numErrors = 0;
160         for(int i=0; i<testNames.length;i++){
161             int[] code = UScript.getCode(testNames[i]);
162             int[] expt = (int[]) expected[i];
163             if(code!=null){
164                 for(int j =0; j< code.length;j++){
165                     if(code[j]!=expt[j]){
166                         numErrors++;
167                         logln("Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
168                     }
169                 }
170             }else{
171                 numErrors++;
172                 logln("Error getting script code for name "+testNames[i]);
173             }
174         }
175         reportDataErrors(numErrors);
176 
177         //cover UScript.getCode(Locale)
178         Locale[] testLocales = new Locale[] {
179             Locale.JAPANESE,
180             Locale.KOREA,
181             Locale.CHINESE,
182             Locale.TAIWAN };
183         logln("Testing UScript.getCode(Locale) ...");
184         numErrors = 0;
185         for(int i=0; i<testNames.length;i++){
186             logln("  Testing locale: " + testLocales[i].getDisplayName());
187             int[] code = UScript.getCode(testLocales[i]);
188             int[] expt = (int[]) expected[i];
189             if(code!=null){
190                 for(int j =0; j< code.length;j++){
191                     if(code[j]!=expt[j]){
192                         numErrors++;
193                         logln("  Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
194                     }
195                 }
196             }else{
197                 numErrors++;
198                 logln("  Error getting script code for name "+testNames[i]);
199             }
200         }
201         reportDataErrors(numErrors);
202     }
203 
TestGetCode()204     public void TestGetCode(){
205 
206         final String[] testNames={
207             /* test locale */
208             "en", "en_US", "sr", "ta", "gu", "te_IN",
209             "hi", "he", "ar",
210             /* test abbr */
211             "Hani", "Hang","Hebr","Hira",
212             "Knda","Kana","Khmr","Lao",
213             "Latn",/*"Latf","Latg",*/
214             "Mlym", "Mong",
215 
216             /* test names */
217             "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
218             "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
219             /* test lower case names */
220             "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
221             "oriya",     "runic",     "sinhala", "syriac","tamil",
222             "telugu",    "thaana",    "thai",    "tibetan",
223             /* test the bounds*/
224             "Cans", "arabic","Yi","Zyyy"
225         };
226         final int[] expected ={
227             /* locales should return */
228             UScript.LATIN, UScript.LATIN,
229             UScript.CYRILLIC, UScript.TAMIL, UScript.GUJARATI,
230             UScript.TELUGU,UScript.DEVANAGARI,
231             UScript.HEBREW, UScript.ARABIC,
232             /* abbr should return */
233             UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
234             UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
235             UScript.LATIN,/* UScript.LATIN, UScript.LATIN,*/
236             UScript.MALAYALAM, UScript.MONGOLIAN,
237             /* names should return */
238             UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
239             UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI, UScript.COMMON, UScript.INHERITED,
240             /* lower case names should return */
241             UScript.MALAYALAM, UScript.MONGOLIAN, UScript.MYANMAR, UScript.OGHAM, UScript.OLD_ITALIC,
242             UScript.ORIYA, UScript.RUNIC, UScript.SINHALA, UScript.SYRIAC, UScript.TAMIL,
243             UScript.TELUGU, UScript.THAANA, UScript.THAI, UScript.TIBETAN,
244             /* bounds */
245             UScript.CANADIAN_ABORIGINAL, UScript.ARABIC, UScript.YI, UScript.COMMON
246         };
247         int i =0;
248         int numErrors =0;
249 
250         for( ; i<testNames.length; i++){
251             int[] code = UScript.getCode(testNames[i]);
252             if(code == null){
253                 if(expected[i]==UScript.INVALID_CODE){
254                     // getCode returns null if the code could not be found
255                     continue;
256                 }
257                 // currently commented out until jitterbug#2678 is fixed
258                 logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
259                 numErrors++;
260                 continue;
261             }
262             if((code[0] != expected[i])){
263                 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
264                 numErrors++;
265             }
266         }
267         reportDataErrors(numErrors);
268     }
269 
TestGetName()270     public void TestGetName(){
271 
272         final int[] testCodes={
273             /* names should return */
274             UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
275             UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI,
276         };
277 
278         final String[] expectedNames={
279 
280             /* test names */
281             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
282             "Gothic",  "Greek",  "Gujarati",
283         };
284         int i =0;
285         int numErrors=0;
286         while(i< testCodes.length){
287             String scriptName  = UScript.getName(testCodes[i]);
288             if(!expectedNames[i].equals(scriptName)){
289                 logln("Error getting abbreviations Got: " +scriptName +" Expected: "+expectedNames[i]);
290                 numErrors++;
291             }
292             i++;
293         }
294         if(numErrors >0 ){
295             warnln("encountered " + numErrors + " errors in UScript.getName()");
296         }
297 
298     }
TestGetShortName()299     public void TestGetShortName(){
300         final int[] testCodes={
301             /* abbr should return */
302             UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
303             UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
304             UScript.LATIN,
305             UScript.MALAYALAM, UScript.MONGOLIAN,
306         };
307 
308         final String[] expectedAbbr={
309               /* test abbr */
310             "Hani", "Hang","Hebr","Hira",
311             "Knda","Kana","Khmr","Laoo",
312             "Latn",
313             "Mlym", "Mong",
314         };
315         int i=0;
316         int numErrors=0;
317         while(i<testCodes.length){
318             String  shortName = UScript.getShortName(testCodes[i]);
319             if(!expectedAbbr[i].equals(shortName)){
320                 logln("Error getting abbreviations Got: " +shortName+ " Expected: " +expectedAbbr[i]);
321                 numErrors++;
322             }
323             i++;
324         }
325         if(numErrors >0 ){
326             warnln("encountered " + numErrors + " errors in UScript.getShortName()");
327         }
328     }
TestGetScript()329     public void TestGetScript(){
330         int codepoints[][] = new int[][] {
331                 {0x0000FF9D, UScript.KATAKANA },
332                 {0x0000FFBE, UScript.HANGUL },
333                 {0x0000FFC7, UScript.HANGUL },
334                 {0x0000FFCF, UScript.HANGUL },
335                 {0x0000FFD7, UScript.HANGUL},
336                 {0x0000FFDC, UScript.HANGUL},
337                 {0x00010300, UScript.OLD_ITALIC},
338                 {0x00010330, UScript.GOTHIC},
339                 {0x0001034A, UScript.GOTHIC},
340                 {0x00010400, UScript.DESERET},
341                 {0x00010428, UScript.DESERET},
342                 {0x0001D167, UScript.INHERITED},
343                 {0x0001D17B, UScript.INHERITED},
344                 {0x0001D185, UScript.INHERITED},
345                 {0x0001D1AA, UScript.INHERITED},
346                 {0x00020000, UScript.HAN},
347                 {0x00000D02, UScript.MALAYALAM},
348                 {0x00000D00, UScript.UNKNOWN},
349                 {0x00000000, UScript.COMMON},
350                 {0x0001D169, UScript.INHERITED },
351                 {0x0001D182, UScript.INHERITED },
352                 {0x0001D18B, UScript.INHERITED },
353                 {0x0001D1AD, UScript.INHERITED },
354         };
355 
356         int i =0;
357         int code = UScript.INVALID_CODE;
358         boolean passed = true;
359 
360         while(i< codepoints.length){
361             code = UScript.getScript(codepoints[i][0]);
362 
363             if(code != codepoints[i][1]){
364                 logln("UScript.getScript for codepoint 0x"+ hex(codepoints[i][0])+" failed");
365                 passed = false;
366             }
367 
368             i++;
369         }
370         if(!passed){
371            errln("UScript.getScript failed.");
372         }
373     }
374 
TestGetScriptOfCharsWithScriptExtensions()375     public void TestGetScriptOfCharsWithScriptExtensions() {
376         /* test characters which have Script_Extensions */
377         if(!(
378             UScript.COMMON==UScript.getScript(0x0640) &&
379             UScript.INHERITED==UScript.getScript(0x0650) &&
380             UScript.ARABIC==UScript.getScript(0xfdf2))
381         ) {
382             errln("UScript.getScript(character with Script_Extensions) failed");
383         }
384     }
385 
TestHasScript()386     public void TestHasScript() {
387         if(!(
388             !UScript.hasScript(0x063f, UScript.COMMON) &&
389             UScript.hasScript(0x063f, UScript.ARABIC) &&  /* main Script value */
390             !UScript.hasScript(0x063f, UScript.SYRIAC) &&
391             !UScript.hasScript(0x063f, UScript.THAANA))
392         ) {
393             errln("UScript.hasScript(U+063F, ...) is wrong");
394         }
395         if(!(
396             !UScript.hasScript(0x0640, UScript.COMMON) &&  /* main Script value */
397             UScript.hasScript(0x0640, UScript.ARABIC) &&
398             UScript.hasScript(0x0640, UScript.SYRIAC) &&
399             !UScript.hasScript(0x0640, UScript.THAANA))
400         ) {
401             errln("UScript.hasScript(U+0640, ...) is wrong");
402         }
403         if(!(
404             !UScript.hasScript(0x0650, UScript.INHERITED) &&  /* main Script value */
405             UScript.hasScript(0x0650, UScript.ARABIC) &&
406             UScript.hasScript(0x0650, UScript.SYRIAC) &&
407             !UScript.hasScript(0x0650, UScript.THAANA))
408         ) {
409             errln("UScript.hasScript(U+0650, ...) is wrong");
410         }
411         if(!(
412             !UScript.hasScript(0x0660, UScript.COMMON) &&  /* main Script value */
413             UScript.hasScript(0x0660, UScript.ARABIC) &&
414             !UScript.hasScript(0x0660, UScript.SYRIAC) &&
415             UScript.hasScript(0x0660, UScript.THAANA))
416         ) {
417             errln("UScript.hasScript(U+0660, ...) is wrong");
418         }
419         if(!(
420             !UScript.hasScript(0xfdf2, UScript.COMMON) &&
421             UScript.hasScript(0xfdf2, UScript.ARABIC) &&  /* main Script value */
422             !UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
423             UScript.hasScript(0xfdf2, UScript.THAANA))
424         ) {
425             errln("UScript.hasScript(U+FDF2, ...) is wrong");
426         }
427         if(UScript.hasScript(0x0640, 0xaffe)) {
428             // An unguarded implementation might go into an infinite loop.
429             errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
430         }
431     }
432 
TestGetScriptExtensions()433     public void TestGetScriptExtensions() {
434         BitSet scripts=new BitSet(UScript.CODE_LIMIT);
435 
436         /* invalid code points */
437         if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
438                 !scripts.get(UScript.UNKNOWN)) {
439             errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
440         }
441         if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
442                 !scripts.get(UScript.UNKNOWN)) {
443             errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
444         }
445 
446         /* normal usage */
447         if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 ||
448                 !scripts.get(UScript.ARABIC)) {
449             errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
450         }
451         if(UScript.getScriptExtensions(0x0640, scripts)>-3 || scripts.cardinality()<3 ||
452            !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
453         ) {
454             errln("UScript.getScriptExtensions(U+0640) failed");
455         }
456         if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 ||
457                 !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
458             errln("UScript.getScriptExtensions(U+FDF2) failed");
459         }
460         if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 ||
461                 !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
462             errln("UScript.getScriptExtensions(U+FF65) failed");
463         }
464     }
465 
TestScriptMetadataAPI()466     public void TestScriptMetadataAPI() {
467         /* API & code coverage. */
468         String sample = UScript.getSampleString(UScript.LATIN);
469         if(sample.length()!=1 || UScript.getScript(sample.charAt(0))!=UScript.LATIN) {
470             errln("UScript.getSampleString(Latn) failed");
471         }
472         sample = UScript.getSampleString(UScript.INVALID_CODE);
473         if(sample.length()!=0) {
474             errln("UScript.getSampleString(invalid) failed");
475         }
476 
477         if(UScript.getUsage(UScript.LATIN)!=ScriptUsage.RECOMMENDED ||
478                 UScript.getUsage(UScript.YI)!=ScriptUsage.ASPIRATIONAL ||
479                 UScript.getUsage(UScript.CHEROKEE)!=ScriptUsage.LIMITED_USE ||
480                 UScript.getUsage(UScript.COPTIC)!=ScriptUsage.EXCLUDED ||
481                 UScript.getUsage(UScript.CIRTH)!=ScriptUsage.NOT_ENCODED ||
482                 UScript.getUsage(UScript.INVALID_CODE)!=ScriptUsage.NOT_ENCODED ||
483                 UScript.getUsage(UScript.CODE_LIMIT)!=ScriptUsage.NOT_ENCODED) {
484             errln("UScript.getUsage() failed");
485         }
486 
487         if(UScript.isRightToLeft(UScript.LATIN) ||
488                 UScript.isRightToLeft(UScript.CIRTH) ||
489                 !UScript.isRightToLeft(UScript.ARABIC) ||
490                 !UScript.isRightToLeft(UScript.HEBREW)) {
491             errln("UScript.isRightToLeft() failed");
492         }
493 
494         if(UScript.breaksBetweenLetters(UScript.LATIN) ||
495                 UScript.breaksBetweenLetters(UScript.CIRTH) ||
496                 !UScript.breaksBetweenLetters(UScript.HAN) ||
497                 !UScript.breaksBetweenLetters(UScript.THAI)) {
498             errln("UScript.breaksBetweenLetters() failed");
499         }
500 
501         if(UScript.isCased(UScript.CIRTH) ||
502                 UScript.isCased(UScript.HAN) ||
503                 !UScript.isCased(UScript.LATIN) ||
504                 !UScript.isCased(UScript.GREEK)) {
505             errln("UScript.isCased() failed");
506         }
507     }
508 
509     /**
510      * Maps a special script code to the most common script of its encoded characters.
511      */
getCharScript(int script)512     private static final int getCharScript(int script) {
513         switch(script) {
514         case UScript.SIMPLIFIED_HAN:
515         case UScript.TRADITIONAL_HAN:
516             return UScript.HAN;
517         case UScript.JAPANESE:
518             return UScript.HIRAGANA;
519         case UScript.KOREAN:
520             return UScript.HANGUL;
521         default:
522             return script;
523         }
524     }
525 
TestScriptMetadata()526     public void TestScriptMetadata() {
527         UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
528         // So far, sample characters are uppercase.
529         // Georgian is special.
530         UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");
531         for(int sc = 0; sc < UScript.CODE_LIMIT; ++sc) {
532             String sn = UScript.getShortName(sc);
533             ScriptUsage usage = UScript.getUsage(sc);
534             String sample = UScript.getSampleString(sc);
535             UnicodeSet scriptSet = new UnicodeSet();
536             scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc);
537             if(usage == ScriptUsage.NOT_ENCODED) {
538                 assertTrue(sn + " not encoded, no sample", sample.length() == 0);  // Java 6: sample.isEmpty()
539                 assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc));
540                 assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc));
541                 assertFalse(sn + " not encoded, not cased", UScript.isCased(sc));
542                 assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty());
543             } else {
544                 assertFalse(sn + " encoded, has a sample character", sample.length() == 0);  // Java 6: sample.isEmpty()
545                 int firstChar = sample.codePointAt(0);
546                 int charScript = getCharScript(sc);
547                 assertEquals(sn + " script(sample(script))",
548                              charScript, UScript.getScript(firstChar));
549                 assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc));
550                 assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc));
551                 assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty());
552                 if(UScript.isRightToLeft(sc)) {
553                     rtl.removeAll(scriptSet);
554                 }
555                 if(UScript.isCased(sc)) {
556                     cased.removeAll(scriptSet);
557                 }
558             }
559         }
560         assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true));
561         assertEquals("no remaining cased characters", "[]", cased.toPattern(true));
562 
563         assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN));
564         assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI));
565         assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN));
566     }
567 
TestScriptNames()568     public void TestScriptNames(){
569         for(int i=0; i<UScript.CODE_LIMIT;i++){
570             String name = UScript.getName(i);
571             if(name.equals("") ){
572                 errln("FAILED: getName for code : "+i);
573             }
574             String shortName= UScript.getShortName(i);
575             if(shortName.equals("")){
576                 errln("FAILED: getName for code : "+i);
577             }
578         }
579     }
TestAllCodepoints()580     public void TestAllCodepoints(){
581         int code;
582         //String oldId="";
583         //String oldAbbrId="";
584         for( int i =0; i <= 0x10ffff; i++){
585           code =UScript.INVALID_CODE;
586           code = UScript.getScript(i);
587           if(code==UScript.INVALID_CODE){
588                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
589           }
590           String id =UScript.getName(code);
591           if(id.indexOf("INVALID")>=0){
592                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
593           }
594           String abbr = UScript.getShortName(code);
595           if(abbr.indexOf("INV")>=0){
596                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
597           }
598         }
599     }
TestNewCode()600     public void TestNewCode(){
601         /*
602          * These script codes were originally added to ICU pre-3.6, so that ICU would
603          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
604          * These script codes were added with only short names because we don't
605          * want to invent long names ourselves.
606          * Unicode 5 and later encode some of these scripts and give them long names.
607          * Whenever this happens, the long script names here need to be updated.
608          */
609         String[] expectedLong = new String[]{
610             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
611             "Egyd", "Egyh", "Egyptian_Hieroglyphs",
612             "Geok", "Hans", "Hant", "Pahawh_Hmong", "Hung", "Inds",
613             "Javanese", "Kayah_Li", "Latf", "Latg",
614             "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
615             "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
616             "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
617             "Zxxx", "Unknown",
618             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
619             "Moon", "Meetei_Mayek",
620             /* new in ICU 4.0 */
621             "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
622             "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
623             "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
624             "Zmth", "Zsym",
625             /* new in ICU 4.4 */
626             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
627             /* new in ICU 4.6 */
628             "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
629             "Loma", "Mende_Kikakui", "Meroitic_Cursive",
630             "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
631             /* new in ICU 4.8 */
632             "Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
633             /* new in ICU 49 */
634             "Hluw", "Khojki", "Tirhuta",
635             /* new in ICU 52 */
636             "Caucasian_Albanian", "Mahajani",
637             /* new in ICU 54 */
638             "Ahom", "Hatr", "Modi", "Mult", "Pau_Cin_Hau", "Siddham"
639         };
640         String[] expectedShort = new String[]{
641             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
642             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
643             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
644             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
645             "Zxxx", "Zzzz",
646             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
647             "Moon", "Mtei",
648             /* new in ICU 4.0 */
649             "Armi", "Avst", "Cakm", "Kore",
650             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
651             "Zmth", "Zsym",
652             /* new in ICU 4.4 */
653             "Bamu", "Lisu", "Nkgb", "Sarb",
654             /* new in ICU 4.6 */
655             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
656             "Narb", "Nbat", "Palm", "Sind", "Wara",
657             /* new in ICU 4.8 */
658             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
659             /* new in ICU 49 */
660             "Hluw", "Khoj", "Tirh",
661             /* new in ICU 52 */
662             "Aghb", "Mahj",
663             /* new in ICU 54 */
664             "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd"
665         };
666         if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
667             errln("need to add new script codes in lang.TestUScript.java!");
668             return;
669         }
670         int j = 0;
671         int i = 0;
672         for(i=UScript.BALINESE; i<UScript.CODE_LIMIT; i++, j++){
673             String name = UScript.getName(i);
674             if(name==null || !name.equals(expectedLong[j])){
675                 errln("UScript.getName failed for code"+ i + name +"!=" +expectedLong[j]);
676             }
677             name = UScript.getShortName(i);
678             if(name==null || !name.equals(expectedShort[j])){
679                 errln("UScript.getShortName failed for code"+ i + name +"!=" +expectedShort[j]);
680             }
681         }
682         for(i=0; i<expectedLong.length; i++){
683             int[] ret = UScript.getCode(expectedShort[i]);
684             if(ret.length>1){
685                 errln("UScript.getCode did not return expected number of codes for script"+ expectedShort[i]+". EXPECTED: 1 GOT: "+ ret.length);
686             }
687             if(ret[0]!= (UScript.BALINESE+i)){
688                 errln("UScript.getCode did not return expected code for script"+ expectedShort[i]+". EXPECTED: "+ (UScript.BALINESE+i)+" GOT: %i\n"+ ret[0] );
689             }
690         }
691     }
692 }
693