1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 package com.ibm.icu.dev.test.lang; 9 10 import java.util.BitSet; 11 import java.util.Locale; 12 13 import com.ibm.icu.dev.test.TestFmwk; 14 import com.ibm.icu.lang.UProperty; 15 import com.ibm.icu.lang.UScript; 16 import com.ibm.icu.lang.UScript.ScriptUsage; 17 import com.ibm.icu.text.UnicodeSet; 18 import com.ibm.icu.util.ULocale; 19 20 public class TestUScript extends TestFmwk { 21 22 /** 23 * Constructor 24 */ TestUScript()25 public TestUScript() 26 { 27 } 28 main(String[] args)29 public static void main(String[] args) throws Exception { 30 new TestUScript().run(args); 31 } 32 scriptsToString(int[] scripts)33 private static String scriptsToString(int[] scripts) { 34 if(scripts == null) { 35 return "null"; 36 } 37 StringBuilder sb = new StringBuilder(); 38 for(int script : scripts) { 39 if(sb.length() > 0) { 40 sb.append(' '); 41 } 42 sb.append(UScript.getShortName(script)); 43 } 44 return sb.toString(); 45 } 46 assertEqualScripts(String msg, int[] expectedScripts, int[] actualScripts)47 private void assertEqualScripts(String msg, int[] expectedScripts, int[] actualScripts) { 48 assertEquals(msg, scriptsToString(expectedScripts), scriptsToString(actualScripts)); 49 } 50 TestLocaleGetCode()51 public void TestLocaleGetCode(){ 52 final ULocale[] testNames={ 53 /* test locale */ 54 new ULocale("en"), new ULocale("en_US"), 55 new ULocale("sr"), new ULocale("ta") , 56 new ULocale("te_IN"), 57 new ULocale("hi"), 58 new ULocale("he"), new ULocale("ar"), 59 new ULocale("abcde"), 60 new ULocale("abcde_cdef"), 61 new ULocale("iw") 62 }; 63 final int[] expected ={ 64 /* locales should return */ 65 UScript.LATIN, UScript.LATIN, 66 UScript.CYRILLIC, UScript.TAMIL, 67 UScript.TELUGU,UScript.DEVANAGARI, 68 UScript.HEBREW, UScript.ARABIC, 69 UScript.INVALID_CODE,UScript.INVALID_CODE, 70 UScript.HEBREW 71 }; 72 int i =0; 73 int numErrors =0; 74 75 for( ; i<testNames.length; i++){ 76 int[] code = UScript.getCode(testNames[i]); 77 78 if(code==null){ 79 if(expected[i]!=UScript.INVALID_CODE){ 80 logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]); 81 numErrors++; 82 } 83 // getCode returns null if the code could not be found 84 continue; 85 } 86 if((code[0] != expected[i])){ 87 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]); 88 numErrors++; 89 } 90 } 91 reportDataErrors(numErrors); 92 93 // 94 ULocale defaultLoc = ULocale.getDefault(); 95 ULocale esperanto = new ULocale("eo_DE"); 96 ULocale.setDefault(esperanto); 97 int[] code = UScript.getCode(esperanto); 98 if(code != null){ 99 if( code[0] != UScript.LATIN){ 100 errln("Did not get the expected script code for Esperanto"); 101 } 102 }else{ 103 warnln("Could not load the locale data."); 104 } 105 ULocale.setDefault(defaultLoc); 106 107 // Should work regardless of whether we have locale data for the language. 108 assertEqualScripts("tg script: Cyrl", // Tajik 109 new int[] { UScript.CYRILLIC }, 110 UScript.getCode(new ULocale("tg"))); 111 assertEqualScripts("xsr script: Deva", // Sherpa 112 new int[] { UScript.DEVANAGARI }, 113 UScript.getCode(new ULocale("xsr"))); 114 115 // Multi-script languages. 116 assertEqualScripts("ja scripts: Kana Hira Hani", 117 new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN }, 118 UScript.getCode(ULocale.JAPANESE)); 119 assertEqualScripts("ko scripts: Hang Hani", 120 new int[] { UScript.HANGUL, UScript.HAN }, 121 UScript.getCode(ULocale.KOREAN)); 122 assertEqualScripts("zh script: Hani", 123 new int[] { UScript.HAN }, 124 UScript.getCode(ULocale.CHINESE)); 125 assertEqualScripts("zh-Hant scripts: Hani Bopo", 126 new int[] { UScript.HAN, UScript.BOPOMOFO }, 127 UScript.getCode(ULocale.TRADITIONAL_CHINESE)); 128 assertEqualScripts("zh-TW scripts: Hani Bopo", 129 new int[] { UScript.HAN, UScript.BOPOMOFO }, 130 UScript.getCode(ULocale.TAIWAN)); 131 132 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro). 133 assertEqualScripts("ro-RO script: Latn", 134 new int[] { UScript.LATIN }, 135 UScript.getCode("ro-RO")); // String not ULocale 136 } 137 reportDataErrors(int numErrors)138 private void reportDataErrors(int numErrors) { 139 if (numErrors >0) { 140 // assume missing locale data, so not an error, just a warning 141 if (isModularBuild() || noData()) { 142 // if nodata is set don't even warn 143 warnln("Could not find locale data"); 144 } else { 145 errln("encountered " + numErrors + " errors."); 146 } 147 } 148 } 149 TestMultipleCode()150 public void TestMultipleCode(){ 151 final String[] testNames = { "ja" ,"ko_KR","zh","zh_TW"}; 152 final int[][] expected = { 153 {UScript.KATAKANA,UScript.HIRAGANA,UScript.HAN}, 154 {UScript.HANGUL, UScript.HAN}, 155 {UScript.HAN}, 156 {UScript.HAN,UScript.BOPOMOFO} 157 }; 158 159 int numErrors = 0; 160 for(int i=0; i<testNames.length;i++){ 161 int[] code = UScript.getCode(testNames[i]); 162 int[] expt = (int[]) expected[i]; 163 if(code!=null){ 164 for(int j =0; j< code.length;j++){ 165 if(code[j]!=expt[j]){ 166 numErrors++; 167 logln("Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]); 168 } 169 } 170 }else{ 171 numErrors++; 172 logln("Error getting script code for name "+testNames[i]); 173 } 174 } 175 reportDataErrors(numErrors); 176 177 //cover UScript.getCode(Locale) 178 Locale[] testLocales = new Locale[] { 179 Locale.JAPANESE, 180 Locale.KOREA, 181 Locale.CHINESE, 182 Locale.TAIWAN }; 183 logln("Testing UScript.getCode(Locale) ..."); 184 numErrors = 0; 185 for(int i=0; i<testNames.length;i++){ 186 logln(" Testing locale: " + testLocales[i].getDisplayName()); 187 int[] code = UScript.getCode(testLocales[i]); 188 int[] expt = (int[]) expected[i]; 189 if(code!=null){ 190 for(int j =0; j< code.length;j++){ 191 if(code[j]!=expt[j]){ 192 numErrors++; 193 logln(" Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]); 194 } 195 } 196 }else{ 197 numErrors++; 198 logln(" Error getting script code for name "+testNames[i]); 199 } 200 } 201 reportDataErrors(numErrors); 202 } 203 TestGetCode()204 public void TestGetCode(){ 205 206 final String[] testNames={ 207 /* test locale */ 208 "en", "en_US", "sr", "ta", "gu", "te_IN", 209 "hi", "he", "ar", 210 /* test abbr */ 211 "Hani", "Hang","Hebr","Hira", 212 "Knda","Kana","Khmr","Lao", 213 "Latn",/*"Latf","Latg",*/ 214 "Mlym", "Mong", 215 216 /* test names */ 217 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN", 218 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED", 219 /* test lower case names */ 220 "malayalam", "mongolian", "myanmar", "ogham", "old-italic", 221 "oriya", "runic", "sinhala", "syriac","tamil", 222 "telugu", "thaana", "thai", "tibetan", 223 /* test the bounds*/ 224 "Cans", "arabic","Yi","Zyyy" 225 }; 226 final int[] expected ={ 227 /* locales should return */ 228 UScript.LATIN, UScript.LATIN, 229 UScript.CYRILLIC, UScript.TAMIL, UScript.GUJARATI, 230 UScript.TELUGU,UScript.DEVANAGARI, 231 UScript.HEBREW, UScript.ARABIC, 232 /* abbr should return */ 233 UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA, 234 UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO, 235 UScript.LATIN,/* UScript.LATIN, UScript.LATIN,*/ 236 UScript.MALAYALAM, UScript.MONGOLIAN, 237 /* names should return */ 238 UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN, 239 UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI, UScript.COMMON, UScript.INHERITED, 240 /* lower case names should return */ 241 UScript.MALAYALAM, UScript.MONGOLIAN, UScript.MYANMAR, UScript.OGHAM, UScript.OLD_ITALIC, 242 UScript.ORIYA, UScript.RUNIC, UScript.SINHALA, UScript.SYRIAC, UScript.TAMIL, 243 UScript.TELUGU, UScript.THAANA, UScript.THAI, UScript.TIBETAN, 244 /* bounds */ 245 UScript.CANADIAN_ABORIGINAL, UScript.ARABIC, UScript.YI, UScript.COMMON 246 }; 247 int i =0; 248 int numErrors =0; 249 250 for( ; i<testNames.length; i++){ 251 int[] code = UScript.getCode(testNames[i]); 252 if(code == null){ 253 if(expected[i]==UScript.INVALID_CODE){ 254 // getCode returns null if the code could not be found 255 continue; 256 } 257 // currently commented out until jitterbug#2678 is fixed 258 logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]); 259 numErrors++; 260 continue; 261 } 262 if((code[0] != expected[i])){ 263 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]); 264 numErrors++; 265 } 266 } 267 reportDataErrors(numErrors); 268 } 269 TestGetName()270 public void TestGetName(){ 271 272 final int[] testCodes={ 273 /* names should return */ 274 UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN, 275 UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI, 276 }; 277 278 final String[] expectedNames={ 279 280 /* test names */ 281 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian", 282 "Gothic", "Greek", "Gujarati", 283 }; 284 int i =0; 285 int numErrors=0; 286 while(i< testCodes.length){ 287 String scriptName = UScript.getName(testCodes[i]); 288 if(!expectedNames[i].equals(scriptName)){ 289 logln("Error getting abbreviations Got: " +scriptName +" Expected: "+expectedNames[i]); 290 numErrors++; 291 } 292 i++; 293 } 294 if(numErrors >0 ){ 295 warnln("encountered " + numErrors + " errors in UScript.getName()"); 296 } 297 298 } TestGetShortName()299 public void TestGetShortName(){ 300 final int[] testCodes={ 301 /* abbr should return */ 302 UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA, 303 UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO, 304 UScript.LATIN, 305 UScript.MALAYALAM, UScript.MONGOLIAN, 306 }; 307 308 final String[] expectedAbbr={ 309 /* test abbr */ 310 "Hani", "Hang","Hebr","Hira", 311 "Knda","Kana","Khmr","Laoo", 312 "Latn", 313 "Mlym", "Mong", 314 }; 315 int i=0; 316 int numErrors=0; 317 while(i<testCodes.length){ 318 String shortName = UScript.getShortName(testCodes[i]); 319 if(!expectedAbbr[i].equals(shortName)){ 320 logln("Error getting abbreviations Got: " +shortName+ " Expected: " +expectedAbbr[i]); 321 numErrors++; 322 } 323 i++; 324 } 325 if(numErrors >0 ){ 326 warnln("encountered " + numErrors + " errors in UScript.getShortName()"); 327 } 328 } TestGetScript()329 public void TestGetScript(){ 330 int codepoints[][] = new int[][] { 331 {0x0000FF9D, UScript.KATAKANA }, 332 {0x0000FFBE, UScript.HANGUL }, 333 {0x0000FFC7, UScript.HANGUL }, 334 {0x0000FFCF, UScript.HANGUL }, 335 {0x0000FFD7, UScript.HANGUL}, 336 {0x0000FFDC, UScript.HANGUL}, 337 {0x00010300, UScript.OLD_ITALIC}, 338 {0x00010330, UScript.GOTHIC}, 339 {0x0001034A, UScript.GOTHIC}, 340 {0x00010400, UScript.DESERET}, 341 {0x00010428, UScript.DESERET}, 342 {0x0001D167, UScript.INHERITED}, 343 {0x0001D17B, UScript.INHERITED}, 344 {0x0001D185, UScript.INHERITED}, 345 {0x0001D1AA, UScript.INHERITED}, 346 {0x00020000, UScript.HAN}, 347 {0x00000D02, UScript.MALAYALAM}, 348 {0x00000D00, UScript.UNKNOWN}, 349 {0x00000000, UScript.COMMON}, 350 {0x0001D169, UScript.INHERITED }, 351 {0x0001D182, UScript.INHERITED }, 352 {0x0001D18B, UScript.INHERITED }, 353 {0x0001D1AD, UScript.INHERITED }, 354 }; 355 356 int i =0; 357 int code = UScript.INVALID_CODE; 358 boolean passed = true; 359 360 while(i< codepoints.length){ 361 code = UScript.getScript(codepoints[i][0]); 362 363 if(code != codepoints[i][1]){ 364 logln("UScript.getScript for codepoint 0x"+ hex(codepoints[i][0])+" failed"); 365 passed = false; 366 } 367 368 i++; 369 } 370 if(!passed){ 371 errln("UScript.getScript failed."); 372 } 373 } 374 TestGetScriptOfCharsWithScriptExtensions()375 public void TestGetScriptOfCharsWithScriptExtensions() { 376 /* test characters which have Script_Extensions */ 377 if(!( 378 UScript.COMMON==UScript.getScript(0x0640) && 379 UScript.INHERITED==UScript.getScript(0x0650) && 380 UScript.ARABIC==UScript.getScript(0xfdf2)) 381 ) { 382 errln("UScript.getScript(character with Script_Extensions) failed"); 383 } 384 } 385 TestHasScript()386 public void TestHasScript() { 387 if(!( 388 !UScript.hasScript(0x063f, UScript.COMMON) && 389 UScript.hasScript(0x063f, UScript.ARABIC) && /* main Script value */ 390 !UScript.hasScript(0x063f, UScript.SYRIAC) && 391 !UScript.hasScript(0x063f, UScript.THAANA)) 392 ) { 393 errln("UScript.hasScript(U+063F, ...) is wrong"); 394 } 395 if(!( 396 !UScript.hasScript(0x0640, UScript.COMMON) && /* main Script value */ 397 UScript.hasScript(0x0640, UScript.ARABIC) && 398 UScript.hasScript(0x0640, UScript.SYRIAC) && 399 !UScript.hasScript(0x0640, UScript.THAANA)) 400 ) { 401 errln("UScript.hasScript(U+0640, ...) is wrong"); 402 } 403 if(!( 404 !UScript.hasScript(0x0650, UScript.INHERITED) && /* main Script value */ 405 UScript.hasScript(0x0650, UScript.ARABIC) && 406 UScript.hasScript(0x0650, UScript.SYRIAC) && 407 !UScript.hasScript(0x0650, UScript.THAANA)) 408 ) { 409 errln("UScript.hasScript(U+0650, ...) is wrong"); 410 } 411 if(!( 412 !UScript.hasScript(0x0660, UScript.COMMON) && /* main Script value */ 413 UScript.hasScript(0x0660, UScript.ARABIC) && 414 !UScript.hasScript(0x0660, UScript.SYRIAC) && 415 UScript.hasScript(0x0660, UScript.THAANA)) 416 ) { 417 errln("UScript.hasScript(U+0660, ...) is wrong"); 418 } 419 if(!( 420 !UScript.hasScript(0xfdf2, UScript.COMMON) && 421 UScript.hasScript(0xfdf2, UScript.ARABIC) && /* main Script value */ 422 !UScript.hasScript(0xfdf2, UScript.SYRIAC) && 423 UScript.hasScript(0xfdf2, UScript.THAANA)) 424 ) { 425 errln("UScript.hasScript(U+FDF2, ...) is wrong"); 426 } 427 if(UScript.hasScript(0x0640, 0xaffe)) { 428 // An unguarded implementation might go into an infinite loop. 429 errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong"); 430 } 431 } 432 TestGetScriptExtensions()433 public void TestGetScriptExtensions() { 434 BitSet scripts=new BitSet(UScript.CODE_LIMIT); 435 436 /* invalid code points */ 437 if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 || 438 !scripts.get(UScript.UNKNOWN)) { 439 errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}"); 440 } 441 if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 || 442 !scripts.get(UScript.UNKNOWN)) { 443 errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}"); 444 } 445 446 /* normal usage */ 447 if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 || 448 !scripts.get(UScript.ARABIC)) { 449 errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}"); 450 } 451 if(UScript.getScriptExtensions(0x0640, scripts)>-3 || scripts.cardinality()<3 || 452 !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC) 453 ) { 454 errln("UScript.getScriptExtensions(U+0640) failed"); 455 } 456 if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 || 457 !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) { 458 errln("UScript.getScriptExtensions(U+FDF2) failed"); 459 } 460 if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 || 461 !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) { 462 errln("UScript.getScriptExtensions(U+FF65) failed"); 463 } 464 } 465 TestScriptMetadataAPI()466 public void TestScriptMetadataAPI() { 467 /* API & code coverage. */ 468 String sample = UScript.getSampleString(UScript.LATIN); 469 if(sample.length()!=1 || UScript.getScript(sample.charAt(0))!=UScript.LATIN) { 470 errln("UScript.getSampleString(Latn) failed"); 471 } 472 sample = UScript.getSampleString(UScript.INVALID_CODE); 473 if(sample.length()!=0) { 474 errln("UScript.getSampleString(invalid) failed"); 475 } 476 477 if(UScript.getUsage(UScript.LATIN)!=ScriptUsage.RECOMMENDED || 478 UScript.getUsage(UScript.YI)!=ScriptUsage.ASPIRATIONAL || 479 UScript.getUsage(UScript.CHEROKEE)!=ScriptUsage.LIMITED_USE || 480 UScript.getUsage(UScript.COPTIC)!=ScriptUsage.EXCLUDED || 481 UScript.getUsage(UScript.CIRTH)!=ScriptUsage.NOT_ENCODED || 482 UScript.getUsage(UScript.INVALID_CODE)!=ScriptUsage.NOT_ENCODED || 483 UScript.getUsage(UScript.CODE_LIMIT)!=ScriptUsage.NOT_ENCODED) { 484 errln("UScript.getUsage() failed"); 485 } 486 487 if(UScript.isRightToLeft(UScript.LATIN) || 488 UScript.isRightToLeft(UScript.CIRTH) || 489 !UScript.isRightToLeft(UScript.ARABIC) || 490 !UScript.isRightToLeft(UScript.HEBREW)) { 491 errln("UScript.isRightToLeft() failed"); 492 } 493 494 if(UScript.breaksBetweenLetters(UScript.LATIN) || 495 UScript.breaksBetweenLetters(UScript.CIRTH) || 496 !UScript.breaksBetweenLetters(UScript.HAN) || 497 !UScript.breaksBetweenLetters(UScript.THAI)) { 498 errln("UScript.breaksBetweenLetters() failed"); 499 } 500 501 if(UScript.isCased(UScript.CIRTH) || 502 UScript.isCased(UScript.HAN) || 503 !UScript.isCased(UScript.LATIN) || 504 !UScript.isCased(UScript.GREEK)) { 505 errln("UScript.isCased() failed"); 506 } 507 } 508 509 /** 510 * Maps a special script code to the most common script of its encoded characters. 511 */ getCharScript(int script)512 private static final int getCharScript(int script) { 513 switch(script) { 514 case UScript.SIMPLIFIED_HAN: 515 case UScript.TRADITIONAL_HAN: 516 return UScript.HAN; 517 case UScript.JAPANESE: 518 return UScript.HIRAGANA; 519 case UScript.KOREAN: 520 return UScript.HANGUL; 521 default: 522 return script; 523 } 524 } 525 TestScriptMetadata()526 public void TestScriptMetadata() { 527 UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]"); 528 // So far, sample characters are uppercase. 529 // Georgian is special. 530 UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]"); 531 for(int sc = 0; sc < UScript.CODE_LIMIT; ++sc) { 532 String sn = UScript.getShortName(sc); 533 ScriptUsage usage = UScript.getUsage(sc); 534 String sample = UScript.getSampleString(sc); 535 UnicodeSet scriptSet = new UnicodeSet(); 536 scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc); 537 if(usage == ScriptUsage.NOT_ENCODED) { 538 assertTrue(sn + " not encoded, no sample", sample.length() == 0); // Java 6: sample.isEmpty() 539 assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc)); 540 assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc)); 541 assertFalse(sn + " not encoded, not cased", UScript.isCased(sc)); 542 assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty()); 543 } else { 544 assertFalse(sn + " encoded, has a sample character", sample.length() == 0); // Java 6: sample.isEmpty() 545 int firstChar = sample.codePointAt(0); 546 int charScript = getCharScript(sc); 547 assertEquals(sn + " script(sample(script))", 548 charScript, UScript.getScript(firstChar)); 549 assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc)); 550 assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc)); 551 assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty()); 552 if(UScript.isRightToLeft(sc)) { 553 rtl.removeAll(scriptSet); 554 } 555 if(UScript.isCased(sc)) { 556 cased.removeAll(scriptSet); 557 } 558 } 559 } 560 assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true)); 561 assertEquals("no remaining cased characters", "[]", cased.toPattern(true)); 562 563 assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN)); 564 assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI)); 565 assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN)); 566 } 567 TestScriptNames()568 public void TestScriptNames(){ 569 for(int i=0; i<UScript.CODE_LIMIT;i++){ 570 String name = UScript.getName(i); 571 if(name.equals("") ){ 572 errln("FAILED: getName for code : "+i); 573 } 574 String shortName= UScript.getShortName(i); 575 if(shortName.equals("")){ 576 errln("FAILED: getName for code : "+i); 577 } 578 } 579 } TestAllCodepoints()580 public void TestAllCodepoints(){ 581 int code; 582 //String oldId=""; 583 //String oldAbbrId=""; 584 for( int i =0; i <= 0x10ffff; i++){ 585 code =UScript.INVALID_CODE; 586 code = UScript.getScript(i); 587 if(code==UScript.INVALID_CODE){ 588 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 589 } 590 String id =UScript.getName(code); 591 if(id.indexOf("INVALID")>=0){ 592 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 593 } 594 String abbr = UScript.getShortName(code); 595 if(abbr.indexOf("INV")>=0){ 596 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 597 } 598 } 599 } TestNewCode()600 public void TestNewCode(){ 601 /* 602 * These script codes were originally added to ICU pre-3.6, so that ICU would 603 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1. 604 * These script codes were added with only short names because we don't 605 * want to invent long names ourselves. 606 * Unicode 5 and later encode some of these scripts and give them long names. 607 * Whenever this happens, the long script names here need to be updated. 608 */ 609 String[] expectedLong = new String[]{ 610 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", 611 "Egyd", "Egyh", "Egyptian_Hieroglyphs", 612 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Hung", "Inds", 613 "Javanese", "Kayah_Li", "Latf", "Latg", 614 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs", 615 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician", 616 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", 617 "Zxxx", "Unknown", 618 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese", 619 "Moon", "Meetei_Mayek", 620 /* new in ICU 4.0 */ 621 "Imperial_Aramaic", "Avestan", "Chakma", "Kore", 622 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv", 623 "Inscriptional_Parthian", "Samaritan", "Tai_Viet", 624 "Zmth", "Zsym", 625 /* new in ICU 4.4 */ 626 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian", 627 /* new in ICU 4.6 */ 628 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel", 629 "Loma", "Mende_Kikakui", "Meroitic_Cursive", 630 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi", 631 /* new in ICU 4.8 */ 632 "Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole", 633 /* new in ICU 49 */ 634 "Hluw", "Khojki", "Tirhuta", 635 /* new in ICU 52 */ 636 "Caucasian_Albanian", "Mahajani", 637 /* new in ICU 54 */ 638 "Ahom", "Hatr", "Modi", "Mult", "Pau_Cin_Hau", "Siddham" 639 }; 640 String[] expectedShort = new String[]{ 641 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp", 642 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg", 643 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx", 644 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux", 645 "Zxxx", "Zzzz", 646 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund", 647 "Moon", "Mtei", 648 /* new in ICU 4.0 */ 649 "Armi", "Avst", "Cakm", "Kore", 650 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt", 651 "Zmth", "Zsym", 652 /* new in ICU 4.4 */ 653 "Bamu", "Lisu", "Nkgb", "Sarb", 654 /* new in ICU 4.6 */ 655 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc", 656 "Narb", "Nbat", "Palm", "Sind", "Wara", 657 /* new in ICU 4.8 */ 658 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole", 659 /* new in ICU 49 */ 660 "Hluw", "Khoj", "Tirh", 661 /* new in ICU 52 */ 662 "Aghb", "Mahj", 663 /* new in ICU 54 */ 664 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd" 665 }; 666 if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) { 667 errln("need to add new script codes in lang.TestUScript.java!"); 668 return; 669 } 670 int j = 0; 671 int i = 0; 672 for(i=UScript.BALINESE; i<UScript.CODE_LIMIT; i++, j++){ 673 String name = UScript.getName(i); 674 if(name==null || !name.equals(expectedLong[j])){ 675 errln("UScript.getName failed for code"+ i + name +"!=" +expectedLong[j]); 676 } 677 name = UScript.getShortName(i); 678 if(name==null || !name.equals(expectedShort[j])){ 679 errln("UScript.getShortName failed for code"+ i + name +"!=" +expectedShort[j]); 680 } 681 } 682 for(i=0; i<expectedLong.length; i++){ 683 int[] ret = UScript.getCode(expectedShort[i]); 684 if(ret.length>1){ 685 errln("UScript.getCode did not return expected number of codes for script"+ expectedShort[i]+". EXPECTED: 1 GOT: "+ ret.length); 686 } 687 if(ret[0]!= (UScript.BALINESE+i)){ 688 errln("UScript.getCode did not return expected code for script"+ expectedShort[i]+". EXPECTED: "+ (UScript.BALINESE+i)+" GOT: %i\n"+ ret[0] ); 689 } 690 } 691 } 692 } 693