1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 package com.ibm.icu.dev.test.lang; 9 10 import java.io.BufferedReader; 11 import java.io.IOException; 12 import java.util.Arrays; 13 import java.util.Locale; 14 15 import com.ibm.icu.dev.test.TestFmwk; 16 import com.ibm.icu.dev.test.TestUtil; 17 import com.ibm.icu.impl.Norm2AllModes; 18 import com.ibm.icu.impl.Normalizer2Impl; 19 import com.ibm.icu.impl.PatternProps; 20 import com.ibm.icu.impl.UCharacterName; 21 import com.ibm.icu.impl.Utility; 22 import com.ibm.icu.lang.UCharacter; 23 import com.ibm.icu.lang.UCharacterCategory; 24 import com.ibm.icu.lang.UCharacterDirection; 25 import com.ibm.icu.lang.UCharacterEnums; 26 import com.ibm.icu.lang.UProperty; 27 import com.ibm.icu.lang.UScript; 28 import com.ibm.icu.text.Normalizer2; 29 import com.ibm.icu.text.UTF16; 30 import com.ibm.icu.text.UnicodeSet; 31 import com.ibm.icu.text.UnicodeSetIterator; 32 import com.ibm.icu.util.RangeValueIterator; 33 import com.ibm.icu.util.ULocale; 34 import com.ibm.icu.util.ValueIterator; 35 import com.ibm.icu.util.VersionInfo; 36 37 /** 38 * Testing class for UCharacter 39 * Mostly following the test cases for ICU 40 * @author Syn Wee Quek 41 * @since nov 04 2000 42 */ 43 public final class UCharacterTest extends TestFmwk 44 { 45 // private variables ============================================= 46 47 /** 48 * ICU4J data version number 49 */ 50 private final VersionInfo VERSION_ = VersionInfo.getInstance("7.0.0.0"); 51 52 // constructor =================================================== 53 54 /** 55 * Constructor 56 */ UCharacterTest()57 public UCharacterTest() 58 { 59 } 60 61 // public methods ================================================ 62 main(String[] arg)63 public static void main(String[] arg) 64 { 65 try 66 { 67 UCharacterTest test = new UCharacterTest(); 68 test.run(arg); 69 } 70 catch (Exception e) 71 { 72 e.printStackTrace(); 73 } 74 } 75 76 /** 77 * Testing the letter and number determination in UCharacter 78 */ TestLetterNumber()79 public void TestLetterNumber() 80 { 81 for (int i = 0x0041; i < 0x005B; i ++) 82 if (!UCharacter.isLetter(i)) 83 errln("FAIL \\u" + hex(i) + " expected to be a letter"); 84 85 for (int i = 0x0660; i < 0x066A; i ++) 86 if (UCharacter.isLetter(i)) 87 errln("FAIL \\u" + hex(i) + " expected not to be a letter"); 88 89 for (int i = 0x0660; i < 0x066A; i ++) 90 if (!UCharacter.isDigit(i)) 91 errln("FAIL \\u" + hex(i) + " expected to be a digit"); 92 93 for (int i = 0x0041; i < 0x005B; i ++) 94 if (!UCharacter.isLetterOrDigit(i)) 95 errln("FAIL \\u" + hex(i) + " expected not to be a digit"); 96 97 for (int i = 0x0660; i < 0x066A; i ++) 98 if (!UCharacter.isLetterOrDigit(i)) 99 errln("FAIL \\u" + hex(i) + 100 "expected to be either a letter or a digit"); 101 102 /* 103 * The following checks work only starting from Unicode 4.0. 104 * Check the version number here. 105 */ 106 VersionInfo version = UCharacter.getUnicodeVersion(); 107 if(version.getMajor()<4 || version.equals(VersionInfo.getInstance(4, 0, 1))) { 108 return; 109 } 110 111 112 113 /* 114 * Sanity check: 115 * Verify that exactly the digit characters have decimal digit values. 116 * This assumption is used in the implementation of u_digit() 117 * (which checks nt=de) 118 * compared with the parallel java.lang.Character.digit() 119 * (which checks Nd). 120 * 121 * This was not true in Unicode 3.2 and earlier. 122 * Unicode 4.0 fixed discrepancies. 123 * Unicode 4.0.1 re-introduced problems in this area due to an 124 * unintentionally incomplete last-minute change. 125 */ 126 String digitsPattern = "[:Nd:]"; 127 String decimalValuesPattern = "[:Numeric_Type=Decimal:]"; 128 129 UnicodeSet digits, decimalValues; 130 131 digits= new UnicodeSet(digitsPattern); 132 decimalValues=new UnicodeSet(decimalValuesPattern); 133 134 135 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true); 136 137 138 } 139 140 /** 141 * Tests for space determination in UCharacter 142 */ TestSpaces()143 public void TestSpaces() 144 { 145 int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005}; 146 int nonspaces[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0074}; 147 int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c /* ,0x200b */}; // 0x200b was "Zs" in Unicode 4.0, but it is "Cf" in Unicode 4.1 148 int nonwhitespaces[] = {0x0061, 0x0062, 0x003c, 0x0028, 0x003f, 0x00a0, 0x2007, 0x202f, 0xfefe, 0x200b}; 149 150 int size = spaces.length; 151 for (int i = 0; i < size; i ++) 152 { 153 if (!UCharacter.isSpaceChar(spaces[i])) 154 { 155 errln("FAIL \\u" + hex(spaces[i]) + 156 " expected to be a space character"); 157 break; 158 } 159 160 if (UCharacter.isSpaceChar(nonspaces[i])) 161 { 162 errln("FAIL \\u" + hex(nonspaces[i]) + 163 " expected not to be space character"); 164 break; 165 } 166 167 if (!UCharacter.isWhitespace(whitespaces[i])) 168 { 169 errln("FAIL \\u" + hex(whitespaces[i]) + 170 " expected to be a white space character"); 171 break; 172 } 173 if (UCharacter.isWhitespace(nonwhitespaces[i])) 174 { 175 errln("FAIL \\u" + hex(nonwhitespaces[i]) + 176 " expected not to be a space character"); 177 break; 178 } 179 logln("Ok \\u" + hex(spaces[i]) + " and \\u" + 180 hex(nonspaces[i]) + " and \\u" + hex(whitespaces[i]) + 181 " and \\u" + hex(nonwhitespaces[i])); 182 } 183 184 int patternWhiteSpace[] = {0x9, 0xd, 0x20, 0x85, 185 0x200e, 0x200f, 0x2028, 0x2029}; 186 int nonPatternWhiteSpace[] = {0x8, 0xe, 0x21, 0x86, 0xa0, 0xa1, 187 0x1680, 0x1681, 0x180e, 0x180f, 188 0x1FFF, 0x2000, 0x200a, 0x200b, 189 0x2010, 0x202f, 0x2030, 0x205f, 190 0x2060, 0x3000, 0x3001}; 191 for (int i = 0; i < patternWhiteSpace.length; i ++) { 192 if (!PatternProps.isWhiteSpace(patternWhiteSpace[i])) { 193 errln("\\u" + Utility.hex(patternWhiteSpace[i], 4) 194 + " expected to be a Pattern_White_Space"); 195 } 196 } 197 for (int i = 0; i < nonPatternWhiteSpace.length; i ++) { 198 if (PatternProps.isWhiteSpace(nonPatternWhiteSpace[i])) { 199 errln("\\u" + Utility.hex(nonPatternWhiteSpace[i], 4) 200 + " expected to be a non-Pattern_White_Space"); 201 } 202 } 203 204 // TODO: propose public API for constants like uchar.h's U_GC_*_MASK 205 // (http://bugs.icu-project.org/trac/ticket/7461) 206 int GC_Z_MASK = 207 (1 << UCharacter.SPACE_SEPARATOR) | 208 (1 << UCharacter.LINE_SEPARATOR) | 209 (1 << UCharacter.PARAGRAPH_SEPARATOR); 210 211 // UCharacter.isWhitespace(c) should be the same as Character.isWhitespace(). 212 // This uses logln() because Character.isWhitespace() differs between Java versions, thus 213 // it is not necessarily an error if there is a difference between 214 // particular Java and ICU versions. 215 // However, you need to run tests with -v to see the output. 216 // Also note that, at least as of Unicode 5.2, 217 // there are no supplementary white space characters. 218 for (int c = 0; c <= 0xffff; ++c) { 219 boolean j = Character.isWhitespace(c); 220 boolean i = UCharacter.isWhitespace(c); 221 boolean u = UCharacter.isUWhiteSpace(c); 222 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) & 223 GC_Z_MASK) != 0; 224 if (j != i) { 225 logln(String.format( 226 "isWhitespace(U+%04x) difference: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b", 227 c, j, i, u, z)); 228 } else if (j || i || u || z) { 229 logln(String.format( 230 "isWhitespace(U+%04x) FYI: JDK %5b ICU %5b Unicode WS %5b Z Separator %5b", 231 c, j, i, u, z)); 232 } 233 } 234 for (char c = 0; c <= 0xff; ++c) { 235 boolean j = Character.isSpace(c); 236 boolean i = UCharacter.isSpace(c); 237 boolean z = (UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK) & 238 GC_Z_MASK) != 0; 239 if (j != i) { 240 logln(String.format( 241 "isSpace(U+%04x) difference: JDK %5b ICU %5b Z Separator %5b", 242 (int)c, j, i, z)); 243 } else if (j || i || z) { 244 logln(String.format( 245 "isSpace(U+%04x) FYI: JDK %5b ICU %5b Z Separator %5b", 246 (int)c, j, i, z)); 247 } 248 } 249 } 250 251 /** 252 * Test various implementations of Pattern_Syntax & Pattern_White_Space. 253 */ TestPatternProperties()254 public void TestPatternProperties() { 255 UnicodeSet syn_pp = new UnicodeSet(); 256 UnicodeSet syn_prop = new UnicodeSet("[:Pattern_Syntax:]"); 257 UnicodeSet syn_list = new UnicodeSet( 258 "[!-/\\:-@\\[-\\^`\\{-~"+ 259 "\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE\u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7"+ 260 "\u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E\u2190-\u245F\u2500-\u2775"+ 261 "\u2794-\u2BFF\u2E00-\u2E7F\u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]"); 262 UnicodeSet ws_pp = new UnicodeSet(); 263 UnicodeSet ws_prop = new UnicodeSet("[:Pattern_White_Space:]"); 264 UnicodeSet ws_list = new UnicodeSet("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"); 265 UnicodeSet syn_ws_pp = new UnicodeSet(); 266 UnicodeSet syn_ws_prop = new UnicodeSet(syn_prop).addAll(ws_prop); 267 for(int c=0; c<=0xffff; ++c) { 268 if(PatternProps.isSyntax(c)) { 269 syn_pp.add(c); 270 } 271 if(PatternProps.isWhiteSpace(c)) { 272 ws_pp.add(c); 273 } 274 if(PatternProps.isSyntaxOrWhiteSpace(c)) { 275 syn_ws_pp.add(c); 276 } 277 } 278 compareUSets(syn_pp, syn_prop, 279 "PatternProps.isSyntax()", "[:Pattern_Syntax:]", true); 280 compareUSets(syn_pp, syn_list, 281 "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", true); 282 compareUSets(ws_pp, ws_prop, 283 "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", true); 284 compareUSets(ws_pp, ws_list, 285 "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", true); 286 compareUSets(syn_ws_pp, syn_ws_prop, 287 "PatternProps.isSyntaxOrWhiteSpace()", 288 "[[:Pattern_Syntax:][:Pattern_White_Space:]]", true); 289 } 290 291 /** 292 * Tests for defined and undefined characters 293 */ TestDefined()294 public void TestDefined() 295 { 296 int undefined[] = {0xfff1, 0xfff7, 0xfa6e}; 297 int defined[] = {0x523E, 0x004f88, 0x00fffd}; 298 299 int size = undefined.length; 300 for (int i = 0; i < size; i ++) 301 { 302 if (UCharacter.isDefined(undefined[i])) 303 { 304 errln("FAIL \\u" + hex(undefined[i]) + 305 " expected not to be defined"); 306 break; 307 } 308 if (!UCharacter.isDefined(defined[i])) 309 { 310 errln("FAIL \\u" + hex(defined[i]) + " expected defined"); 311 break; 312 } 313 } 314 } 315 316 /** 317 * Tests for base characters and their cellwidth 318 */ TestBase()319 public void TestBase() 320 { 321 int base[] = {0x0061, 0x000031, 0x0003d2}; 322 int nonbase[] = {0x002B, 0x000020, 0x00203B}; 323 int size = base.length; 324 for (int i = 0; i < size; i ++) 325 { 326 if (UCharacter.isBaseForm(nonbase[i])) 327 { 328 errln("FAIL \\u" + hex(nonbase[i]) + 329 " expected not to be a base character"); 330 break; 331 } 332 if (!UCharacter.isBaseForm(base[i])) 333 { 334 errln("FAIL \\u" + hex(base[i]) + 335 " expected to be a base character"); 336 break; 337 } 338 } 339 } 340 341 /** 342 * Tests for digit characters 343 */ TestDigits()344 public void TestDigits() 345 { 346 int digits[] = {0x0030, 0x000662, 0x000F23, 0x000ED5, 0x002160}; 347 348 //special characters not in the properties table 349 int digits2[] = {0x3007, 0x004e00, 0x004e8c, 0x004e09, 0x0056d8, 350 0x004e94, 0x00516d, 0x4e03, 0x00516b, 0x004e5d}; 351 int nondigits[] = {0x0010, 0x000041, 0x000122, 0x0068FE}; 352 353 int digitvalues[] = {0, 2, 3, 5, 1}; 354 int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; 355 356 int size = digits.length; 357 for (int i = 0; i < size; i ++) { 358 if (UCharacter.isDigit(digits[i]) && 359 UCharacter.digit(digits[i]) != digitvalues[i]) 360 { 361 errln("FAIL \\u" + hex(digits[i]) + 362 " expected digit with value " + digitvalues[i]); 363 break; 364 } 365 } 366 size = nondigits.length; 367 for (int i = 0; i < size; i ++) 368 if (UCharacter.isDigit(nondigits[i])) 369 { 370 errln("FAIL \\u" + hex(nondigits[i]) + " expected nondigit"); 371 break; 372 } 373 374 size = digits2.length; 375 for (int i = 0; i < 10; i ++) { 376 if (UCharacter.isDigit(digits2[i]) && 377 UCharacter.digit(digits2[i]) != digitvalues2[i]) 378 { 379 errln("FAIL \\u" + hex(digits2[i]) + 380 " expected digit with value " + digitvalues2[i]); 381 break; 382 } 383 } 384 } 385 386 /** 387 * Tests for numeric characters 388 */ TestNumeric()389 public void TestNumeric() 390 { 391 if (UCharacter.getNumericValue(0x00BC) != -2) { 392 errln("Numeric value of 0x00BC expected to be -2"); 393 } 394 395 for (int i = '0'; i < '9'; i ++) { 396 int n1 = UCharacter.getNumericValue(i); 397 double n2 = UCharacter.getUnicodeNumericValue(i); 398 if (n1 != n2 || n1 != (i - '0')) { 399 errln("Numeric value of " + (char)i + " expected to be " + 400 (i - '0')); 401 } 402 } 403 for (int i = 'A'; i < 'F'; i ++) { 404 int n1 = UCharacter.getNumericValue(i); 405 double n2 = UCharacter.getUnicodeNumericValue(i); 406 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 'A' + 10)) { 407 errln("Numeric value of " + (char)i + " expected to be " + 408 (i - 'A' + 10)); 409 } 410 } 411 for (int i = 0xFF21; i < 0xFF26; i ++) { 412 // testing full wideth latin characters A-F 413 int n1 = UCharacter.getNumericValue(i); 414 double n2 = UCharacter.getUnicodeNumericValue(i); 415 if (n2 != UCharacter.NO_NUMERIC_VALUE || n1 != (i - 0xFF21 + 10)) { 416 errln("Numeric value of " + (char)i + " expected to be " + 417 (i - 0xFF21 + 10)); 418 } 419 } 420 // testing han numbers 421 int han[] = {0x96f6, 0, 0x58f9, 1, 0x8cb3, 2, 0x53c3, 3, 422 0x8086, 4, 0x4f0d, 5, 0x9678, 6, 0x67d2, 7, 423 0x634c, 8, 0x7396, 9, 0x5341, 10, 0x62fe, 10, 424 0x767e, 100, 0x4f70, 100, 0x5343, 1000, 0x4edf, 1000, 425 0x824c, 10000, 0x5104, 100000000}; 426 for (int i = 0; i < han.length; i += 2) { 427 if (UCharacter.getHanNumericValue(han[i]) != han[i + 1]) { 428 errln("Numeric value of \\u" + 429 Integer.toHexString(han[i]) + " expected to be " + 430 han[i + 1]); 431 } 432 } 433 } 434 435 /** 436 * Tests for version 437 */ TestVersion()438 public void TestVersion() 439 { 440 if (!UCharacter.getUnicodeVersion().equals(VERSION_)) 441 errln("FAIL expected: " + VERSION_ + " got: " + UCharacter.getUnicodeVersion()); 442 } 443 444 /** 445 * Tests for control characters 446 */ TestISOControl()447 public void TestISOControl() 448 { 449 int control[] = {0x001b, 0x000097, 0x000082}; 450 int noncontrol[] = {0x61, 0x000031, 0x0000e2}; 451 452 int size = control.length; 453 for (int i = 0; i < size; i ++) 454 { 455 if (!UCharacter.isISOControl(control[i])) 456 { 457 errln("FAIL 0x" + Integer.toHexString(control[i]) + 458 " expected to be a control character"); 459 break; 460 } 461 if (UCharacter.isISOControl(noncontrol[i])) 462 { 463 errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) + 464 " expected to be not a control character"); 465 break; 466 } 467 468 logln("Ok 0x" + Integer.toHexString(control[i]) + " and 0x" + 469 Integer.toHexString(noncontrol[i])); 470 } 471 } 472 473 /** 474 * Test Supplementary 475 */ TestSupplementary()476 public void TestSupplementary() 477 { 478 for (int i = 0; i < 0x10000; i ++) { 479 if (UCharacter.isSupplementary(i)) { 480 errln("Codepoint \\u" + Integer.toHexString(i) + 481 " is not supplementary"); 482 } 483 } 484 for (int i = 0x10000; i < 0x10FFFF; i ++) { 485 if (!UCharacter.isSupplementary(i)) { 486 errln("Codepoint \\u" + Integer.toHexString(i) + 487 " is supplementary"); 488 } 489 } 490 } 491 492 /** 493 * Test mirroring 494 */ TestMirror()495 public void TestMirror() 496 { 497 if (!(UCharacter.isMirrored(0x28) && UCharacter.isMirrored(0xbb) && 498 UCharacter.isMirrored(0x2045) && UCharacter.isMirrored(0x232a) 499 && !UCharacter.isMirrored(0x27) && 500 !UCharacter.isMirrored(0x61) && !UCharacter.isMirrored(0x284) 501 && !UCharacter.isMirrored(0x3400))) { 502 errln("isMirrored() does not work correctly"); 503 } 504 505 if (!(UCharacter.getMirror(0x3c) == 0x3e && 506 UCharacter.getMirror(0x5d) == 0x5b && 507 UCharacter.getMirror(0x208d) == 0x208e && 508 UCharacter.getMirror(0x3017) == 0x3016 && 509 510 UCharacter.getMirror(0xbb) == 0xab && 511 UCharacter.getMirror(0x2215) == 0x29F5 && 512 UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */ 513 514 UCharacter.getMirror(0x2e) == 0x2e && 515 UCharacter.getMirror(0x6f3) == 0x6f3 && 516 UCharacter.getMirror(0x301c) == 0x301c && 517 UCharacter.getMirror(0xa4ab) == 0xa4ab && 518 519 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 520 UCharacter.getMirror(0x2018) == 0x2018 && 521 UCharacter.getMirror(0x201b) == 0x201b && 522 UCharacter.getMirror(0x301d) == 0x301d)) { 523 errln("getMirror() does not work correctly"); 524 } 525 526 /* verify that Bidi_Mirroring_Glyph roundtrips */ 527 UnicodeSet set=new UnicodeSet("[:Bidi_Mirrored:]"); 528 UnicodeSetIterator iter=new UnicodeSetIterator(set); 529 int start, end, c2, c3; 530 while(iter.nextRange() && (start=iter.codepoint)>=0) { 531 end=iter.codepointEnd; 532 do { 533 c2=UCharacter.getMirror(start); 534 c3=UCharacter.getMirror(c2); 535 if(c3!=start) { 536 errln("getMirror() does not roundtrip: U+"+hex(start)+"->U+"+hex(c2)+"->U+"+hex(c3)); 537 } 538 c3=UCharacter.getBidiPairedBracket(start); 539 if(UCharacter.getIntPropertyValue(start, UProperty.BIDI_PAIRED_BRACKET_TYPE)==UCharacter.BidiPairedBracketType.NONE) { 540 if(c3!=start) { 541 errln("u_getBidiPairedBracket(U+"+hex(start)+") != self for bpt(c)==None"); 542 } 543 } else { 544 if(c3!=c2) { 545 errln("u_getBidiPairedBracket(U+"+hex(start)+") != U+"+hex(c2)+" = bmg(c)'"); 546 } 547 } 548 } while(++start<=end); 549 } 550 551 // verify that Unicode Corrigendum #6 reverts mirrored status of the following 552 if (UCharacter.isMirrored(0x2018) || 553 UCharacter.isMirrored(0x201d) || 554 UCharacter.isMirrored(0x201f) || 555 UCharacter.isMirrored(0x301e)) { 556 errln("Unicode Corrigendum #6 conflict, one or more of 2018/201d/201f/301e has mirrored property"); 557 } 558 } 559 560 /** 561 * Tests for printable characters 562 */ TestPrint()563 public void TestPrint() 564 { 565 int printable[] = {0x0042, 0x00005f, 0x002014}; 566 int nonprintable[] = {0x200c, 0x00009f, 0x00001b}; 567 568 int size = printable.length; 569 for (int i = 0; i < size; i ++) 570 { 571 if (!UCharacter.isPrintable(printable[i])) 572 { 573 errln("FAIL \\u" + hex(printable[i]) + 574 " expected to be a printable character"); 575 break; 576 } 577 if (UCharacter.isPrintable(nonprintable[i])) 578 { 579 errln("FAIL \\u" + hex(nonprintable[i]) + 580 " expected not to be a printable character"); 581 break; 582 } 583 logln("Ok \\u" + hex(printable[i]) + " and \\u" + 584 hex(nonprintable[i])); 585 } 586 587 // test all ISO 8 controls 588 for (int ch = 0; ch <= 0x9f; ++ ch) { 589 if (ch == 0x20) { 590 // skip ASCII graphic characters and continue with DEL 591 ch = 0x7f; 592 } 593 if (UCharacter.isPrintable(ch)) { 594 errln("Fail \\u" + hex(ch) + 595 " is a ISO 8 control character hence not printable\n"); 596 } 597 } 598 599 /* test all Latin-1 graphic characters */ 600 for (int ch = 0x20; ch <= 0xff; ++ ch) { 601 if (ch == 0x7f) { 602 ch = 0xa0; 603 } 604 if (!UCharacter.isPrintable(ch) 605 && ch != 0x00AD/* Unicode 4.0 changed the defintion of soft hyphen to be a Cf*/) { 606 errln("Fail \\u" + hex(ch) + 607 " is a Latin-1 graphic character\n"); 608 } 609 } 610 } 611 612 /** 613 * Testing for identifier characters 614 */ TestIdentifier()615 public void TestIdentifier() 616 { 617 int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061}; 618 int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019}; 619 int unicodeidpart[] = {0x005f, 0x000032, 0x000045}; 620 int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020}; 621 int idignore[] = {0x0006, 0x0010, 0x206b}; 622 int nonidignore[] = {0x0075, 0x0000a3, 0x000061}; 623 624 int size = unicodeidstart.length; 625 for (int i = 0; i < size; i ++) 626 { 627 if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i])) 628 { 629 errln("FAIL \\u" + hex(unicodeidstart[i]) + 630 " expected to be a unicode identifier start character"); 631 break; 632 } 633 if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i])) 634 { 635 errln("FAIL \\u" + hex(nonunicodeidstart[i]) + 636 " expected not to be a unicode identifier start " + 637 "character"); 638 break; 639 } 640 if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i])) 641 { 642 errln("FAIL \\u" + hex(unicodeidpart[i]) + 643 " expected to be a unicode identifier part character"); 644 break; 645 } 646 if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i])) 647 { 648 errln("FAIL \\u" + hex(nonunicodeidpart[i]) + 649 " expected not to be a unicode identifier part " + 650 "character"); 651 break; 652 } 653 if (!UCharacter.isIdentifierIgnorable(idignore[i])) 654 { 655 errln("FAIL \\u" + hex(idignore[i]) + 656 " expected to be a ignorable unicode character"); 657 break; 658 } 659 if (UCharacter.isIdentifierIgnorable(nonidignore[i])) 660 { 661 errln("FAIL \\u" + hex(nonidignore[i]) + 662 " expected not to be a ignorable unicode character"); 663 break; 664 } 665 logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u" + 666 hex(nonunicodeidstart[i]) + " and \\u" + 667 hex(unicodeidpart[i]) + " and \\u" + 668 hex(nonunicodeidpart[i]) + " and \\u" + 669 hex(idignore[i]) + " and \\u" + hex(nonidignore[i])); 670 } 671 } 672 673 /** 674 * Tests for the character types, direction.<br> 675 * This method reads in UnicodeData.txt file for testing purposes. A 676 * default path is provided relative to the src path, however the user 677 * could set a system property to change the directory path.<br> 678 * e.g. java -DUnicodeData="data_directory_path" 679 * com.ibm.icu.dev.test.lang.UCharacterTest 680 */ TestUnicodeData()681 public void TestUnicodeData() 682 { 683 // this is the 2 char category types used in the UnicodeData file 684 final String TYPE = 685 "LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf"; 686 687 // directorionality types used in the UnicodeData file 688 // padded by spaces to make each type size 4 689 final String DIR = 690 "L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI "; 691 692 Normalizer2 nfc = Normalizer2.getNFCInstance(); 693 Normalizer2 nfkc = Normalizer2.getNFKCInstance(); 694 695 BufferedReader input = null; 696 try { 697 input = TestUtil.getDataReader("unicode/UnicodeData.txt"); 698 int numErrors = 0; 699 700 for (;;) { 701 String s = input.readLine(); 702 if(s == null) { 703 break; 704 } 705 if(s.length()<4 || s.startsWith("#")) { 706 continue; 707 } 708 String[] fields = s.split(";", -1); 709 assert (fields.length == 15 ) : "Number of fields is " + fields.length + ": " + s; 710 711 int ch = Integer.parseInt(fields[0], 16); 712 713 // testing the general category 714 int type = TYPE.indexOf(fields[2]); 715 if (type < 0) 716 type = 0; 717 else 718 type = (type >> 1) + 1; 719 if (UCharacter.getType(ch) != type) 720 { 721 errln("FAIL \\u" + hex(ch) + " expected type " + type); 722 break; 723 } 724 725 if (UCharacter.getIntPropertyValue(ch, 726 UProperty.GENERAL_CATEGORY_MASK) != (1 << type)) { 727 errln("error: getIntPropertyValue(\\u" + 728 Integer.toHexString(ch) + 729 ", UProperty.GENERAL_CATEGORY_MASK) != " + 730 "getMask(getType(ch))"); 731 } 732 733 // testing combining class 734 int cc = Integer.parseInt(fields[3]); 735 if (UCharacter.getCombiningClass(ch) != cc) 736 { 737 errln("FAIL \\u" + hex(ch) + " expected combining " + 738 "class " + cc); 739 break; 740 } 741 if (nfkc.getCombiningClass(ch) != cc) 742 { 743 errln("FAIL \\u" + hex(ch) + " expected NFKC combining " + 744 "class " + cc); 745 break; 746 } 747 748 // testing the direction 749 String d = fields[4]; 750 if (d.length() == 1) 751 d = d + " "; 752 753 int dir = DIR.indexOf(d) >> 2; 754 if (UCharacter.getDirection(ch) != dir) 755 { 756 errln("FAIL \\u" + hex(ch) + 757 " expected direction " + dir + " but got " + UCharacter.getDirection(ch)); 758 break; 759 } 760 761 byte bdir = (byte)dir; 762 if (UCharacter.getDirectionality(ch) != bdir) 763 { 764 errln("FAIL \\u" + hex(ch) + 765 " expected directionality " + bdir + " but got " + 766 UCharacter.getDirectionality(ch)); 767 break; 768 } 769 770 /* get Decomposition_Type & Decomposition_Mapping, field 5 */ 771 int dt; 772 if(fields[5].length()==0) { 773 /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */ 774 if(ch==0xac00 || ch==0xd7a3) { 775 dt=UCharacter.DecompositionType.CANONICAL; 776 } else { 777 dt=UCharacter.DecompositionType.NONE; 778 } 779 } else { 780 d=fields[5]; 781 dt=-1; 782 if(d.charAt(0)=='<') { 783 int end=d.indexOf('>', 1); 784 if(end>=0) { 785 dt=UCharacter.getPropertyValueEnum(UProperty.DECOMPOSITION_TYPE, d.substring(1, end)); 786 while(d.charAt(++end)==' ') {} // skip spaces 787 d=d.substring(end); 788 } 789 } else { 790 dt=UCharacter.DecompositionType.CANONICAL; 791 } 792 } 793 String dm; 794 if(dt>UCharacter.DecompositionType.NONE) { 795 if(ch==0xac00) { 796 dm="\u1100\u1161"; 797 } else if(ch==0xd7a3) { 798 dm="\ud788\u11c2"; 799 } else { 800 String[] dmChars=d.split(" +"); 801 StringBuilder dmb=new StringBuilder(dmChars.length); 802 for(String dmc : dmChars) { 803 dmb.appendCodePoint(Integer.parseInt(dmc, 16)); 804 } 805 dm=dmb.toString(); 806 } 807 } else { 808 dm=null; 809 } 810 if(dt<0) { 811 errln(String.format("error in UnicodeData.txt: syntax error in U+%04lX decomposition field", ch)); 812 return; 813 } 814 int i=UCharacter.getIntPropertyValue(ch, UProperty.DECOMPOSITION_TYPE); 815 assertEquals( 816 String.format("error: UCharacter.getIntPropertyValue(U+%04x, UProperty.DECOMPOSITION_TYPE) is wrong", ch), 817 dt, i); 818 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */ 819 String mapping=nfkc.getRawDecomposition(ch); 820 assertEquals( 821 String.format("error: nfkc.getRawDecomposition(U+%04x) is wrong", ch), 822 dm, mapping); 823 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */ 824 if(dt!=UCharacter.DecompositionType.CANONICAL) { 825 dm=null; 826 } 827 mapping=nfc.getRawDecomposition(ch); 828 assertEquals( 829 String.format("error: nfc.getRawDecomposition(U+%04x) is wrong", ch), 830 dm, mapping); 831 /* recompose */ 832 if(dt==UCharacter.DecompositionType.CANONICAL 833 && !UCharacter.hasBinaryProperty(ch, UProperty.FULL_COMPOSITION_EXCLUSION)) { 834 int a=dm.codePointAt(0); 835 int b=dm.codePointBefore(dm.length()); 836 int composite=nfc.composePair(a, b); 837 assertEquals( 838 String.format( 839 "error: nfc U+%04X decomposes to U+%04X+U+%04X "+ 840 "but does not compose back (instead U+%04X)", 841 ch, a, b, composite), 842 ch, composite); 843 /* 844 * Note: NFKC has fewer round-trip mappings than NFC, 845 * so we can't just test nfkc.composePair(a, b) here without further data. 846 */ 847 } 848 849 // testing iso comment 850 try{ 851 String isocomment = fields[11]; 852 String comment = UCharacter.getISOComment(ch); 853 if (comment == null) { 854 comment = ""; 855 } 856 if (!comment.equals(isocomment)) { 857 errln("FAIL \\u" + hex(ch) + 858 " expected iso comment " + isocomment); 859 break; 860 } 861 }catch(Exception e){ 862 if(e.getMessage().indexOf("unames.icu") >= 0){ 863 numErrors++; 864 }else{ 865 throw e; 866 } 867 } 868 869 String upper = fields[12]; 870 int tempchar = ch; 871 if (upper.length() > 0) { 872 tempchar = Integer.parseInt(upper, 16); 873 } 874 int resultCp = UCharacter.toUpperCase(ch); 875 if (resultCp != tempchar) { 876 errln("FAIL \\u" + Utility.hex(ch, 4) 877 + " expected uppercase \\u" 878 + Utility.hex(tempchar, 4) 879 + " but got \\u" 880 + Utility.hex(resultCp, 4)); 881 break; 882 } 883 884 String lower = fields[13]; 885 tempchar = ch; 886 if (lower.length() > 0) { 887 tempchar = Integer.parseInt(lower, 16); 888 } 889 if (UCharacter.toLowerCase(ch) != tempchar) { 890 errln("FAIL \\u" + Utility.hex(ch, 4) 891 + " expected lowercase \\u" 892 + Utility.hex(tempchar, 4)); 893 break; 894 } 895 896 897 898 String title = fields[14]; 899 tempchar = ch; 900 if (title.length() > 0) { 901 tempchar = Integer.parseInt(title, 16); 902 } 903 if (UCharacter.toTitleCase(ch) != tempchar) { 904 errln("FAIL \\u" + Utility.hex(ch, 4) 905 + " expected titlecase \\u" 906 + Utility.hex(tempchar, 4)); 907 break; 908 } 909 } 910 if(numErrors > 0){ 911 warnln("Could not find unames.icu"); 912 } 913 } catch (Exception e) { 914 e.printStackTrace(); 915 } finally { 916 if (input != null) { 917 try { 918 input.close(); 919 } catch (IOException ignored) { 920 } 921 } 922 } 923 924 if (UCharacter.UnicodeBlock.of(0x0041) 925 != UCharacter.UnicodeBlock.BASIC_LATIN 926 || UCharacter.getIntPropertyValue(0x41, UProperty.BLOCK) 927 != UCharacter.UnicodeBlock.BASIC_LATIN.getID()) { 928 errln("UCharacter.UnicodeBlock.of(\\u0041) property failed! " 929 + "Expected : " 930 + UCharacter.UnicodeBlock.BASIC_LATIN.getID() + " got " 931 + UCharacter.UnicodeBlock.of(0x0041)); 932 } 933 934 // sanity check on repeated properties 935 for (int ch = 0xfffe; ch <= 0x10ffff;) { 936 int type = UCharacter.getType(ch); 937 if (UCharacter.getIntPropertyValue(ch, 938 UProperty.GENERAL_CATEGORY_MASK) 939 != (1 << type)) { 940 errln("error: UCharacter.getIntPropertyValue(\\u" 941 + Integer.toHexString(ch) 942 + ", UProperty.GENERAL_CATEGORY_MASK) != " 943 + "getMask(getType())"); 944 } 945 if (type != UCharacterCategory.UNASSIGNED) { 946 errln("error: UCharacter.getType(\\u" + Utility.hex(ch, 4) 947 + " != UCharacterCategory.UNASSIGNED (returns " 948 + UCharacterCategory.toString(UCharacter.getType(ch)) 949 + ")"); 950 } 951 if ((ch & 0xffff) == 0xfffe) { 952 ++ ch; 953 } 954 else { 955 ch += 0xffff; 956 } 957 } 958 959 // test that PUA is not "unassigned" 960 for(int ch = 0xe000; ch <= 0x10fffd;) { 961 int type = UCharacter.getType(ch); 962 if (UCharacter.getIntPropertyValue(ch, 963 UProperty.GENERAL_CATEGORY_MASK) 964 != (1 << type)) { 965 errln("error: UCharacter.getIntPropertyValue(\\u" 966 + Integer.toHexString(ch) 967 + ", UProperty.GENERAL_CATEGORY_MASK) != " 968 + "getMask(getType())"); 969 } 970 971 if (type == UCharacterCategory.UNASSIGNED) { 972 errln("error: UCharacter.getType(\\u" 973 + Utility.hex(ch, 4) 974 + ") == UCharacterCategory.UNASSIGNED"); 975 } 976 else if (type != UCharacterCategory.PRIVATE_USE) { 977 logln("PUA override: UCharacter.getType(\\u" 978 + Utility.hex(ch, 4) + ")=" + type); 979 } 980 if (ch == 0xf8ff) { 981 ch = 0xf0000; 982 } 983 else if (ch == 0xffffd) { 984 ch = 0x100000; 985 } 986 else { 987 ++ ch; 988 } 989 } 990 } 991 992 993 /** 994 * Test for the character names 995 */ TestNames()996 public void TestNames() 997 { 998 try{ 999 int length = UCharacterName.INSTANCE.getMaxCharNameLength(); 1000 if (length < 83) { // Unicode 3.2 max char name length 1001 errln("getMaxCharNameLength()=" + length + " is too short"); 1002 } 1003 1004 int c[] = {0x0061, //LATIN SMALL LETTER A 1005 0x000284, //LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK 1006 0x003401, //CJK UNIFIED IDEOGRAPH-3401 1007 0x007fed, //CJK UNIFIED IDEOGRAPH-7FED 1008 0x00ac00, //HANGUL SYLLABLE GA 1009 0x00d7a3, //HANGUL SYLLABLE HIH 1010 0x00d800, 0x00dc00, //LINEAR B SYLLABLE B008 A 1011 0xff08, //FULLWIDTH LEFT PARENTHESIS 1012 0x00ffe5, //FULLWIDTH YEN SIGN 1013 0x00ffff, //null 1014 0x0023456 //CJK UNIFIED IDEOGRAPH-23456 1015 }; 1016 String name[] = { 1017 "LATIN SMALL LETTER A", 1018 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 1019 "CJK UNIFIED IDEOGRAPH-3401", 1020 "CJK UNIFIED IDEOGRAPH-7FED", 1021 "HANGUL SYLLABLE GA", 1022 "HANGUL SYLLABLE HIH", 1023 "", 1024 "", 1025 "FULLWIDTH LEFT PARENTHESIS", 1026 "FULLWIDTH YEN SIGN", 1027 "", 1028 "CJK UNIFIED IDEOGRAPH-23456" 1029 }; 1030 String oldname[] = {"", "", "", 1031 "", 1032 "", "", "", "", "", "", 1033 "", ""}; 1034 String extendedname[] = {"LATIN SMALL LETTER A", 1035 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 1036 "CJK UNIFIED IDEOGRAPH-3401", 1037 "CJK UNIFIED IDEOGRAPH-7FED", 1038 "HANGUL SYLLABLE GA", 1039 "HANGUL SYLLABLE HIH", 1040 "<lead surrogate-D800>", 1041 "<trail surrogate-DC00>", 1042 "FULLWIDTH LEFT PARENTHESIS", 1043 "FULLWIDTH YEN SIGN", 1044 "<noncharacter-FFFF>", 1045 "CJK UNIFIED IDEOGRAPH-23456"}; 1046 1047 int size = c.length; 1048 String str; 1049 int uc; 1050 1051 for (int i = 0; i < size; i ++) 1052 { 1053 // modern Unicode character name 1054 str = UCharacter.getName(c[i]); 1055 if ((str == null && name[i].length() > 0) || 1056 (str != null && !str.equals(name[i]))) 1057 { 1058 errln("FAIL \\u" + hex(c[i]) + " expected name " + 1059 name[i]); 1060 break; 1061 } 1062 1063 // 1.0 Unicode character name 1064 str = UCharacter.getName1_0(c[i]); 1065 if ((str == null && oldname[i].length() > 0) || 1066 (str != null && !str.equals(oldname[i]))) 1067 { 1068 errln("FAIL \\u" + hex(c[i]) + " expected 1.0 name " + 1069 oldname[i]); 1070 break; 1071 } 1072 1073 // extended character name 1074 str = UCharacter.getExtendedName(c[i]); 1075 if (str == null || !str.equals(extendedname[i])) 1076 { 1077 errln("FAIL \\u" + hex(c[i]) + " expected extended name " + 1078 extendedname[i]); 1079 break; 1080 } 1081 1082 // retrieving unicode character from modern name 1083 uc = UCharacter.getCharFromName(name[i]); 1084 if (uc != c[i] && name[i].length() != 0) 1085 { 1086 errln("FAIL " + name[i] + " expected character \\u" + 1087 hex(c[i])); 1088 break; 1089 } 1090 1091 //retrieving unicode character from 1.0 name 1092 uc = UCharacter.getCharFromName1_0(oldname[i]); 1093 if (uc != c[i] && oldname[i].length() != 0) 1094 { 1095 errln("FAIL " + oldname[i] + " expected 1.0 character \\u" + 1096 hex(c[i])); 1097 break; 1098 } 1099 1100 //retrieving unicode character from 1.0 name 1101 uc = UCharacter.getCharFromExtendedName(extendedname[i]); 1102 if (uc != c[i] && i != 0 && (i == 1 || i == 6)) 1103 { 1104 errln("FAIL " + extendedname[i] + 1105 " expected extended character \\u" + hex(c[i])); 1106 break; 1107 } 1108 } 1109 1110 // test getName works with mixed-case names (new in 2.0) 1111 if (0x61 != UCharacter.getCharFromName("LATin smALl letTER A")) { 1112 errln("FAIL: 'LATin smALl letTER A' should result in character " 1113 + "U+0061"); 1114 } 1115 1116 if (getInclusion() >= 5) { 1117 // extra testing different from icu 1118 for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++) 1119 { 1120 str = UCharacter.getName(i); 1121 if (str != null && UCharacter.getCharFromName(str) != i) 1122 { 1123 errln("FAIL \\u" + hex(i) + " " + str + 1124 " retrieval of name and vice versa" ); 1125 break; 1126 } 1127 } 1128 } 1129 1130 // Test getCharNameCharacters 1131 if (getInclusion() >= 10) { 1132 boolean map[] = new boolean[256]; 1133 1134 UnicodeSet set = new UnicodeSet(1, 0); // empty set 1135 UnicodeSet dumb = new UnicodeSet(1, 0); // empty set 1136 1137 // uprv_getCharNameCharacters() will likely return more lowercase 1138 // letters than actual character names contain because 1139 // it includes all the characters in lowercased names of 1140 // general categories, for the full possible set of extended names. 1141 UCharacterName.INSTANCE.getCharNameCharacters(set); 1142 1143 // build set the dumb (but sure-fire) way 1144 Arrays.fill(map, false); 1145 1146 int maxLength = 0; 1147 for (int cp = 0; cp < 0x110000; ++ cp) { 1148 String n = UCharacter.getExtendedName(cp); 1149 int len = n.length(); 1150 if (len > maxLength) { 1151 maxLength = len; 1152 } 1153 1154 for (int i = 0; i < len; ++ i) { 1155 char ch = n.charAt(i); 1156 if (!map[ch & 0xff]) { 1157 dumb.add(ch); 1158 map[ch & 0xff] = true; 1159 } 1160 } 1161 } 1162 1163 length = UCharacterName.INSTANCE.getMaxCharNameLength(); 1164 if (length != maxLength) { 1165 errln("getMaxCharNameLength()=" + length 1166 + " differs from the maximum length " + maxLength 1167 + " of all extended names"); 1168 } 1169 1170 // compare the sets. Where is my uset_equals?!! 1171 boolean ok = true; 1172 for (int i = 0; i < 256; ++ i) { 1173 if (set.contains(i) != dumb.contains(i)) { 1174 if (0x61 <= i && i <= 0x7a // a-z 1175 && set.contains(i) && !dumb.contains(i)) { 1176 // ignore lowercase a-z that are in set but not in dumb 1177 ok = true; 1178 } 1179 else { 1180 ok = false; 1181 break; 1182 } 1183 } 1184 } 1185 1186 String pattern1 = set.toPattern(true); 1187 String pattern2 = dumb.toPattern(true); 1188 1189 if (!ok) { 1190 errln("FAIL: getCharNameCharacters() returned " + pattern1 1191 + " expected " + pattern2 1192 + " (too many lowercase a-z are ok)"); 1193 } else { 1194 logln("Ok: getCharNameCharacters() returned " + pattern1); 1195 } 1196 } 1197 // improve code coverage 1198 String expected = "LATIN SMALL LETTER A|LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK|"+ 1199 "CJK UNIFIED IDEOGRAPH-3401|CJK UNIFIED IDEOGRAPH-7FED|HANGUL SYLLABLE GA|"+ 1200 "HANGUL SYLLABLE HIH|LINEAR B SYLLABLE B008 A|FULLWIDTH LEFT PARENTHESIS|"+ 1201 "FULLWIDTH YEN SIGN|"+ 1202 "null|"+ // getName returns null because 0xFFFF does not have a name, but has an extended name! 1203 "CJK UNIFIED IDEOGRAPH-23456"; 1204 String separator= "|"; 1205 String source = Utility.valueOf(c); 1206 String result = UCharacter.getName(source, separator); 1207 if(!result.equals(expected)){ 1208 errln("UCharacter.getName did not return the expected result.\n\t Expected: "+ expected+"\n\t Got: "+ result); 1209 } 1210 1211 }catch(IllegalArgumentException e){ 1212 if(e.getMessage().indexOf("unames.icu") >= 0){ 1213 warnln("Could not find unames.icu"); 1214 }else{ 1215 throw e; 1216 } 1217 } 1218 1219 } 1220 TestUCharFromNameUnderflow()1221 public void TestUCharFromNameUnderflow() { 1222 // Ticket #10889: Underflow crash when there is no dash. 1223 int c = UCharacter.getCharFromExtendedName("<NO BREAK SPACE>"); 1224 if(c >= 0) { 1225 errln("UCharacter.getCharFromExtendedName(<NO BREAK SPACE>) = U+" + hex(c) + 1226 " but should fail (-1)"); 1227 } 1228 1229 // Test related edge cases. 1230 c = UCharacter.getCharFromExtendedName("<-00a0>"); 1231 if(c >= 0) { 1232 errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) + 1233 " but should fail (-1)"); 1234 } 1235 1236 c = UCharacter.getCharFromExtendedName("<control->"); 1237 if(c >= 0) { 1238 errln("UCharacter.getCharFromExtendedName(<control->) = U+" + hex(c) + 1239 " but should fail (-1)"); 1240 } 1241 1242 c = UCharacter.getCharFromExtendedName("<control-111111>"); 1243 if(c >= 0) { 1244 errln("UCharacter.getCharFromExtendedName(<control-111111>) = U+" + hex(c) + 1245 " but should fail (-1)"); 1246 } 1247 } 1248 1249 /** 1250 * Testing name iteration 1251 */ TestNameIteration()1252 public void TestNameIteration()throws Exception 1253 { 1254 try { 1255 ValueIterator iterator = UCharacter.getExtendedNameIterator(); 1256 ValueIterator.Element element = new ValueIterator.Element(); 1257 ValueIterator.Element old = new ValueIterator.Element(); 1258 // testing subrange 1259 iterator.setRange(-10, -5); 1260 if (iterator.next(element)) { 1261 errln("Fail, expected iterator to return false when range is set outside the meaningful range"); 1262 } 1263 iterator.setRange(0x110000, 0x111111); 1264 if (iterator.next(element)) { 1265 errln("Fail, expected iterator to return false when range is set outside the meaningful range"); 1266 } 1267 try { 1268 iterator.setRange(50, 10); 1269 errln("Fail, expected exception when encountered invalid range"); 1270 } catch (Exception e) { 1271 } 1272 1273 iterator.setRange(-10, 10); 1274 if (!iterator.next(element) || element.integer != 0) { 1275 errln("Fail, expected iterator to return 0 when range start limit is set outside the meaningful range"); 1276 } 1277 1278 iterator.setRange(0x10FFFE, 0x200000); 1279 int last = 0; 1280 while (iterator.next(element)) { 1281 last = element.integer; 1282 } 1283 if (last != 0x10FFFF) { 1284 errln("Fail, expected iterator to return 0x10FFFF when range end limit is set outside the meaningful range"); 1285 } 1286 1287 iterator = UCharacter.getNameIterator(); 1288 iterator.setRange(0xF, 0x45); 1289 while (iterator.next(element)) { 1290 if (element.integer <= old.integer) { 1291 errln("FAIL next returned a less codepoint \\u" + 1292 Integer.toHexString(element.integer) + " than \\u" + 1293 Integer.toHexString(old.integer)); 1294 break; 1295 } 1296 if (!UCharacter.getName(element.integer).equals(element.value)) 1297 { 1298 errln("FAIL next codepoint \\u" + 1299 Integer.toHexString(element.integer) + 1300 " does not have the expected name " + 1301 UCharacter.getName(element.integer) + 1302 " instead have the name " + (String)element.value); 1303 break; 1304 } 1305 old.integer = element.integer; 1306 } 1307 1308 iterator.reset(); 1309 iterator.next(element); 1310 if (element.integer != 0x20) { 1311 errln("FAIL reset in iterator"); 1312 } 1313 1314 iterator.setRange(0, 0x110000); 1315 old.integer = 0; 1316 while (iterator.next(element)) { 1317 if (element.integer != 0 && element.integer <= old.integer) { 1318 errln("FAIL next returned a less codepoint \\u" + 1319 Integer.toHexString(element.integer) + " than \\u" + 1320 Integer.toHexString(old.integer)); 1321 break; 1322 } 1323 if (!UCharacter.getName(element.integer).equals(element.value)) 1324 { 1325 errln("FAIL next codepoint \\u" + 1326 Integer.toHexString(element.integer) + 1327 " does not have the expected name " + 1328 UCharacter.getName(element.integer) + 1329 " instead have the name " + (String)element.value); 1330 break; 1331 } 1332 for (int i = old.integer + 1; i < element.integer; i ++) { 1333 if (UCharacter.getName(i) != null) { 1334 errln("FAIL between codepoints are not null \\u" + 1335 Integer.toHexString(old.integer) + " and " + 1336 Integer.toHexString(element.integer) + " has " + 1337 Integer.toHexString(i) + " with a name " + 1338 UCharacter.getName(i)); 1339 break; 1340 } 1341 } 1342 old.integer = element.integer; 1343 } 1344 1345 iterator = UCharacter.getExtendedNameIterator(); 1346 old.integer = 0; 1347 while (iterator.next(element)) { 1348 if (element.integer != 0 && element.integer != old.integer) { 1349 errln("FAIL next returned a codepoint \\u" + 1350 Integer.toHexString(element.integer) + 1351 " different from \\u" + 1352 Integer.toHexString(old.integer)); 1353 break; 1354 } 1355 if (!UCharacter.getExtendedName(element.integer).equals( 1356 element.value)) { 1357 errln("FAIL next codepoint \\u" + 1358 Integer.toHexString(element.integer) + 1359 " name should be " 1360 + UCharacter.getExtendedName(element.integer) + 1361 " instead of " + (String)element.value); 1362 break; 1363 } 1364 old.integer++; 1365 } 1366 iterator = UCharacter.getName1_0Iterator(); 1367 old.integer = 0; 1368 while (iterator.next(element)) { 1369 logln(Integer.toHexString(element.integer) + " " + 1370 (String)element.value); 1371 if (element.integer != 0 && element.integer <= old.integer) { 1372 errln("FAIL next returned a less codepoint \\u" + 1373 Integer.toHexString(element.integer) + " than \\u" + 1374 Integer.toHexString(old.integer)); 1375 break; 1376 } 1377 if (!element.value.equals(UCharacter.getName1_0( 1378 element.integer))) { 1379 errln("FAIL next codepoint \\u" + 1380 Integer.toHexString(element.integer) + 1381 " name cannot be null"); 1382 break; 1383 } 1384 for (int i = old.integer + 1; i < element.integer; i ++) { 1385 if (UCharacter.getName1_0(i) != null) { 1386 errln("FAIL between codepoints are not null \\u" + 1387 Integer.toHexString(old.integer) + " and " + 1388 Integer.toHexString(element.integer) + " has " + 1389 Integer.toHexString(i) + " with a name " + 1390 UCharacter.getName1_0(i)); 1391 break; 1392 } 1393 } 1394 old.integer = element.integer; 1395 } 1396 } catch(Exception e){ 1397 // !!! wouldn't preflighting be simpler? This looks like 1398 // it is effectively be doing that. It seems that for every 1399 // true error the code will call errln, which will throw the error, which 1400 // this will catch, which this will then rethrow the error. Just seems 1401 // cumbersome. 1402 if(e.getMessage().indexOf("unames.icu") >= 0){ 1403 warnln("Could not find unames.icu"); 1404 } else { 1405 errln(e.getMessage()); 1406 } 1407 } 1408 } 1409 1410 /** 1411 * Testing the for illegal characters 1412 */ TestIsLegal()1413 public void TestIsLegal() 1414 { 1415 int illegal[] = {0xFFFE, 0x00FFFF, 0x005FFFE, 0x005FFFF, 0x0010FFFE, 1416 0x0010FFFF, 0x110000, 0x00FDD0, 0x00FDDF, 0x00FDE0, 1417 0x00FDEF, 0xD800, 0xDC00, -1}; 1418 int legal[] = {0x61, 0x00FFFD, 0x0010000, 0x005FFFD, 0x0060000, 1419 0x0010FFFD, 0xFDCF, 0x00FDF0}; 1420 for (int count = 0; count < illegal.length; count ++) { 1421 if (UCharacter.isLegal(illegal[count])) { 1422 errln("FAIL \\u" + hex(illegal[count]) + 1423 " is not a legal character"); 1424 } 1425 } 1426 1427 for (int count = 0; count < legal.length; count ++) { 1428 if (!UCharacter.isLegal(legal[count])) { 1429 errln("FAIL \\u" + hex(legal[count]) + 1430 " is a legal character"); 1431 } 1432 } 1433 1434 String illegalStr = "This is an illegal string "; 1435 String legalStr = "This is a legal string "; 1436 1437 for (int count = 0; count < illegal.length; count ++) { 1438 StringBuffer str = new StringBuffer(illegalStr); 1439 if (illegal[count] < 0x10000) { 1440 str.append((char)illegal[count]); 1441 } 1442 else { 1443 char lead = UTF16.getLeadSurrogate(illegal[count]); 1444 char trail = UTF16.getTrailSurrogate(illegal[count]); 1445 str.append(lead); 1446 str.append(trail); 1447 } 1448 if (UCharacter.isLegal(str.toString())) { 1449 errln("FAIL " + hex(str.toString()) + 1450 " is not a legal string"); 1451 } 1452 } 1453 1454 for (int count = 0; count < legal.length; count ++) { 1455 StringBuffer str = new StringBuffer(legalStr); 1456 if (legal[count] < 0x10000) { 1457 str.append((char)legal[count]); 1458 } 1459 else { 1460 char lead = UTF16.getLeadSurrogate(legal[count]); 1461 char trail = UTF16.getTrailSurrogate(legal[count]); 1462 str.append(lead); 1463 str.append(trail); 1464 } 1465 if (!UCharacter.isLegal(str.toString())) { 1466 errln("FAIL " + hex(str.toString()) + " is a legal string"); 1467 } 1468 } 1469 } 1470 1471 /** 1472 * Test getCodePoint 1473 */ TestCodePoint()1474 public void TestCodePoint() 1475 { 1476 int ch = 0x10000; 1477 for (char i = 0xD800; i < 0xDC00; i ++) { 1478 for (char j = 0xDC00; j <= 0xDFFF; j ++) { 1479 if (UCharacter.getCodePoint(i, j) != ch) { 1480 errln("Error getting codepoint for surrogate " + 1481 "characters \\u" 1482 + Integer.toHexString(i) + " \\u" + 1483 Integer.toHexString(j)); 1484 } 1485 ch ++; 1486 } 1487 } 1488 try 1489 { 1490 UCharacter.getCodePoint((char)0xD7ff, (char)0xDC00); 1491 errln("Invalid surrogate characters should not form a " + 1492 "supplementary"); 1493 } catch(Exception e) { 1494 } 1495 for (char i = 0; i < 0xFFFF; i++) { 1496 if (i == 0xFFFE || 1497 (i >= 0xD800 && i <= 0xDFFF) || 1498 (i >= 0xFDD0 && i <= 0xFDEF)) { 1499 // not a character 1500 try { 1501 UCharacter.getCodePoint(i); 1502 errln("Not a character is not a valid codepoint"); 1503 } catch (Exception e) { 1504 } 1505 } 1506 else { 1507 if (UCharacter.getCodePoint(i) != i) { 1508 errln("A valid codepoint should return itself"); 1509 } 1510 } 1511 } 1512 } 1513 1514 /** 1515 * This method is a little different from the type test in icu4c. 1516 * But combined with testUnicodeData, they basically do the same thing. 1517 */ TestIteration()1518 public void TestIteration() 1519 { 1520 int limit = 0; 1521 int prevtype = -1; 1522 int shouldBeDir; 1523 int test[][]={{0x41, UCharacterCategory.UPPERCASE_LETTER}, 1524 {0x308, UCharacterCategory.NON_SPACING_MARK}, 1525 {0xfffe, UCharacterCategory.GENERAL_OTHER_TYPES}, 1526 {0xe0041, UCharacterCategory.FORMAT}, 1527 {0xeffff, UCharacterCategory.UNASSIGNED}}; 1528 1529 // default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header 1530 int defaultBidi[][]={ 1531 { 0x0590, UCharacterDirection.LEFT_TO_RIGHT }, 1532 { 0x0600, UCharacterDirection.RIGHT_TO_LEFT }, 1533 { 0x07C0, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1534 { 0x08A0, UCharacterDirection.RIGHT_TO_LEFT }, 1535 { 0x0900, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */ 1536 { 0x20A0, UCharacterDirection.LEFT_TO_RIGHT }, 1537 { 0x20D0, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */ 1538 { 0xFB1D, UCharacterDirection.LEFT_TO_RIGHT }, 1539 { 0xFB50, UCharacterDirection.RIGHT_TO_LEFT }, 1540 { 0xFE00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1541 { 0xFE70, UCharacterDirection.LEFT_TO_RIGHT }, 1542 { 0xFF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1543 { 0x10800, UCharacterDirection.LEFT_TO_RIGHT }, 1544 { 0x11000, UCharacterDirection.RIGHT_TO_LEFT }, 1545 { 0x1E800, UCharacterDirection.LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */ 1546 { 0x1EE00, UCharacterDirection.RIGHT_TO_LEFT }, 1547 { 0x1EF00, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */ 1548 { 0x1F000, UCharacterDirection.RIGHT_TO_LEFT }, 1549 { 0x110000, UCharacterDirection.LEFT_TO_RIGHT } 1550 }; 1551 1552 RangeValueIterator iterator = UCharacter.getTypeIterator(); 1553 RangeValueIterator.Element result = new RangeValueIterator.Element(); 1554 while (iterator.next(result)) { 1555 if (result.start != limit) { 1556 errln("UCharacterIteration failed: Ranges not continuous " + 1557 "0x" + Integer.toHexString(result.start)); 1558 } 1559 1560 limit = result.limit; 1561 if (result.value == prevtype) { 1562 errln("Type of the next set of enumeration should be different"); 1563 } 1564 prevtype = result.value; 1565 1566 for (int i = result.start; i < limit; i ++) { 1567 int temptype = UCharacter.getType(i); 1568 if (temptype != result.value) { 1569 errln("UCharacterIteration failed: Codepoint \\u" + 1570 Integer.toHexString(i) + " should be of type " + 1571 temptype + " not " + result.value); 1572 } 1573 } 1574 1575 for (int i = 0; i < test.length; ++ i) { 1576 if (result.start <= test[i][0] && test[i][0] < result.limit) { 1577 if (result.value != test[i][1]) { 1578 errln("error: getTypes() has range [" 1579 + Integer.toHexString(result.start) + ", " 1580 + Integer.toHexString(result.limit) 1581 + "] with type " + result.value 1582 + " instead of [" 1583 + Integer.toHexString(test[i][0]) + ", " 1584 + Integer.toHexString(test[i][1])); 1585 } 1586 } 1587 } 1588 1589 // LineBreak.txt specifies: 1590 // # - Assigned characters that are not listed explicitly are given the value 1591 // # "AL". 1592 // # - Unassigned characters are given the value "XX". 1593 // 1594 // PUA characters are listed explicitly with "XX". 1595 // Verify that no assigned character has "XX". 1596 if (result.value != UCharacterCategory.UNASSIGNED 1597 && result.value != UCharacterCategory.PRIVATE_USE) { 1598 int c = result.start; 1599 while (c < result.limit) { 1600 if (0 == UCharacter.getIntPropertyValue(c, 1601 UProperty.LINE_BREAK)) { 1602 logln("error UProperty.LINE_BREAK(assigned \\u" 1603 + Utility.hex(c, 4) + ")=XX"); 1604 } 1605 ++ c; 1606 } 1607 } 1608 1609 /* 1610 * Verify default Bidi classes. 1611 * For recent Unicode versions, see UCD.html. 1612 * 1613 * For older Unicode versions: 1614 * See table 3-7 "Bidirectional Character Types" in UAX #9. 1615 * http://www.unicode.org/reports/tr9/ 1616 * 1617 * See also DerivedBidiClass.txt for Cn code points! 1618 * 1619 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html) 1620 * changed some default values. 1621 * In particular, non-characters and unassigned Default Ignorable Code Points 1622 * change from L to BN. 1623 * 1624 * UCD.html version 4.0.1 does not yet reflect these changes. 1625 */ 1626 if (result.value == UCharacterCategory.UNASSIGNED 1627 || result.value == UCharacterCategory.PRIVATE_USE) { 1628 int c = result.start; 1629 for (int i = 0; i < defaultBidi.length && c < result.limit; 1630 ++ i) { 1631 if (c < defaultBidi[i][0]) { 1632 while (c < result.limit && c < defaultBidi[i][0]) { 1633 // TODO change to public UCharacter.isNonCharacter(c) once it's available 1634 if(com.ibm.icu.impl.UCharacterUtility.isNonCharacter(c) || UCharacter.hasBinaryProperty(c, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) { 1635 shouldBeDir=UCharacter.BOUNDARY_NEUTRAL; 1636 } else { 1637 shouldBeDir=defaultBidi[i][1]; 1638 } 1639 1640 if (UCharacter.getDirection(c) != shouldBeDir 1641 || UCharacter.getIntPropertyValue(c, 1642 UProperty.BIDI_CLASS) 1643 != shouldBeDir) { 1644 errln("error: getDirection(unassigned/PUA " 1645 + Integer.toHexString(c) 1646 + ") should be " 1647 + shouldBeDir); 1648 } 1649 ++ c; 1650 } 1651 } 1652 } 1653 } 1654 } 1655 1656 iterator.reset(); 1657 if (iterator.next(result) == false || result.start != 0) { 1658 System.out.println("result " + result.start); 1659 errln("UCharacterIteration reset() failed"); 1660 } 1661 } 1662 1663 /** 1664 * Testing getAge 1665 */ TestGetAge()1666 public void TestGetAge() 1667 { 1668 int ages[] = {0x41, 1, 1, 0, 0, 1669 0xffff, 1, 1, 0, 0, 1670 0x20ab, 2, 0, 0, 0, 1671 0x2fffe, 2, 0, 0, 0, 1672 0x20ac, 2, 1, 0, 0, 1673 0xfb1d, 3, 0, 0, 0, 1674 0x3f4, 3, 1, 0, 0, 1675 0x10300, 3, 1, 0, 0, 1676 0x220, 3, 2, 0, 0, 1677 0xff60, 3, 2, 0, 0}; 1678 for (int i = 0; i < ages.length; i += 5) { 1679 VersionInfo age = UCharacter.getAge(ages[i]); 1680 if (age != VersionInfo.getInstance(ages[i + 1], ages[i + 2], 1681 ages[i + 3], ages[i + 4])) { 1682 errln("error: getAge(\\u" + Integer.toHexString(ages[i]) + 1683 ") == " + age.toString() + " instead of " + 1684 ages[i + 1] + "." + ages[i + 2] + "." + ages[i + 3] + 1685 "." + ages[i + 4]); 1686 } 1687 } 1688 1689 int[] valid_tests = { 1690 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 1691 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 1692 int[] invalid_tests = { 1693 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 1694 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 1695 1696 for(int i=0; i< valid_tests.length; i++){ 1697 try{ 1698 UCharacter.getAge(valid_tests[i]); 1699 } catch(Exception e){ 1700 errln("UCharacter.getAge(int) was not suppose to have " + 1701 "an exception. Value passed: " + valid_tests[i]); 1702 } 1703 } 1704 1705 for(int i=0; i< invalid_tests.length; i++){ 1706 try{ 1707 UCharacter.getAge(invalid_tests[i]); 1708 errln("UCharacter.getAge(int) was suppose to have " + 1709 "an exception. Value passed: " + invalid_tests[i]); 1710 } catch(Exception e){ 1711 } 1712 } 1713 } 1714 1715 /** 1716 * Test binary non core properties 1717 */ TestAdditionalProperties()1718 public void TestAdditionalProperties() 1719 { 1720 // test data for hasBinaryProperty() 1721 int props[][] = { // code point, property 1722 { 0x0627, UProperty.ALPHABETIC, 1 }, 1723 { 0x1034a, UProperty.ALPHABETIC, 1 }, 1724 { 0x2028, UProperty.ALPHABETIC, 0 }, 1725 1726 { 0x0066, UProperty.ASCII_HEX_DIGIT, 1 }, 1727 { 0x0067, UProperty.ASCII_HEX_DIGIT, 0 }, 1728 1729 { 0x202c, UProperty.BIDI_CONTROL, 1 }, 1730 { 0x202f, UProperty.BIDI_CONTROL, 0 }, 1731 1732 { 0x003c, UProperty.BIDI_MIRRORED, 1 }, 1733 { 0x003d, UProperty.BIDI_MIRRORED, 0 }, 1734 1735 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 1736 { 0x2018, UProperty.BIDI_MIRRORED, 0 }, 1737 { 0x201d, UProperty.BIDI_MIRRORED, 0 }, 1738 { 0x201f, UProperty.BIDI_MIRRORED, 0 }, 1739 { 0x301e, UProperty.BIDI_MIRRORED, 0 }, 1740 1741 { 0x058a, UProperty.DASH, 1 }, 1742 { 0x007e, UProperty.DASH, 0 }, 1743 1744 { 0x0c4d, UProperty.DIACRITIC, 1 }, 1745 { 0x3000, UProperty.DIACRITIC, 0 }, 1746 1747 { 0x0e46, UProperty.EXTENDER, 1 }, 1748 { 0x0020, UProperty.EXTENDER, 0 }, 1749 1750 { 0xfb1d, UProperty.FULL_COMPOSITION_EXCLUSION, 1 }, 1751 { 0x1d15f, UProperty.FULL_COMPOSITION_EXCLUSION, 1 }, 1752 { 0xfb1e, UProperty.FULL_COMPOSITION_EXCLUSION, 0 }, 1753 1754 { 0x110a, UProperty.NFD_INERT, 1 }, /* Jamo L */ 1755 { 0x0308, UProperty.NFD_INERT, 0 }, 1756 1757 { 0x1164, UProperty.NFKD_INERT, 1 }, /* Jamo V */ 1758 { 0x1d79d, UProperty.NFKD_INERT, 0 }, /* math compat version of xi */ 1759 1760 { 0x0021, UProperty.NFC_INERT, 1 }, /* ! */ 1761 { 0x0061, UProperty.NFC_INERT, 0 }, /* a */ 1762 { 0x00e4, UProperty.NFC_INERT, 0 }, /* a-umlaut */ 1763 { 0x0102, UProperty.NFC_INERT, 0 }, /* a-breve */ 1764 { 0xac1c, UProperty.NFC_INERT, 0 }, /* Hangul LV */ 1765 { 0xac1d, UProperty.NFC_INERT, 1 }, /* Hangul LVT */ 1766 1767 { 0x1d79d, UProperty.NFKC_INERT, 0 }, /* math compat version of xi */ 1768 { 0x2a6d6, UProperty.NFKC_INERT, 1 }, /* Han, last of CJK ext. B */ 1769 1770 { 0x00e4, UProperty.SEGMENT_STARTER, 1 }, 1771 { 0x0308, UProperty.SEGMENT_STARTER, 0 }, 1772 { 0x110a, UProperty.SEGMENT_STARTER, 1 }, /* Jamo L */ 1773 { 0x1164, UProperty.SEGMENT_STARTER, 0 },/* Jamo V */ 1774 { 0xac1c, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LV */ 1775 { 0xac1d, UProperty.SEGMENT_STARTER, 1 }, /* Hangul LVT */ 1776 1777 { 0x0044, UProperty.HEX_DIGIT, 1 }, 1778 { 0xff46, UProperty.HEX_DIGIT, 1 }, 1779 { 0x0047, UProperty.HEX_DIGIT, 0 }, 1780 1781 { 0x30fb, UProperty.HYPHEN, 1 }, 1782 { 0xfe58, UProperty.HYPHEN, 0 }, 1783 1784 { 0x2172, UProperty.ID_CONTINUE, 1 }, 1785 { 0x0307, UProperty.ID_CONTINUE, 1 }, 1786 { 0x005c, UProperty.ID_CONTINUE, 0 }, 1787 1788 { 0x2172, UProperty.ID_START, 1 }, 1789 { 0x007a, UProperty.ID_START, 1 }, 1790 { 0x0039, UProperty.ID_START, 0 }, 1791 1792 { 0x4db5, UProperty.IDEOGRAPHIC, 1 }, 1793 { 0x2f999, UProperty.IDEOGRAPHIC, 1 }, 1794 { 0x2f99, UProperty.IDEOGRAPHIC, 0 }, 1795 1796 { 0x200c, UProperty.JOIN_CONTROL, 1 }, 1797 { 0x2029, UProperty.JOIN_CONTROL, 0 }, 1798 1799 { 0x1d7bc, UProperty.LOWERCASE, 1 }, 1800 { 0x0345, UProperty.LOWERCASE, 1 }, 1801 { 0x0030, UProperty.LOWERCASE, 0 }, 1802 1803 { 0x1d7a9, UProperty.MATH, 1 }, 1804 { 0x2135, UProperty.MATH, 1 }, 1805 { 0x0062, UProperty.MATH, 0 }, 1806 1807 { 0xfde1, UProperty.NONCHARACTER_CODE_POINT, 1 }, 1808 { 0x10ffff, UProperty.NONCHARACTER_CODE_POINT, 1 }, 1809 { 0x10fffd, UProperty.NONCHARACTER_CODE_POINT, 0 }, 1810 1811 { 0x0022, UProperty.QUOTATION_MARK, 1 }, 1812 { 0xff62, UProperty.QUOTATION_MARK, 1 }, 1813 { 0xd840, UProperty.QUOTATION_MARK, 0 }, 1814 1815 { 0x061f, UProperty.TERMINAL_PUNCTUATION, 1 }, 1816 { 0xe003f, UProperty.TERMINAL_PUNCTUATION, 0 }, 1817 1818 { 0x1d44a, UProperty.UPPERCASE, 1 }, 1819 { 0x2162, UProperty.UPPERCASE, 1 }, 1820 { 0x0345, UProperty.UPPERCASE, 0 }, 1821 1822 { 0x0020, UProperty.WHITE_SPACE, 1 }, 1823 { 0x202f, UProperty.WHITE_SPACE, 1 }, 1824 { 0x3001, UProperty.WHITE_SPACE, 0 }, 1825 1826 { 0x0711, UProperty.XID_CONTINUE, 1 }, 1827 { 0x1d1aa, UProperty.XID_CONTINUE, 1 }, 1828 { 0x007c, UProperty.XID_CONTINUE, 0 }, 1829 1830 { 0x16ee, UProperty.XID_START, 1 }, 1831 { 0x23456, UProperty.XID_START, 1 }, 1832 { 0x1d1aa, UProperty.XID_START, 0 }, 1833 1834 /* 1835 * Version break: 1836 * The following properties are only supported starting with the 1837 * Unicode version indicated in the second field. 1838 */ 1839 { -1, 0x320, 0 }, 1840 1841 { 0x180c, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 }, 1842 { 0xfe02, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 1 }, 1843 { 0x1801, UProperty.DEFAULT_IGNORABLE_CODE_POINT, 0 }, 1844 1845 { 0x0149, UProperty.DEPRECATED, 1 }, /* changed in Unicode 5.2 */ 1846 { 0x0341, UProperty.DEPRECATED, 0 }, /* changed in Unicode 5.2 */ 1847 { 0xe0041, UProperty.DEPRECATED, 1 }, /* Changed from Unicode 5 to 5.1 */ 1848 { 0xe0100, UProperty.DEPRECATED, 0 }, 1849 1850 { 0x00a0, UProperty.GRAPHEME_BASE, 1 }, 1851 { 0x0a4d, UProperty.GRAPHEME_BASE, 0 }, 1852 { 0xff9d, UProperty.GRAPHEME_BASE, 1 }, 1853 { 0xff9f, UProperty.GRAPHEME_BASE, 0 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */ 1854 1855 { 0x0300, UProperty.GRAPHEME_EXTEND, 1 }, 1856 { 0xff9d, UProperty.GRAPHEME_EXTEND, 0 }, 1857 { 0xff9f, UProperty.GRAPHEME_EXTEND, 1 }, /* changed from Unicode 3.2 to 4 and again 5 to 5.1 */ 1858 { 0x0603, UProperty.GRAPHEME_EXTEND, 0 }, 1859 1860 { 0x0a4d, UProperty.GRAPHEME_LINK, 1 }, 1861 { 0xff9f, UProperty.GRAPHEME_LINK, 0 }, 1862 1863 { 0x2ff7, UProperty.IDS_BINARY_OPERATOR, 1 }, 1864 { 0x2ff3, UProperty.IDS_BINARY_OPERATOR, 0 }, 1865 1866 { 0x2ff3, UProperty.IDS_TRINARY_OPERATOR, 1 }, 1867 { 0x2f03, UProperty.IDS_TRINARY_OPERATOR, 0 }, 1868 1869 { 0x0ec1, UProperty.LOGICAL_ORDER_EXCEPTION, 1 }, 1870 { 0xdcba, UProperty.LOGICAL_ORDER_EXCEPTION, 0 }, 1871 1872 { 0x2e9b, UProperty.RADICAL, 1 }, 1873 { 0x4e00, UProperty.RADICAL, 0 }, 1874 1875 { 0x012f, UProperty.SOFT_DOTTED, 1 }, 1876 { 0x0049, UProperty.SOFT_DOTTED, 0 }, 1877 1878 { 0xfa11, UProperty.UNIFIED_IDEOGRAPH, 1 }, 1879 { 0xfa12, UProperty.UNIFIED_IDEOGRAPH, 0 }, 1880 1881 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */ 1882 1883 { 0x002e, UProperty.S_TERM, 1 }, 1884 { 0x0061, UProperty.S_TERM, 0 }, 1885 1886 { 0x180c, UProperty.VARIATION_SELECTOR, 1 }, 1887 { 0xfe03, UProperty.VARIATION_SELECTOR, 1 }, 1888 { 0xe01ef, UProperty.VARIATION_SELECTOR, 1 }, 1889 { 0xe0200, UProperty.VARIATION_SELECTOR, 0 }, 1890 1891 /* enum/integer type properties */ 1892 /* test default Bidi classes for unassigned code points */ 1893 { 0x0590, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1894 { 0x05cf, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1895 { 0x05ed, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1896 { 0x07f2, UProperty.BIDI_CLASS, UCharacterDirection.DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */ 1897 { 0x07fe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, /* unassigned R */ 1898 { 0x089f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1899 { 0xfb37, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1900 { 0xfb42, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1901 { 0x10806, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1902 { 0x10909, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1903 { 0x10fe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 1904 1905 { 0x061d, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1906 { 0x063f, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1907 { 0x070e, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1908 { 0x0775, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1909 { 0xfbc2, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1910 { 0xfd90, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1911 { 0xfefe, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 1912 1913 { 0x02AF, UProperty.BLOCK, UCharacter.UnicodeBlock.IPA_EXTENSIONS.getID() }, 1914 { 0x0C4E, UProperty.BLOCK, UCharacter.UnicodeBlock.TELUGU.getID()}, 1915 { 0x155A, UProperty.BLOCK, UCharacter.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS.getID() }, 1916 { 0x1717, UProperty.BLOCK, UCharacter.UnicodeBlock.TAGALOG.getID() }, 1917 { 0x1900, UProperty.BLOCK, UCharacter.UnicodeBlock.LIMBU.getID() }, 1918 { 0x1CBF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID()}, 1919 { 0x3040, UProperty.BLOCK, UCharacter.UnicodeBlock.HIRAGANA.getID()}, 1920 { 0x1D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS.getID()}, 1921 { 0x50000, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() }, 1922 { 0xEFFFF, UProperty.BLOCK, UCharacter.UnicodeBlock.NO_BLOCK.getID() }, 1923 { 0x10D0FF, UProperty.BLOCK, UCharacter.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B.getID() }, 1924 1925 /* UProperty.CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */ 1926 { 0xd7d7, UProperty.CANONICAL_COMBINING_CLASS, 0 }, 1927 1928 { 0x00A0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NOBREAK }, 1929 { 0x00A8, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.COMPAT }, 1930 { 0x00bf, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE }, 1931 { 0x00c0, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1932 { 0x1E9B, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1933 { 0xBCDE, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.CANONICAL }, 1934 { 0xFB5D, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.MEDIAL }, 1935 { 0x1D736, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.FONT }, 1936 { 0xe0033, UProperty.DECOMPOSITION_TYPE, UCharacter.DecompositionType.NONE }, 1937 1938 { 0x0009, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1939 { 0x0020, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NARROW }, 1940 { 0x00B1, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1941 { 0x20A9, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.HALFWIDTH }, 1942 { 0x2FFB, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1943 { 0x3000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.FULLWIDTH }, 1944 { 0x35bb, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1945 { 0x58bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1946 { 0xD7A3, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1947 { 0xEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1948 { 0x1D198, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1949 { 0x20000, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1950 { 0x2F8C7, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1951 { 0x3a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.WIDE }, 1952 { 0x5a5bd, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.NEUTRAL }, 1953 { 0xFEEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1954 { 0x10EEEE, UProperty.EAST_ASIAN_WIDTH, UCharacter.EastAsianWidth.AMBIGUOUS }, 1955 1956 /* UProperty.GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */ 1957 { 0xd7c7, UProperty.GENERAL_CATEGORY, 0 }, 1958 { 0xd7d7, UProperty.GENERAL_CATEGORY, UCharacterEnums.ECharacterCategory.OTHER_LETTER }, /* changed in Unicode 5.2 */ 1959 1960 { 0x0444, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 1961 { 0x0639, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.AIN }, 1962 { 0x072A, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.DALATH_RISH }, 1963 { 0x0647, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH }, 1964 { 0x06C1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.HEH_GOAL }, 1965 1966 { 0x200C, UProperty.JOINING_TYPE, UCharacter.JoiningType.NON_JOINING }, 1967 { 0x200D, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING }, 1968 { 0x0639, UProperty.JOINING_TYPE, UCharacter.JoiningType.DUAL_JOINING }, 1969 { 0x0640, UProperty.JOINING_TYPE, UCharacter.JoiningType.JOIN_CAUSING }, 1970 { 0x06C3, UProperty.JOINING_TYPE, UCharacter.JoiningType.RIGHT_JOINING }, 1971 { 0x0300, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 1972 { 0x070F, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 1973 { 0xe0033, UProperty.JOINING_TYPE, UCharacter.JoiningType.TRANSPARENT }, 1974 1975 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */ 1976 { 0xe7e7, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN }, 1977 { 0x10fffd, UProperty.LINE_BREAK, UCharacter.LineBreak.UNKNOWN }, 1978 { 0x0028, UProperty.LINE_BREAK, UCharacter.LineBreak.OPEN_PUNCTUATION }, 1979 { 0x232A, UProperty.LINE_BREAK, UCharacter.LineBreak.CLOSE_PUNCTUATION }, 1980 { 0x3401, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1981 { 0x4e02, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1982 { 0x20004, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1983 { 0xf905, UProperty.LINE_BREAK, UCharacter.LineBreak.IDEOGRAPHIC }, 1984 { 0xdb7e, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 1985 { 0xdbfd, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 1986 { 0xdffc, UProperty.LINE_BREAK, UCharacter.LineBreak.SURROGATE }, 1987 { 0x2762, UProperty.LINE_BREAK, UCharacter.LineBreak.EXCLAMATION }, 1988 { 0x002F, UProperty.LINE_BREAK, UCharacter.LineBreak.BREAK_SYMBOLS }, 1989 { 0x1D49C, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC }, 1990 { 0x1731, UProperty.LINE_BREAK, UCharacter.LineBreak.ALPHABETIC }, 1991 1992 /* UProperty.NUMERIC_TYPE tested in TestNumericProperties() */ 1993 1994 /* UProperty.SCRIPT tested in TestUScriptCodeAPI() */ 1995 1996 { 0x10ff, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 1997 { 0x1100, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 1998 { 0x1111, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 1999 { 0x1159, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2000 { 0x115a, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2001 { 0x115e, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2002 { 0x115f, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, 2003 2004 { 0xa95f, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2005 { 0xa960, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2006 { 0xa97c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LEADING_JAMO }, /* changed in Unicode 5.2 */ 2007 { 0xa97d, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2008 2009 { 0x1160, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2010 { 0x1161, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2011 { 0x1172, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2012 { 0x11a2, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, 2013 { 0x11a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2014 { 0x11a7, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2015 2016 { 0xd7af, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2017 { 0xd7b0, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2018 { 0xd7c6, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2019 { 0xd7c7, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2020 2021 { 0x11a8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2022 { 0x11b8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2023 { 0x11c8, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2024 { 0x11f9, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, 2025 { 0x11fa, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2026 { 0x11ff, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2027 { 0x1200, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2028 2029 { 0xd7ca, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2030 { 0xd7cb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2031 { 0xd7fb, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2032 { 0xd7fc, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2033 2034 { 0xac00, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2035 { 0xac1c, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2036 { 0xc5ec, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2037 { 0xd788, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LV_SYLLABLE }, 2038 2039 { 0xac01, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2040 { 0xac1b, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2041 { 0xac1d, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2042 { 0xc5ee, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2043 { 0xd7a3, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.LVT_SYLLABLE }, 2044 2045 { 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 }, 2046 2047 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */ 2048 2049 { 0x00d7, UProperty.PATTERN_SYNTAX, 1 }, 2050 { 0xfe45, UProperty.PATTERN_SYNTAX, 1 }, 2051 { 0x0061, UProperty.PATTERN_SYNTAX, 0 }, 2052 2053 { 0x0020, UProperty.PATTERN_WHITE_SPACE, 1 }, 2054 { 0x0085, UProperty.PATTERN_WHITE_SPACE, 1 }, 2055 { 0x200f, UProperty.PATTERN_WHITE_SPACE, 1 }, 2056 { 0x00a0, UProperty.PATTERN_WHITE_SPACE, 0 }, 2057 { 0x3000, UProperty.PATTERN_WHITE_SPACE, 0 }, 2058 2059 { 0x1d200, UProperty.BLOCK, UCharacter.UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION_ID }, 2060 { 0x2c8e, UProperty.BLOCK, UCharacter.UnicodeBlock.COPTIC_ID }, 2061 { 0xfe17, UProperty.BLOCK, UCharacter.UnicodeBlock.VERTICAL_FORMS_ID }, 2062 2063 { 0x1a00, UProperty.SCRIPT, UScript.BUGINESE }, 2064 { 0x2cea, UProperty.SCRIPT, UScript.COPTIC }, 2065 { 0xa82b, UProperty.SCRIPT, UScript.SYLOTI_NAGRI }, 2066 { 0x103d0, UProperty.SCRIPT, UScript.OLD_PERSIAN }, 2067 2068 { 0xcc28, UProperty.LINE_BREAK, UCharacter.LineBreak.H2 }, 2069 { 0xcc29, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 }, 2070 { 0xac03, UProperty.LINE_BREAK, UCharacter.LineBreak.H3 }, 2071 { 0x115f, UProperty.LINE_BREAK, UCharacter.LineBreak.JL }, 2072 { 0x11aa, UProperty.LINE_BREAK, UCharacter.LineBreak.JT }, 2073 { 0x11a1, UProperty.LINE_BREAK, UCharacter.LineBreak.JV }, 2074 2075 { 0xb2c9, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.LVT }, 2076 { 0x036f, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.EXTEND }, 2077 { 0x0000, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.CONTROL }, 2078 { 0x1160, UProperty.GRAPHEME_CLUSTER_BREAK, UCharacter.GraphemeClusterBreak.V }, 2079 2080 { 0x05f4, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDLETTER }, 2081 { 0x4ef0, UProperty.WORD_BREAK, UCharacter.WordBreak.OTHER }, 2082 { 0x19d9, UProperty.WORD_BREAK, UCharacter.WordBreak.NUMERIC }, 2083 { 0x2044, UProperty.WORD_BREAK, UCharacter.WordBreak.MIDNUM }, 2084 2085 { 0xfffd, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.OTHER }, 2086 { 0x1ffc, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.UPPER }, 2087 { 0xff63, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.CLOSE }, 2088 { 0x2028, UProperty.SENTENCE_BREAK, UCharacter.SentenceBreak.SEP }, 2089 2090 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */ 2091 2092 /* unassigned code points in new default Bidi R blocks */ 2093 { 0x1ede4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 2094 { 0x1efe4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT }, 2095 2096 /* test some script codes >127 */ 2097 { 0xa6e6, UProperty.SCRIPT, UScript.BAMUM }, 2098 { 0xa4d0, UProperty.SCRIPT, UScript.LISU }, 2099 { 0x10a7f, UProperty.SCRIPT, UScript.OLD_SOUTH_ARABIAN }, 2100 2101 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */ 2102 2103 /* value changed in Unicode 6.0 */ 2104 { 0x06C3, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.TEH_MARBUTA_GOAL }, 2105 2106 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */ 2107 2108 /* unassigned code points in new/changed default Bidi AL blocks */ 2109 { 0x08ba, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 2110 { 0x1eee4, UProperty.BIDI_CLASS, UCharacterDirection.RIGHT_TO_LEFT_ARABIC }, 2111 2112 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */ 2113 2114 /* unassigned code points in the currency symbols block now default to ET */ 2115 { 0x20C0, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, 2116 { 0x20CF, UProperty.BIDI_CLASS, UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR }, 2117 2118 /* new property in Unicode 6.3 */ 2119 { 0x0027, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE }, 2120 { 0x0028, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN }, 2121 { 0x0029, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE }, 2122 { 0xFF5C, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.NONE }, 2123 { 0xFF5B, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.OPEN }, 2124 { 0xFF5D, UProperty.BIDI_PAIRED_BRACKET_TYPE, UCharacter.BidiPairedBracketType.CLOSE }, 2125 2126 { -1, 0x700, 0 }, /* version break for Unicode 7.0 */ 2127 2128 /* new character range with Joining_Group values */ 2129 { 0x10ABF, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 2130 { 0x10AC0, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_ALEPH }, 2131 { 0x10AC1, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_BETH }, 2132 { 0x10AEF, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.MANICHAEAN_HUNDRED }, 2133 { 0x10AF0, UProperty.JOINING_GROUP, UCharacter.JoiningGroup.NO_JOINING_GROUP }, 2134 2135 /* undefined UProperty values */ 2136 { 0x61, 0x4a7, 0 }, 2137 { 0x234bc, 0x15ed, 0 } 2138 }; 2139 2140 2141 if (UCharacter.getIntPropertyMinValue(UProperty.DASH) != 0 2142 || UCharacter.getIntPropertyMinValue(UProperty.BIDI_CLASS) != 0 2143 || UCharacter.getIntPropertyMinValue(UProperty.BLOCK)!= 0 /* j2478 */ 2144 || UCharacter.getIntPropertyMinValue(UProperty.SCRIPT)!= 0 /* JB#2410 */ 2145 || UCharacter.getIntPropertyMinValue(0x2345) != 0) { 2146 errln("error: UCharacter.getIntPropertyMinValue() wrong"); 2147 } 2148 2149 if( UCharacter.getIntPropertyMaxValue(UProperty.DASH)!=1) { 2150 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DASH) wrong\n"); 2151 } 2152 if( UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE)!=1) { 2153 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.ID_CONTINUE) wrong\n"); 2154 } 2155 if( UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1)!=1) { 2156 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BINARY_LIMIT-1) wrong\n"); 2157 } 2158 2159 if( UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)!=UCharacterDirection.CHAR_DIRECTION_COUNT-1 ) { 2160 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS) wrong\n"); 2161 } 2162 if( UCharacter.getIntPropertyMaxValue(UProperty.BLOCK)!=UCharacter.UnicodeBlock.COUNT-1 ) { 2163 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BLOCK) wrong\n"); 2164 } 2165 if(UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)!=UCharacter.LineBreak.COUNT-1) { 2166 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK) wrong\n"); 2167 } 2168 if(UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)!=UScript.CODE_LIMIT-1) { 2169 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT) wrong\n"); 2170 } 2171 if(UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE)!=UCharacter.NumericType.COUNT-1) { 2172 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE) wrong\n"); 2173 } 2174 if(UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY)!=UCharacterCategory.CHAR_CATEGORY_COUNT-1) { 2175 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY) wrong\n"); 2176 } 2177 if(UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE)!=UCharacter.HangulSyllableType.COUNT-1) { 2178 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE) wrong\n"); 2179 } 2180 if(UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK)!=UCharacter.GraphemeClusterBreak.COUNT-1) { 2181 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK) wrong\n"); 2182 } 2183 if(UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK)!=UCharacter.SentenceBreak.COUNT-1) { 2184 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK) wrong\n"); 2185 } 2186 if(UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)!=UCharacter.WordBreak.COUNT-1) { 2187 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK) wrong\n"); 2188 } 2189 if(UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE)!=UCharacter.BidiPairedBracketType.COUNT-1) { 2190 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE) wrong\n"); 2191 } 2192 /*JB#2410*/ 2193 if( UCharacter.getIntPropertyMaxValue(0x2345)!=-1) { 2194 errln("error: UCharacter.getIntPropertyMaxValue(0x2345) wrong\n"); 2195 } 2196 if( UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) != (UCharacter.DecompositionType.COUNT - 1)) { 2197 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE) wrong\n"); 2198 } 2199 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) != (UCharacter.JoiningGroup.COUNT -1)) { 2200 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP) wrong\n"); 2201 } 2202 if( UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) != (UCharacter.JoiningType.COUNT -1)) { 2203 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE) wrong\n"); 2204 } 2205 if( UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) != (UCharacter.EastAsianWidth.COUNT -1)) { 2206 errln("error: UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH) wrong\n"); 2207 } 2208 2209 VersionInfo version = UCharacter.getUnicodeVersion(); 2210 2211 // test hasBinaryProperty() 2212 for (int i = 0; i < props.length; ++ i) { 2213 int which = props[i][1]; 2214 if (props[i][0] < 0) { 2215 if (version.compareTo(VersionInfo.getInstance(which >> 8, 2216 (which >> 4) & 0xF, 2217 which & 0xF, 2218 0)) < 0) { 2219 break; 2220 } 2221 continue; 2222 } 2223 String whichName; 2224 try { 2225 whichName = UCharacter.getPropertyName(which, UProperty.NameChoice.LONG); 2226 } catch(IllegalArgumentException e) { 2227 // There are intentionally invalid property integer values ("which"). 2228 // Catch and ignore the exception from getPropertyName(). 2229 whichName = "undefined UProperty value"; 2230 } 2231 boolean expect = true; 2232 if (props[i][2] == 0) { 2233 expect = false; 2234 } 2235 if (which < UProperty.INT_START) { 2236 if (UCharacter.hasBinaryProperty(props[i][0], which) 2237 != expect) { 2238 errln("error: UCharacter.hasBinaryProperty(U+" + 2239 Utility.hex(props[i][0], 4) + ", " + 2240 whichName + ") has an error, expected=" + expect); 2241 } 2242 } 2243 2244 int retVal = UCharacter.getIntPropertyValue(props[i][0], which); 2245 if (retVal != props[i][2]) { 2246 errln("error: UCharacter.getIntPropertyValue(U+" + 2247 Utility.hex(props[i][0], 4) + 2248 ", " + whichName + ") is wrong, expected=" 2249 + props[i][2] + " actual=" + retVal); 2250 } 2251 2252 // test separate functions, too 2253 switch (which) { 2254 case UProperty.ALPHABETIC: 2255 if (UCharacter.isUAlphabetic(props[i][0]) != expect) { 2256 errln("error: UCharacter.isUAlphabetic(\\u" + 2257 Integer.toHexString(props[i][0]) + 2258 ") is wrong expected " + props[i][2]); 2259 } 2260 break; 2261 case UProperty.LOWERCASE: 2262 if (UCharacter.isULowercase(props[i][0]) != expect) { 2263 errln("error: UCharacter.isULowercase(\\u" + 2264 Integer.toHexString(props[i][0]) + 2265 ") is wrong expected " +props[i][2]); 2266 } 2267 break; 2268 case UProperty.UPPERCASE: 2269 if (UCharacter.isUUppercase(props[i][0]) != expect) { 2270 errln("error: UCharacter.isUUppercase(\\u" + 2271 Integer.toHexString(props[i][0]) + 2272 ") is wrong expected " + props[i][2]); 2273 } 2274 break; 2275 case UProperty.WHITE_SPACE: 2276 if (UCharacter.isUWhiteSpace(props[i][0]) != expect) { 2277 errln("error: UCharacter.isUWhiteSpace(\\u" + 2278 Integer.toHexString(props[i][0]) + 2279 ") is wrong expected " + props[i][2]); 2280 } 2281 break; 2282 default: 2283 break; 2284 } 2285 } 2286 } 2287 TestNumericProperties()2288 public void TestNumericProperties() 2289 { 2290 // see UnicodeData.txt, DerivedNumericValues.txt 2291 double values[][] = { 2292 // Code point, numeric type, numeric value. 2293 // If a fourth value is specified, it is the getNumericValue(). 2294 // Otherwise it is expected to be the same as the getUnicodeNumericValue(), 2295 // where UCharacter.NO_NUMERIC_VALUE is turned into -1. 2296 // getNumericValue() returns -2 if the code point has a value 2297 // which is not a non-negative integer. (This is mostly auto-converted to -2.) 2298 { 0x0F33, UCharacter.NumericType.NUMERIC, -1./2. }, 2299 { 0x0C66, UCharacter.NumericType.DECIMAL, 0 }, 2300 { 0x96f6, UCharacter.NumericType.NUMERIC, 0 }, 2301 { 0xa833, UCharacter.NumericType.NUMERIC, 1./16. }, 2302 { 0x2152, UCharacter.NumericType.NUMERIC, 1./10. }, 2303 { 0x2151, UCharacter.NumericType.NUMERIC, 1./9. }, 2304 { 0x1245f, UCharacter.NumericType.NUMERIC, 1./8. }, 2305 { 0x2150, UCharacter.NumericType.NUMERIC, 1./7. }, 2306 { 0x2159, UCharacter.NumericType.NUMERIC, 1./6. }, 2307 { 0x09f6, UCharacter.NumericType.NUMERIC, 3./16. }, 2308 { 0x2155, UCharacter.NumericType.NUMERIC, 1./5. }, 2309 { 0x00BD, UCharacter.NumericType.NUMERIC, 1./2. }, 2310 { 0x0031, UCharacter.NumericType.DECIMAL, 1. }, 2311 { 0x4e00, UCharacter.NumericType.NUMERIC, 1. }, 2312 { 0x58f1, UCharacter.NumericType.NUMERIC, 1. }, 2313 { 0x10320, UCharacter.NumericType.NUMERIC, 1. }, 2314 { 0x0F2B, UCharacter.NumericType.NUMERIC, 3./2. }, 2315 { 0x00B2, UCharacter.NumericType.DIGIT, 2. }, /* Unicode 4.0 change */ 2316 { 0x5f10, UCharacter.NumericType.NUMERIC, 2. }, 2317 { 0x1813, UCharacter.NumericType.DECIMAL, 3. }, 2318 { 0x5f0e, UCharacter.NumericType.NUMERIC, 3. }, 2319 { 0x2173, UCharacter.NumericType.NUMERIC, 4. }, 2320 { 0x8086, UCharacter.NumericType.NUMERIC, 4. }, 2321 { 0x278E, UCharacter.NumericType.DIGIT, 5. }, 2322 { 0x1D7F2, UCharacter.NumericType.DECIMAL, 6. }, 2323 { 0x247A, UCharacter.NumericType.DIGIT, 7. }, 2324 { 0x7396, UCharacter.NumericType.NUMERIC, 9. }, 2325 { 0x1372, UCharacter.NumericType.NUMERIC, 10. }, 2326 { 0x216B, UCharacter.NumericType.NUMERIC, 12. }, 2327 { 0x16EE, UCharacter.NumericType.NUMERIC, 17. }, 2328 { 0x249A, UCharacter.NumericType.NUMERIC, 19. }, 2329 { 0x303A, UCharacter.NumericType.NUMERIC, 30. }, 2330 { 0x5345, UCharacter.NumericType.NUMERIC, 30. }, 2331 { 0x32B2, UCharacter.NumericType.NUMERIC, 37. }, 2332 { 0x1375, UCharacter.NumericType.NUMERIC, 40. }, 2333 { 0x10323, UCharacter.NumericType.NUMERIC, 50. }, 2334 { 0x0BF1, UCharacter.NumericType.NUMERIC, 100. }, 2335 { 0x964c, UCharacter.NumericType.NUMERIC, 100. }, 2336 { 0x217E, UCharacter.NumericType.NUMERIC, 500. }, 2337 { 0x2180, UCharacter.NumericType.NUMERIC, 1000. }, 2338 { 0x4edf, UCharacter.NumericType.NUMERIC, 1000. }, 2339 { 0x2181, UCharacter.NumericType.NUMERIC, 5000. }, 2340 { 0x137C, UCharacter.NumericType.NUMERIC, 10000. }, 2341 { 0x4e07, UCharacter.NumericType.NUMERIC, 10000. }, 2342 { 0x12432, UCharacter.NumericType.NUMERIC, 216000. }, 2343 { 0x12433, UCharacter.NumericType.NUMERIC, 432000. }, 2344 { 0x4ebf, UCharacter.NumericType.NUMERIC, 100000000. }, 2345 { 0x5146, UCharacter.NumericType.NUMERIC, 1000000000000. }, 2346 { -1, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2347 { 0x61, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE, 10. }, 2348 { 0x3000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2349 { 0xfffe, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2350 { 0x10301, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2351 { 0xe0033, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2352 { 0x10ffff, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE }, 2353 { 0x110000, UCharacter.NumericType.NONE, UCharacter.NO_NUMERIC_VALUE } 2354 }; 2355 2356 for (int i = 0; i < values.length; ++ i) { 2357 int c = (int)values[i][0]; 2358 int type = UCharacter.getIntPropertyValue(c, 2359 UProperty.NUMERIC_TYPE); 2360 double nv = UCharacter.getUnicodeNumericValue(c); 2361 2362 if (type != values[i][1]) { 2363 errln("UProperty.NUMERIC_TYPE(\\u" + Utility.hex(c, 4) 2364 + ") = " + type + " should be " + (int)values[i][1]); 2365 } 2366 if (0.000001 <= Math.abs(nv - values[i][2])) { 2367 errln("UCharacter.getUnicodeNumericValue(\\u" + Utility.hex(c, 4) 2368 + ") = " + nv + " should be " + values[i][2]); 2369 } 2370 2371 // Test getNumericValue() as well. 2372 // It can only return the subset of numeric values that are 2373 // non-negative and fit into an int. 2374 int expectedInt; 2375 if (values[i].length == 3) { 2376 if (values[i][2] == UCharacter.NO_NUMERIC_VALUE) { 2377 expectedInt = -1; 2378 } else { 2379 expectedInt = (int)values[i][2]; 2380 if (expectedInt < 0 || expectedInt != values[i][2]) { 2381 // The numeric value is not a non-negative integer. 2382 expectedInt = -2; 2383 } 2384 } 2385 } else { 2386 expectedInt = (int)values[i][3]; 2387 } 2388 int nvInt = UCharacter.getNumericValue(c); 2389 if (nvInt != expectedInt) { 2390 errln("UCharacter.getNumericValue(\\u" + Utility.hex(c, 4) 2391 + ") = " + nvInt + " should be " + expectedInt); 2392 } 2393 } 2394 } 2395 2396 /** 2397 * Test the property values API. See JB#2410. 2398 */ TestPropertyValues()2399 public void TestPropertyValues() { 2400 int i, p, min, max; 2401 2402 /* Min should be 0 for everything. */ 2403 /* Until JB#2478 is fixed, the one exception is UProperty.BLOCK. */ 2404 for (p=UProperty.INT_START; p<UProperty.INT_LIMIT; ++p) { 2405 min = UCharacter.getIntPropertyMinValue(p); 2406 if (min != 0) { 2407 if (p == UProperty.BLOCK) { 2408 /* This is okay...for now. See JB#2487. 2409 TODO Update this for JB#2487. */ 2410 } else { 2411 String name; 2412 name = UCharacter.getPropertyName(p, UProperty.NameChoice.LONG); 2413 errln("FAIL: UCharacter.getIntPropertyMinValue(" + name + ") = " + 2414 min + ", exp. 0"); 2415 } 2416 } 2417 } 2418 2419 if (UCharacter.getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK) 2420 != 0 2421 || UCharacter.getIntPropertyMaxValue( 2422 UProperty.GENERAL_CATEGORY_MASK) 2423 != -1) { 2424 errln("error: UCharacter.getIntPropertyMin/MaxValue(" 2425 + "UProperty.GENERAL_CATEGORY_MASK) is wrong"); 2426 } 2427 2428 /* Max should be -1 for invalid properties. */ 2429 max = UCharacter.getIntPropertyMaxValue(-1); 2430 if (max != -1) { 2431 errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = " + 2432 max + ", exp. -1"); 2433 } 2434 2435 /* Script should return 0 for an invalid code point. If the API 2436 throws an exception then that's fine too. */ 2437 for (i=0; i<2; ++i) { 2438 try { 2439 int script = 0; 2440 String desc = null; 2441 switch (i) { 2442 case 0: 2443 script = UScript.getScript(-1); 2444 desc = "UScript.getScript(-1)"; 2445 break; 2446 case 1: 2447 script = UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT); 2448 desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)"; 2449 break; 2450 } 2451 if (script != 0) { 2452 errln("FAIL: " + desc + " = " + script + ", exp. 0"); 2453 } 2454 } catch (IllegalArgumentException e) {} 2455 } 2456 } 2457 TestBidiPairedBracketType()2458 public void TestBidiPairedBracketType() { 2459 // BidiBrackets-6.3.0.txt says: 2460 // 2461 // The set of code points listed in this file was originally derived 2462 // using the character properties General_Category (gc), Bidi_Class (bc), 2463 // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows: 2464 // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe, 2465 // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket 2466 // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type 2467 // property values are Open and Close, respectively. 2468 UnicodeSet bpt = new UnicodeSet("[:^bpt=n:]"); 2469 assertTrue("bpt!=None is not empty", !bpt.isEmpty()); 2470 // The following should always be true. 2471 UnicodeSet mirrored = new UnicodeSet("[:Bidi_M:]"); 2472 UnicodeSet other_neutral = new UnicodeSet("[:bc=ON:]"); 2473 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); 2474 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt)); 2475 // The following are true at least initially in Unicode 6.3. 2476 UnicodeSet bpt_open = new UnicodeSet("[:bpt=o:]"); 2477 UnicodeSet bpt_close = new UnicodeSet("[:bpt=c:]"); 2478 UnicodeSet ps = new UnicodeSet("[:Ps:]"); 2479 UnicodeSet pe = new UnicodeSet("[:Pe:]"); 2480 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); 2481 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); 2482 } 2483 TestIsBMP()2484 public void TestIsBMP() 2485 { 2486 int ch[] = {0x0, -1, 0xffff, 0x10ffff, 0xff, 0x1ffff}; 2487 boolean flag[] = {true, false, true, false, true, false}; 2488 for (int i = 0; i < ch.length; i ++) { 2489 if (UCharacter.isBMP(ch[i]) != flag[i]) { 2490 errln("Fail: \\u" + Utility.hex(ch[i], 8) 2491 + " failed at UCharacter.isBMP"); 2492 } 2493 } 2494 } 2495 showADiffB(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean expect, boolean diffIsError)2496 private boolean showADiffB(UnicodeSet a, UnicodeSet b, 2497 String a_name, String b_name, 2498 boolean expect, 2499 boolean diffIsError){ 2500 int i, start, end; 2501 boolean equal=true; 2502 for(i=0; i < a.getRangeCount(); ++i) { 2503 start = a.getRangeStart(i); 2504 end = a.getRangeEnd(i); 2505 if(expect!=b.contains(start, end)) { 2506 equal=false; 2507 while(start<=end) { 2508 if(expect!=b.contains(start)) { 2509 if(diffIsError) { 2510 if(expect) { 2511 errln("error: "+ a_name +" contains "+ hex(start)+" but "+ b_name +" does not"); 2512 } else { 2513 errln("error: "+a_name +" and "+ b_name+" both contain "+hex(start) +" but should not intersect"); 2514 } 2515 } else { 2516 if(expect) { 2517 logln("info: "+a_name +" contains "+hex(start)+ "but " + b_name +" does not"); 2518 } else { 2519 logln("info: "+a_name +" and "+b_name+" both contain "+hex(start)+" but should not intersect"); 2520 } 2521 } 2522 } 2523 ++start; 2524 } 2525 } 2526 } 2527 return equal; 2528 } showAMinusB(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean diffIsError)2529 private boolean showAMinusB(UnicodeSet a, UnicodeSet b, 2530 String a_name, String b_name, 2531 boolean diffIsError) { 2532 2533 return showADiffB(a, b, a_name, b_name, true, diffIsError); 2534 } 2535 showAIntersectB(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean diffIsError)2536 private boolean showAIntersectB(UnicodeSet a, UnicodeSet b, 2537 String a_name, String b_name, 2538 boolean diffIsError) { 2539 return showADiffB(a, b, a_name, b_name, false, diffIsError); 2540 } 2541 compareUSets(UnicodeSet a, UnicodeSet b, String a_name, String b_name, boolean diffIsError)2542 private boolean compareUSets(UnicodeSet a, UnicodeSet b, 2543 String a_name, String b_name, 2544 boolean diffIsError) { 2545 return 2546 showAMinusB(a, b, a_name, b_name, diffIsError) && 2547 showAMinusB(b, a, b_name, a_name, diffIsError); 2548 } 2549 2550 /* various tests for consistency of UCD data and API behavior */ TestConsistency()2551 public void TestConsistency() { 2552 UnicodeSet set1, set2, set3, set4; 2553 2554 int start, end; 2555 int i, length; 2556 2557 String hyphenPattern = "[:Hyphen:]"; 2558 String dashPattern = "[:Dash:]"; 2559 String lowerPattern = "[:Lowercase:]"; 2560 String formatPattern = "[:Cf:]"; 2561 String alphaPattern = "[:Alphabetic:]"; 2562 2563 /* 2564 * It used to be that UCD.html and its precursors said 2565 * "Those dashes used to mark connections between pieces of words, 2566 * plus the Katakana middle dot." 2567 * 2568 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash 2569 * but not from Hyphen. 2570 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html. 2571 * Therefore, do not show errors when testing the Hyphen property. 2572 */ 2573 logln("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n" 2574 + "known to the UTC and not considered errors.\n"); 2575 2576 set1=new UnicodeSet(hyphenPattern); 2577 set2=new UnicodeSet(dashPattern); 2578 2579 /* remove the Katakana middle dot(s) from set1 */ 2580 set1.remove(0x30fb); 2581 set2.remove (0xff65); /* halfwidth variant */ 2582 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false); 2583 2584 2585 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */ 2586 set3=new UnicodeSet(formatPattern); 2587 set4=new UnicodeSet(alphaPattern); 2588 2589 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false); 2590 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true); 2591 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true); 2592 /* 2593 * Check that each lowercase character has "small" in its name 2594 * and not "capital". 2595 * There are some such characters, some of which seem odd. 2596 * Use the verbose flag to see these notices. 2597 */ 2598 set1=new UnicodeSet(lowerPattern); 2599 2600 for(i=0;; ++i) { 2601 // try{ 2602 // length=set1.getItem(set1, i, &start, &end, NULL, 0, &errorCode); 2603 // }catch(Exception e){ 2604 // break; 2605 // } 2606 start = set1.getRangeStart(i); 2607 end = set1.getRangeEnd(i); 2608 length = i<set1.getRangeCount() ? set1.getRangeCount() : 0; 2609 if(length!=0) { 2610 break; /* done with code points, got a string or -1 */ 2611 } 2612 2613 while(start<=end) { 2614 String name=UCharacter.getName(start); 2615 2616 if( (name.indexOf("SMALL")< 0 || name.indexOf("CAPITAL")<-1) && 2617 name.indexOf("SMALL CAPITAL")==-1 2618 ) { 2619 logln("info: [:Lowercase:] contains U+"+hex(start) + " whose name does not suggest lowercase: " + name); 2620 } 2621 ++start; 2622 } 2623 } 2624 2625 2626 /* 2627 * Test for an example that unorm_getCanonStartSet() delivers 2628 * all characters that compose from the input one, 2629 * even in multiple steps. 2630 * For example, the set for "I" (0049) should contain both 2631 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E). 2632 * In general, the set for the middle such character should be a subset 2633 * of the set for the first. 2634 */ 2635 Normalizer2 norm2=Normalizer2.getNFDInstance(); 2636 set1=new UnicodeSet(); 2637 Norm2AllModes.getNFCInstance().impl. 2638 ensureCanonIterData().getCanonStartSet(0x49, set1); 2639 set2=new UnicodeSet(); 2640 2641 /* enumerate all characters that are plausible to be latin letters */ 2642 for(start=0xa0; start<0x2000; ++start) { 2643 String decomp=norm2.normalize(UTF16.valueOf(start)); 2644 if(decomp.length() > 1 && decomp.charAt(0)==0x49) { 2645 set2.add(start); 2646 } 2647 } 2648 2649 compareUSets(set1, set2, 2650 "[canon start set of 0049]", "[all c with canon decomp with 0049]", 2651 false); 2652 2653 } 2654 2655 public void TestCoverage() { 2656 //cover forDigit 2657 char ch1 = UCharacter.forDigit(7, 11); 2658 assertEquals("UCharacter.forDigit ", "7", String.valueOf(ch1)); 2659 char ch2 = UCharacter.forDigit(17, 20); 2660 assertEquals("UCharacter.forDigit ", "h", String.valueOf(ch2)); 2661 2662 //Jitterbug 4451, for coverage 2663 for (int i = 0x0041; i < 0x005B; i++) { 2664 if (!UCharacter.isJavaLetter(i)) 2665 errln("FAIL \\u" + hex(i) + " expected to be a letter"); 2666 if (!UCharacter.isJavaIdentifierStart(i)) 2667 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier start character"); 2668 if (!UCharacter.isJavaLetterOrDigit(i)) 2669 errln("FAIL \\u" + hex(i) + " expected not to be a Java letter"); 2670 if (!UCharacter.isJavaIdentifierPart(i)) 2671 errln("FAIL \\u" + hex(i) + " expected to be a Java identifier part character"); 2672 } 2673 char[] spaces = {'\t','\n','\f','\r',' '}; 2674 for (int i = 0; i < spaces.length; i++){ 2675 if (!UCharacter.isSpace(spaces[i])) 2676 errln("FAIL \\u" + hex(spaces[i]) + " expected to be a Java space"); 2677 } 2678 } 2679 2680 public void TestBlockData() 2681 { 2682 Class ubc = UCharacter.UnicodeBlock.class; 2683 2684 for (int b = 1; b < UCharacter.UnicodeBlock.COUNT; b += 1) { 2685 UCharacter.UnicodeBlock blk = UCharacter.UnicodeBlock.getInstance(b); 2686 int id = blk.getID(); 2687 String name = blk.toString(); 2688 2689 if (id != b) { 2690 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with id = " + id); 2691 } 2692 2693 try { 2694 if (ubc.getField(name + "_ID").getInt(blk) != b) { 2695 errln("UCharacter.UnicodeBlock.getInstance(" + b + ") returned a block with a name of " + name + 2696 " which does not match the block id."); 2697 } 2698 } catch (Exception e) { 2699 errln("Couldn't get the id name for id " + b); 2700 } 2701 } 2702 } 2703 2704 /* 2705 * The following method tests 2706 * public static UnicodeBlock getInstance(int id) 2707 */ 2708 public void TestGetInstance(){ 2709 // Testing values for invalid and valid ID 2710 int[] invalid_test = {-1,-10,-100}; 2711 for(int i=0; i< invalid_test.length; i++){ 2712 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.getInstance(invalid_test[i])){ 2713 errln("UCharacter.UnicodeBlock.getInstance(invalid_test[i]) was " + 2714 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " + 2715 UCharacter.UnicodeBlock.getInstance(invalid_test[i]) + ". Expected " + 2716 UCharacter.UnicodeBlock.INVALID_CODE); 2717 } 2718 } 2719 } 2720 2721 /* 2722 * The following method tests 2723 * public static UnicodeBlock of(int ch) 2724 */ 2725 public void TestOf(){ 2726 if(UCharacter.UnicodeBlock.INVALID_CODE != UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1)){ 2727 errln("UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) was " + 2728 "suppose to return UCharacter.UnicodeBlock.INVALID_CODE. Got " + 2729 UCharacter.UnicodeBlock.of(UTF16.CODEPOINT_MAX_VALUE+1) + ". Expected " + 2730 UCharacter.UnicodeBlock.INVALID_CODE); 2731 } 2732 } 2733 2734 /* 2735 * The following method tests 2736 * public static final UnicodeBlock forName(String blockName) 2737 */ 2738 public void TestForName(){ 2739 //UCharacter.UnicodeBlock.forName(""); 2740 //Tests when "if (b == null)" is true 2741 } 2742 2743 /* 2744 * The following method tests 2745 * public static int getNumericValue(int ch) 2746 */ 2747 public void TestGetNumericValue(){ 2748 // The following tests the else statement when 2749 // if(numericType<NumericType.COUNT) is false 2750 // The following values were obtained by testing all values from 2751 // UTF16.CODEPOINT_MIN_VALUE to UTF16.CODEPOINT_MAX_VALUE inclusively 2752 // to obtain the value to go through the else statement. 2753 int[] valid_values = 2754 {3058,3442,4988,8558,8559,8574,8575,8576,8577,8578,8583,8584,19975, 2755 20159,20191,20740,20806,21315,33836,38433,65819,65820,65821,65822, 2756 65823,65824,65825,65826,65827,65828,65829,65830,65831,65832,65833, 2757 65834,65835,65836,65837,65838,65839,65840,65841,65842,65843,65861, 2758 65862,65863,65868,65869,65870,65875,65876,65877,65878,65899,65900, 2759 65901,65902,65903,65904,65905,65906,66378,68167}; 2760 2761 int[] results = 2762 {1000,1000,10000,500,1000,500,1000,1000,5000,10000,50000,100000, 2763 10000,100000000,1000,100000000,-2,1000,10000,1000,300,400,500, 2764 600,700,800,900,1000,2000,3000,4000,5000,6000,7000,8000,9000, 2765 10000,20000,30000,40000,50000,60000,70000,80000,90000,500,5000, 2766 50000,500,1000,5000,500,1000,10000,50000,300,500,500,500,500,500, 2767 1000,5000,900,1000}; 2768 2769 if(valid_values.length != results.length){ 2770 errln("The valid_values array and the results array need to be "+ 2771 "the same length."); 2772 } else { 2773 for(int i = 0; i < valid_values.length; i++){ 2774 try{ 2775 if(UCharacter.getNumericValue(valid_values[i]) != results[i]){ 2776 errln("UCharacter.getNumericValue(i) returned a " + 2777 "different value from the expected result. " + 2778 "Got " + UCharacter.getNumericValue(valid_values[i]) + 2779 "Expected" + results[i]); 2780 } 2781 } catch(Exception e){ 2782 errln("UCharacter.getNumericValue(int) returned an exception " + 2783 "with the parameter value"); 2784 } 2785 } 2786 } 2787 } 2788 2789 /* 2790 * The following method tests 2791 * public static double getUnicodeNumericValue(int ch) 2792 */ 2793 // The following tests covers if(mant==0), else if(mant > 9), and default 2794 public void TestGetUnicodeNumericValue(){ 2795 /* The code coverage for if(mant==0), else if(mant > 9), and default 2796 * could not be covered even with input values from UTF16.CODEPOINT_MIN_VALUE 2797 * to UTF16.CODEPOINT_MAX_VALUE. I also tested from UTF16.CODEPOINT_MAX_VALUE to 2798 * Integer.MAX_VALUE and didn't recieve any code coverage there too. 2799 * Therefore, the code could either be dead code or meaningless. 2800 */ 2801 } 2802 2803 /* 2804 * The following method tests 2805 * public static String toString(int ch) 2806 */ 2807 public void TestToString(){ 2808 int[] valid_tests = { 2809 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 2810 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 2811 int[] invalid_tests = { 2812 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2813 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2814 2815 for(int i=0; i< valid_tests.length; i++){ 2816 if(UCharacter.toString(valid_tests[i]) == null){ 2817 errln("UCharacter.toString(int) was not suppose to return " + 2818 "null because it was given a valid parameter. Value passed: " + 2819 valid_tests[i] + ". Got null."); 2820 } 2821 } 2822 2823 for(int i=0; i< invalid_tests.length; i++){ 2824 if(UCharacter.toString(invalid_tests[i]) != null){ 2825 errln("UCharacter.toString(int) was suppose to return " + 2826 "null because it was given an invalid parameter. Value passed: " + 2827 invalid_tests[i] + ". Got: " + UCharacter.toString(invalid_tests[i])); 2828 } 2829 } 2830 } 2831 2832 /* 2833 * The following method tests 2834 * public static int getCombiningClass(int ch) 2835 */ 2836 public void TestGetCombiningClass(){ 2837 int[] valid_tests = { 2838 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 2839 UCharacter.MAX_VALUE-1, UCharacter.MAX_VALUE}; 2840 int[] invalid_tests = { 2841 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2842 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2843 2844 for(int i=0; i< valid_tests.length; i++){ 2845 try{ 2846 UCharacter.getCombiningClass(valid_tests[i]); 2847 } catch(Exception e){ 2848 errln("UCharacter.getCombiningClass(int) was not supposed to have " + 2849 "an exception. Value passed: " + valid_tests[i]); 2850 } 2851 } 2852 2853 for(int i=0; i< invalid_tests.length; i++){ 2854 try{ 2855 assertEquals("getCombiningClass(out of range)", 2856 0, UCharacter.getCombiningClass(invalid_tests[i])); 2857 } catch(Exception e){ 2858 errln("UCharacter.getCombiningClass(int) was not supposed to have " + 2859 "an exception. Value passed: " + invalid_tests[i]); 2860 } 2861 } 2862 } 2863 2864 /* 2865 * The following method tests 2866 * public static String getName(int ch) 2867 */ 2868 public void TestGetName(){ 2869 // Need to test on other "one characters" for the getName() method 2870 String[] data = {"a","z"}; 2871 String[] results = {"LATIN SMALL LETTER A","LATIN SMALL LETTER Z"}; 2872 if(data.length != results.length){ 2873 errln("The data array and the results array need to be "+ 2874 "the same length."); 2875 } else { 2876 for(int i=0; i < data.length; i++){ 2877 if(UCharacter.getName(data[i], "").compareTo(results[i]) != 0){ 2878 errln("UCharacter.getName(String, String) was suppose " + 2879 "to have the same result for the data in the parameter. " + 2880 "Value passed: " + data[i] + ". Got: " + 2881 UCharacter.getName(data[i], "") + ". Expected: " + 2882 results[i]); 2883 } 2884 } 2885 } 2886 } 2887 2888 /* 2889 * The following method tests 2890 * public static String getISOComment(int ch) 2891 */ 2892 public void TestGetISOComment(){ 2893 int[] invalid_tests = { 2894 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 2895 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 2896 2897 for(int i=0; i< invalid_tests.length; i++){ 2898 if(UCharacter.getISOComment(invalid_tests[i]) != null){ 2899 errln("UCharacter.getISOComment(int) was suppose to return " + 2900 "null because it was given an invalid parameter. Value passed: " + 2901 invalid_tests[i] + ". Got: " + UCharacter.getISOComment(invalid_tests[i])); 2902 } 2903 } 2904 } 2905 2906 /* 2907 * The following method tests 2908 * public void setLimit(int lim) 2909 */ 2910 public void TestSetLimit(){ 2911 // TODO: Tests when "if(0<=lim && lim<=s.length())" is false 2912 } 2913 2914 /* 2915 * The following method tests 2916 * public int nextCaseMapCP() 2917 */ 2918 public void TestNextCaseMapCP(){ 2919 // TODO: Tests when "if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false 2920 /* TODO: Tests when "if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit && 2921 * UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) && c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE)" is false 2922 */ 2923 } 2924 2925 /* 2926 * The following method tests 2927 * public void reset(int direction) 2928 */ 2929 public void TestReset(){ 2930 // The method reset() is never called by another function 2931 // TODO: Tests when "else if(direction<0)" is false 2932 } 2933 2934 /* 2935 * The following method tests 2936 * public static String toTitleCase(Locale locale, String str, BreakIterator breakiter) 2937 */ 2938 public void TestToTitleCaseCoverage(){ 2939 //Calls the function "toTitleCase(Locale locale, String str, BreakIterator breakiter)" 2940 String[] locale={"en","fr","zh","ko","ja","it","de",""}; 2941 for(int i=0; i<locale.length; i++){ 2942 UCharacter.toTitleCase(new Locale(locale[i]), "", null); 2943 } 2944 2945 // Calls the function "String toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)" 2946 // Tests when "if (locale == null)" is true 2947 UCharacter.toTitleCase((ULocale)null, "", null, 0); 2948 2949 // TODO: Tests when "if(index==BreakIterator.DONE || index>srcLength)" is true 2950 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0 && UCaseProps.NONE==gCsp.getType(c))" is false 2951 // TODO: Tests when "if(prev<titleStart)" is false 2952 // TODO: Tests when "if(c<=0xffff)" is false 2953 // TODO: Tests when "if(c<=0xffff)" is false 2954 // TODO: Tests when "if(titleLimit<index)" is false 2955 // TODO: Tests when "else if((nc=iter.nextCaseMapCP())>=0)" is false 2956 } 2957 /* 2958 * The following method tests 2959 * public static String toUpperCase(ULocale locale, String str) 2960 */ 2961 public void TestToUpperCase(){ 2962 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false 2963 } 2964 2965 /* 2966 * The following method tests 2967 * public static String toLowerCase(ULocale locale, String str) 2968 */ 2969 public void TestToLowerCase(){ 2970 // Test when locale is null 2971 String[] cases = {"","a","A","z","Z","Dummy","DUMMY","dummy","a z","A Z", 2972 "'","\"","0","9","0a","a0","*","~!@#$%^&*()_+"}; 2973 for(int i=0; i<cases.length; i++){ 2974 try{ 2975 UCharacter.toLowerCase((ULocale) null, cases[i]); 2976 } catch(Exception e){ 2977 errln("UCharacter.toLowerCase was not suppose to return an " + 2978 "exception for input of null and string: " + cases[i]); 2979 } 2980 } 2981 // TODO: Tests when "while((c=iter.nextCaseMapCP())>=0)" is false 2982 } 2983 2984 /* 2985 * The following method tests 2986 * public static int getHanNumericValue(int ch) 2987 */ 2988 public void TestGetHanNumericValue(){ 2989 int[] valid = { 2990 0x3007, //IDEOGRAPHIC_NUMBER_ZERO_ 2991 0x96f6, //CJK_IDEOGRAPH_COMPLEX_ZERO_ 2992 0x4e00, //CJK_IDEOGRAPH_FIRST_ 2993 0x58f9, //CJK_IDEOGRAPH_COMPLEX_ONE_ 2994 0x4e8c, //CJK_IDEOGRAPH_SECOND_ 2995 0x8cb3, //CJK_IDEOGRAPH_COMPLEX_TWO_ 2996 0x4e09, //CJK_IDEOGRAPH_THIRD_ 2997 0x53c3, //CJK_IDEOGRAPH_COMPLEX_THREE_ 2998 0x56db, //CJK_IDEOGRAPH_FOURTH_ 2999 0x8086, //CJK_IDEOGRAPH_COMPLEX_FOUR_ 3000 0x4e94, //CJK_IDEOGRAPH_FIFTH_ 3001 0x4f0d, //CJK_IDEOGRAPH_COMPLEX_FIVE_ 3002 0x516d, //CJK_IDEOGRAPH_SIXTH_ 3003 0x9678, //CJK_IDEOGRAPH_COMPLEX_SIX_ 3004 0x4e03, //CJK_IDEOGRAPH_SEVENTH_ 3005 0x67d2, //CJK_IDEOGRAPH_COMPLEX_SEVEN_ 3006 0x516b, //CJK_IDEOGRAPH_EIGHTH_ 3007 0x634c, //CJK_IDEOGRAPH_COMPLEX_EIGHT_ 3008 0x4e5d, //CJK_IDEOGRAPH_NINETH_ 3009 0x7396, //CJK_IDEOGRAPH_COMPLEX_NINE_ 3010 0x5341, //CJK_IDEOGRAPH_TEN_ 3011 0x62fe, //CJK_IDEOGRAPH_COMPLEX_TEN_ 3012 0x767e, //CJK_IDEOGRAPH_HUNDRED_ 3013 0x4f70, //CJK_IDEOGRAPH_COMPLEX_HUNDRED_ 3014 0x5343, //CJK_IDEOGRAPH_THOUSAND_ 3015 0x4edf, //CJK_IDEOGRAPH_COMPLEX_THOUSAND_ 3016 0x824c, //CJK_IDEOGRAPH_TEN_THOUSAND_ 3017 0x5104, //CJK_IDEOGRAPH_HUNDRED_MILLION_ 3018 }; 3019 3020 int[] invalid = {-5,-2,-1,0}; 3021 3022 int[] results = {0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,100,100, 3023 1000,1000,10000,100000000}; 3024 3025 if(valid.length != results.length){ 3026 errln("The arrays valid and results are suppose to be the same length " + 3027 "to test getHanNumericValue(int ch)."); 3028 } else{ 3029 for(int i=0; i<valid.length; i++){ 3030 if(UCharacter.getHanNumericValue(valid[i]) != results[i]){ 3031 errln("UCharacter.getHanNumericValue does not return the " + 3032 "same result as expected. Passed value: " + valid[i] + 3033 ". Got: " + UCharacter.getHanNumericValue(valid[i]) + 3034 ". Expected: " + results[i]); 3035 } 3036 } 3037 } 3038 3039 for(int i=0; i<invalid.length; i++){ 3040 if(UCharacter.getHanNumericValue(invalid[i]) != -1){ 3041 errln("UCharacter.getHanNumericValue does not return the " + 3042 "same result as expected. Passed value: " + invalid[i] + 3043 ". Got: " + UCharacter.getHanNumericValue(invalid[i]) + 3044 ". Expected: -1"); 3045 } 3046 } 3047 } 3048 3049 /* 3050 * The following method tests 3051 * public static boolean hasBinaryProperty(int ch, int property) 3052 */ 3053 public void TestHasBinaryProperty(){ 3054 // Testing when "if (ch < MIN_VALUE || ch > MAX_VALUE)" is true 3055 int[] invalid = { 3056 UCharacter.MIN_VALUE-1, UCharacter.MIN_VALUE-2, 3057 UCharacter.MAX_VALUE+1, UCharacter.MAX_VALUE+2}; 3058 int[] valid = { 3059 UCharacter.MIN_VALUE, UCharacter.MIN_VALUE+1, 3060 UCharacter.MAX_VALUE, UCharacter.MAX_VALUE-1}; 3061 3062 for(int i=0; i<invalid.length; i++){ 3063 try{ 3064 if (UCharacter.hasBinaryProperty(invalid[i], 1)) { 3065 errln("UCharacter.hasBinaryProperty(ch, property) should return " + 3066 "false for out-of-range code points but " + 3067 "returns true for " + invalid[i]); 3068 } 3069 } catch(Exception e) { 3070 errln("UCharacter.hasBinaryProperty(ch, property) should not " + 3071 "throw an exception for any input. Value passed: " + 3072 invalid[i]); 3073 } 3074 } 3075 3076 for(int i=0; i<valid.length; i++){ 3077 try{ 3078 UCharacter.hasBinaryProperty(valid[i], 1); 3079 } catch(Exception e) { 3080 errln("UCharacter.hasBinaryProperty(ch, property) should not " + 3081 "throw an exception for any input. Value passed: " + 3082 valid[i]); 3083 } 3084 } 3085 } 3086 3087 /* 3088 * The following method tests 3089 * public static int getIntPropertyValue(int ch, int type) 3090 */ 3091 public void TestGetIntPropertyValue(){ 3092 /* Testing UCharacter.getIntPropertyValue(ch, type) */ 3093 // Testing when "if (type < UProperty.BINARY_START)" is true 3094 int[] negative_cases = {-100,-50,-10,-5,-2,-1}; 3095 for(int i=0; i<negative_cases.length; i++){ 3096 if(UCharacter.getIntPropertyValue(0, negative_cases[i]) != 0){ 3097 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3098 "when passing a negative value of " + negative_cases[i]); 3099 3100 } 3101 } 3102 3103 // Testing when "if(ch<NormalizerImpl.JAMO_L_BASE)" is true 3104 for(int i=Normalizer2Impl.Hangul.JAMO_L_BASE-5; i<Normalizer2Impl.Hangul.JAMO_L_BASE; i++){ 3105 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){ 3106 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3107 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE"); 3108 3109 } 3110 } 3111 3112 // Testing when "else if((ch-=NormalizerImpl.HANGUL_BASE)<0)" is true 3113 for(int i=Normalizer2Impl.Hangul.HANGUL_BASE-5; i<Normalizer2Impl.Hangul.HANGUL_BASE; i++){ 3114 if(UCharacter.getIntPropertyValue(i, UProperty.HANGUL_SYLLABLE_TYPE) != 0){ 3115 errln("UCharacter.getIntPropertyValue(ch, type) was suppose to return 0 " + 3116 "when passing ch: " + i + "and type of Property.HANGUL_SYLLABLE_TYPE"); 3117 3118 } 3119 } 3120 } 3121 3122 /* 3123 * The following method tests 3124 * public static int getIntPropertyMaxValue(int type) 3125 */ 3126 public void TestGetIntPropertyMaxValue(){ 3127 /* Testing UCharacter.getIntPropertyMaxValue(type) */ 3128 // Testing when "else if (type < UProperty.INT_START)" is true 3129 int[] cases = {UProperty.BINARY_LIMIT, UProperty.BINARY_LIMIT+1, 3130 UProperty.INT_START-2, UProperty.INT_START-1}; 3131 for(int i=0; i<cases.length; i++){ 3132 if(UCharacter.getIntPropertyMaxValue(cases[i]) != -1){ 3133 errln("UCharacter.getIntPropertyMaxValue was suppose to return -1 " + 3134 "but got " + UCharacter.getIntPropertyMaxValue(cases[i])); 3135 } 3136 } 3137 3138 // TODO: Testing when the case statment reaches "default" 3139 // After testing between values of UProperty.INT_START and 3140 // UProperty.INT_LIMIT are covered, none of the values reaches default. 3141 } 3142 3143 /* 3144 * The following method tests 3145 * public static final int codePointAt(CharSequence seq, int index) 3146 * public static final int codePointAt(char[] text, int index, int limit) 3147 */ 3148 public void TestCodePointAt(){ 3149 3150 // {LEAD_SURROGATE_MIN_VALUE, 3151 // LEAD_SURROGATE_MAX_VALUE, LEAD_SURROGATE_MAX_VALUE-1 3152 String[] cases = {"\uD800","\uDBFF","\uDBFE"}; 3153 int[] result = {55296,56319,56318}; 3154 for(int i=0; i < cases.length; i++){ 3155 /* Testing UCharacter.codePointAt(seq, index) */ 3156 // Testing when "if (index < seq.length())" is false 3157 if(UCharacter.codePointAt((CharSequence) cases[i], 0) != result[i]) 3158 errln("UCharacter.codePointAt(CharSequence ...) did not return as expected. " + 3159 "Passed value: " + cases[i] + ". Expected: " + 3160 result[i] + ". Got: " + 3161 UCharacter.codePointAt((CharSequence) cases[i], 0)); 3162 3163 /* Testing UCharacter.codePointAt(text, index) */ 3164 // Testing when "if (index < text.length)" is false 3165 if(UCharacter.codePointAt(cases[i].toCharArray(), 0) != result[i]) 3166 errln("UCharacter.codePointAt(char[] ...) did not return as expected. " + 3167 "Passed value: " + cases[i] + ". Expected: " + 3168 result[i] + ". Got: " + 3169 UCharacter.codePointAt(cases[i].toCharArray(), 0)); 3170 3171 /* Testing UCharacter.codePointAt(text, index, limit) */ 3172 // Testing when "if (index < limit)" is false 3173 if(UCharacter.codePointAt(cases[i].toCharArray(), 0, 1) != result[i]) 3174 errln("UCharacter.codePointAt(char[], int, int) did not return as expected. " + 3175 "Passed value: " + cases[i] + ". Expected: " + 3176 result[i] + ". Got: " + 3177 UCharacter.codePointAt(cases[i].toCharArray(), 0, 1)); 3178 } 3179 3180 /* Testing UCharacter.codePointAt(text, index, limit) */ 3181 // Testing when "if (index >= limit || limit > text.length)" is true 3182 char[] empty_text = {}; 3183 char[] one_char_text = {'a'}; 3184 char[] reg_text = {'d','u','m','m','y'}; 3185 int[] limitCases = {2,3,5,10,25}; 3186 3187 // When index >= limit 3188 for(int i=0; i < limitCases.length; i++){ 3189 try{ 3190 UCharacter.codePointAt(reg_text, 100, limitCases[i]); 3191 errln("UCharacter.codePointAt was suppose to return an exception " + 3192 "but got " + UCharacter.codePointAt(reg_text, 100, limitCases[i]) + 3193 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3194 100 + ", Limit: " + limitCases[i] + "."); 3195 } catch(Exception e){ 3196 } 3197 } 3198 3199 // When limit > text.length 3200 for(int i=0; i < limitCases.length; i++){ 3201 try{ 3202 UCharacter.codePointAt(empty_text, 0, limitCases[i]); 3203 errln("UCharacter.codePointAt was suppose to return an exception " + 3204 "but got " + UCharacter.codePointAt(empty_text, 0, limitCases[i]) + 3205 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " + 3206 0 + ", Limit: " + limitCases[i] + "."); 3207 } catch(Exception e){ 3208 } 3209 3210 try{ 3211 UCharacter.codePointCount(one_char_text, 0, limitCases[i]); 3212 errln("UCharacter.codePointCount was suppose to return an exception " + 3213 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) + 3214 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " + 3215 0 + ", Limit: " + limitCases[i] + "."); 3216 } catch(Exception e){ 3217 } 3218 } 3219 } 3220 3221 /* 3222 * The following method tests 3223 * public static final int codePointBefore(CharSequence seq, int index) 3224 * public static final int codePointBefore(char[] text, int index) 3225 * public static final int codePointBefore(char[] text, int index, int limit) 3226 */ 3227 public void TestCodePointBefore(){ 3228 // {TRAIL_SURROGATE_MIN_VALUE, 3229 // TRAIL_SURROGATE_MAX_VALUE, TRAIL_SURROGATE_MAX_VALUE -1 3230 String[] cases = {"\uDC00","\uDFFF","\uDDFE"}; 3231 int[] result = {56320,57343,56830}; 3232 for(int i=0; i < cases.length; i++){ 3233 /* Testing UCharacter.codePointBefore(seq, index) */ 3234 // Testing when "if (index > 0)" is false 3235 if(UCharacter.codePointBefore((CharSequence) cases[i], 1) != result[i]) 3236 errln("UCharacter.codePointBefore(CharSequence ...) did not return as expected. " + 3237 "Passed value: " + cases[i] + ". Expected: " + 3238 result[i] + ". Got: " + 3239 UCharacter.codePointBefore((CharSequence) cases[i], 1)); 3240 3241 /* Testing UCharacter.codePointBefore(text, index) */ 3242 // Testing when "if (index > 0)" is false 3243 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1) != result[i]) 3244 errln("UCharacter.codePointBefore(char[] ...) did not return as expected. " + 3245 "Passed value: " + cases[i] + ". Expected: " + 3246 result[i] + ". Got: " + 3247 UCharacter.codePointBefore(cases[i].toCharArray(), 1)); 3248 3249 /* Testing UCharacter.codePointBefore(text, index, limit) */ 3250 // Testing when "if (index > limit)" is false 3251 if(UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0) != result[i]) 3252 errln("UCharacter.codePointBefore(char[], int, int) did not return as expected. " + 3253 "Passed value: " + cases[i] + ". Expected: " + 3254 result[i] + ". Got: " + 3255 UCharacter.codePointBefore(cases[i].toCharArray(), 1, 0)); 3256 } 3257 3258 /* Testing UCharacter.codePointBefore(text, index, limit) */ 3259 char[] dummy = {'d','u','m','m','y'}; 3260 // Testing when "if (index <= limit || limit < 0)" is true 3261 int[] negative_cases = {-100,-10,-5,-2,-1}; 3262 int[] index_cases = {0,1,2,5,10,100}; 3263 3264 for(int i=0; i < negative_cases.length; i++){ 3265 try{ 3266 UCharacter.codePointBefore(dummy, 10000, negative_cases[i]); 3267 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " + 3268 "when the parameter limit of " + negative_cases[i] + " is a negative number."); 3269 } catch(Exception e) {} 3270 } 3271 3272 for(int i=0; i < index_cases.length; i++){ 3273 try{ 3274 UCharacter.codePointBefore(dummy, index_cases[i], 101); 3275 errln("UCharacter.codePointBefore(text, index, limit) was suppose to return an exception " + 3276 "when the parameter index of " + index_cases[i] + " is a negative number."); 3277 } catch(Exception e) {} 3278 } 3279 } 3280 3281 /* 3282 * The following method tests 3283 * public static final int toChars(int cp, char[] dst, int dstIndex) 3284 * public static final char[] toChars(int cp) 3285 */ 3286 public void TestToChars(){ 3287 int[] positive_cases = {1,2,5,10,100}; 3288 char[] dst = {'a'}; 3289 3290 /* Testing UCharacter.toChars(cp, dst, dstIndex) */ 3291 for(int i=0; i < positive_cases.length; i++){ 3292 // Testing negative values when cp < 0 for if (cp >= 0) 3293 try{ 3294 UCharacter.toChars(-1*positive_cases[i],dst,0); 3295 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " + 3296 "when the parameter " + (-1*positive_cases[i]) + " is a negative number."); 3297 } catch(Exception e){ 3298 } 3299 3300 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true 3301 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0) != 1){ 3302 errln("UCharacter.toChars(int,char[],int) was suppose to return a value of 1. Got: " + 3303 UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i], dst, 0)); 3304 } 3305 3306 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and 3307 // when "if (cp <= MAX_CODE_POINT)" is false 3308 try{ 3309 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i],dst,0); 3310 errln("UCharacter.toChars(int,char[],int) was suppose to return an exception " + 3311 "when the parameter " + (UCharacter.MAX_CODE_POINT+positive_cases[i]) + 3312 " is a large number."); 3313 } catch(Exception e){ 3314 } 3315 } 3316 3317 3318 /* Testing UCharacter.toChars(cp)*/ 3319 for(int i=0; i<positive_cases.length; i++){ 3320 // Testing negative values when cp < 0 for if (cp >= 0) 3321 try{ 3322 UCharacter.toChars(-1*positive_cases[i]); 3323 errln("UCharacter.toChars(cint) was suppose to return an exception " + 3324 "when the parameter " + positive_cases[i] + " is a negative number."); 3325 } catch(Exception e){ 3326 } 3327 3328 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is true 3329 if(UCharacter.toChars(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]).length <= 0){ 3330 errln("UCharacter.toChars(int) was suppose to return some result result when the parameter " + 3331 (UCharacter.MIN_SUPPLEMENTARY_CODE_POINT-positive_cases[i]) + "is passed."); 3332 } 3333 3334 // Testing when "if (cp < MIN_SUPPLEMENTARY_CODE_POINT)" is false and 3335 // when "if (cp <= MAX_CODE_POINT)" is false 3336 try{ 3337 UCharacter.toChars(UCharacter.MAX_CODE_POINT+positive_cases[i]); 3338 errln("UCharacter.toChars(int) was suppose to return an exception " + 3339 "when the parameter " + positive_cases[i] + " is a large number."); 3340 } catch(Exception e){ 3341 } 3342 } 3343 } 3344 3345 /* 3346 * The following method tests 3347 * public static int codePointCount(CharSequence text, int start, int limit) 3348 * public static int codePointCount(char[] text, int start, int limit) 3349 */ 3350 public void TestCodePointCount(){ 3351 // The following tests the first if statement to make it true: 3352 // if (start < 0 || limit < start || limit > text.length) 3353 // which will throw an exception. 3354 char[] empty_text = {}; 3355 char[] one_char_text = {'a'}; 3356 char[] reg_text = {'d','u','m','m','y'}; 3357 int[] invalid_startCases = {-1,-2,-5,-10,-100}; 3358 int[] limitCases = {2,3,5,10,25}; 3359 3360 // When start < 0 3361 for(int i=0; i < invalid_startCases.length; i++){ 3362 try{ 3363 UCharacter.codePointCount(reg_text, invalid_startCases[i], 1); 3364 errln("UCharacter.codePointCount was suppose to return an exception " + 3365 "but got " + UCharacter.codePointCount(reg_text, invalid_startCases[i], 1) + 3366 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3367 invalid_startCases[i] + ", Limit: " + 1 + "."); 3368 } catch(Exception e){ 3369 } 3370 } 3371 3372 // When limit < start 3373 for(int i=0; i < limitCases.length; i++){ 3374 try{ 3375 UCharacter.codePointCount(reg_text, 100, limitCases[i]); 3376 errln("UCharacter.codePointCount was suppose to return an exception " + 3377 "but got " + UCharacter.codePointCount(reg_text, 100, limitCases[i]) + 3378 ". The following passed parameters were Text: " + String.valueOf(reg_text) + ", Start: " + 3379 100 + ", Limit: " + limitCases[i] + "."); 3380 } catch(Exception e){ 3381 } 3382 } 3383 3384 // When limit > text.length 3385 for(int i=0; i < limitCases.length; i++){ 3386 try{ 3387 UCharacter.codePointCount(empty_text, 0, limitCases[i]); 3388 errln("UCharacter.codePointCount was suppose to return an exception " + 3389 "but got " + UCharacter.codePointCount(empty_text, 0, limitCases[i]) + 3390 ". The following passed parameters were Text: " + String.valueOf(empty_text) + ", Start: " + 3391 0 + ", Limit: " + limitCases[i] + "."); 3392 } catch(Exception e){ 3393 } 3394 3395 try{ 3396 UCharacter.codePointCount(one_char_text, 0, limitCases[i]); 3397 errln("UCharacter.codePointCount was suppose to return an exception " + 3398 "but got " + UCharacter.codePointCount(one_char_text, 0, limitCases[i]) + 3399 ". The following passed parameters were Text: " + String.valueOf(one_char_text) + ", Start: " + 3400 0 + ", Limit: " + limitCases[i] + "."); 3401 } catch(Exception e){ 3402 } 3403 } 3404 } 3405 3406 /* 3407 * The following method tests 3408 * private static int getEuropeanDigit(int ch) 3409 * The method needs to use the method "digit" in order to access the 3410 * getEuropeanDigit method. 3411 */ 3412 public void TestGetEuropeanDigit(){ 3413 //The number retrieved from 0xFF41 to 0xFF5A is due to 3414 // exhaustive testing from UTF16.CODEPOINT_MIN_VALUE to 3415 // UTF16.CODEPOINT_MAX_VALUE return a value of -1. 3416 3417 int[] radixResult = { 3418 10,11,12,13,14,15,16,17,18,19,20,21,22, 3419 23,24,25,26,27,28,29,30,31,32,33,34,35}; 3420 // Invalid and too-small-for-these-digits radix values. 3421 int[] radixCase1 = {0,1,5,10,100}; 3422 // Radix values that work for at least some of the "digits". 3423 int[] radixCase2 = {12,16,20,36}; 3424 3425 for(int i=0xFF41; i<=0xFF5A; i++){ 3426 for(int j=0; j < radixCase1.length; j++){ 3427 if(UCharacter.digit(i, radixCase1[j]) != -1){ 3428 errln("UCharacter.digit(int,int) was supposed to return -1 for radix " + radixCase1[j] 3429 + ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + UCharacter.digit(i, radixCase1[j])); 3430 } 3431 } 3432 for(int j=0; j < radixCase2.length; j++){ 3433 int radix = radixCase2[j]; 3434 int expected = (radixResult[i-0xFF41] < radix) ? radixResult[i-0xFF41] : -1; 3435 int actual = UCharacter.digit(i, radix); 3436 if(actual != expected){ 3437 errln("UCharacter.digit(int,int) was supposed to return " + 3438 expected + " for radix " + radix + 3439 ". Value passed: U+" + Integer.toHexString(i) + ". Got: " + actual); 3440 break; 3441 } 3442 } 3443 } 3444 } 3445 3446 /* Tests the method 3447 * private static final int getProperty(int ch) 3448 * from public static int getType(int ch) 3449 */ 3450 public void TestGetProperty(){ 3451 int[] cases = {UTF16.CODEPOINT_MAX_VALUE+1, UTF16.CODEPOINT_MAX_VALUE+2}; 3452 for(int i=0; i < cases.length; i++) 3453 if(UCharacter.getType(cases[i]) != 0) 3454 errln("UCharacter.getType for testing UCharacter.getProperty " 3455 + "did not return 0 for passed value of " + cases[i] + 3456 " but got " + UCharacter.getType(cases[i])); 3457 } 3458 3459 /* Tests the class 3460 * abstract public static class XSymbolTable implements SymbolTable 3461 */ 3462 public void TestXSymbolTable(){ 3463 class MyXSymbolTable extends UnicodeSet.XSymbolTable {} 3464 MyXSymbolTable st = new MyXSymbolTable(); 3465 3466 // Tests "public UnicodeMatcher lookupMatcher(int i)" 3467 if(st.lookupMatcher(0) != null) 3468 errln("XSymbolTable.lookupMatcher(int i) was suppose to return null."); 3469 3470 // Tests "public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result)" 3471 if(st.applyPropertyAlias("", "", new UnicodeSet()) != false) 3472 errln("XSymbolTable.applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) was suppose to return false."); 3473 3474 // Tests "public char[] lookup(String s)" 3475 if(st.lookup("") != null) 3476 errln("XSymbolTable.lookup(String s) was suppose to return null."); 3477 3478 // Tests "public String parseReference(String text, ParsePosition pos, int limit)" 3479 if(st.parseReference("", null, 0) != null) 3480 errln("XSymbolTable.parseReference(String text, ParsePosition pos, int limit) was suppose to return null."); 3481 } 3482 3483 /* Tests the method 3484 * public boolean isFrozen() 3485 */ 3486 public void TestIsFrozen(){ 3487 UnicodeSet us = new UnicodeSet(); 3488 if(us.isFrozen() != false) 3489 errln("Unicode.isFrozen() was suppose to return false."); 3490 3491 us.freeze(); 3492 if(us.isFrozen() != true) 3493 errln("Unicode.isFrozen() was suppose to return true."); 3494 } 3495 } 3496